python LAADS+Selenium应该如何运用?

mb5f8e5f84eff49 2021-07-20 04:45:03
游戏 游戏开发


from selenium import webdriver

from time import sleep

import tempfile

import os,sys

import pandas as pd

import geopandas as gpd

import time

# 构建查询地址

def GetURL(ProductID,StartTime,EndTime,search_file):
    """Build the URL of the LAADS search page for a product, date range and area.

    Args:
        ProductID: Product segment of the URL. The caller in this file passes
            it with a trailing slash (e.g. ``'MOD021KM--61/'``).
        StartTime: First day of the query, as text (e.g. ``'2020-06-01'``).
        EndTime: Last day of the query, as text.
        search_file: Path of a vector file readable by geopandas; the geometry
            of its first feature is used as the study area.

    Returns:
        The search-page URL as a string.
    """
    # Study-area boundary.
    data = gpd.GeoDataFrame.from_file(search_file)

    # MODIS tile grid. A raw string keeps the backslashes literal and avoids
    # the invalid-escape warnings ('\*', '\m') of the original literal.
    modis_grid_file = r'E:\***\modis_WGS84_grid_world.shp'
    modis_grid = gpd.GeoDataFrame.from_file(modis_grid_file)

    # Grid cells that intersect the first feature of the study area.
    modis_intersection = modis_grid[modis_grid.intersects(data.geometry[0])]

    # NOTE(review): assumes columns 1 and 2 of the grid table are the
    # horizontal and vertical tile indices -- confirm against the shapefile.
    tiles = ["H" + str(mv[1]) + "V" + str(mv[2]) for mv in modis_intersection.values]
    path_row = 'Tile:' + ",".join(tiles)

    # NOTE(review): the original URL literal was truncated in this source
    # (only "url='" survived). The expression below is a reconstruction based
    # on the LAADS search-page URL layout; verify it against the live site
    # before relying on it.
    url = ('https://ladsweb.modaps.eosdis.nasa.gov/search/order/4/'
           + ProductID + StartTime + '..' + EndTime + '/DB/' + path_row)
    return url

# 使用 Selenium 查询影像

def SearchFileList(url):
    """Open the search page in Chrome and download the result list as CSV.

    A fresh directory named after the current timestamp is created and set as
    Chrome's download directory, so the only file in it afterwards should be
    the CSV that was downloaded.

    Args:
        url: Search-page URL to open.

    Returns:
        Path of the directory the CSV was downloaded into.
    """
    # Download directory named after the moment the program runs.
    csvdir = 'E:\\***\\' + str(time.time()).replace('.','')
    os.mkdir(csvdir)

    # Configure Chrome: no download pop-ups, save into csvdir.
    options = webdriver.ChromeOptions()
    prefs = {'profile.default_content_settings.popups': 0, 'download.default_directory': csvdir}
    options.add_experimental_option('prefs', prefs)
    # Local path of chromedriver.exe.
    chromedriver = r"C:\***\Google\Chrome\Application\chromedriver.exe"
    # Add options.add_argument('--headless') here to run without a window.

    # NOTE(review): executable_path= and find_element_by_xpath are the
    # Selenium 3 spellings; they need adapting if Selenium 4 is installed.
    driver = webdriver.Chrome(executable_path=chromedriver,options=options)
    try:
        driver.get(url)

        # The server needs time to run the search after the page opens.
        # 50 seconds is a guess; tune it for the connection, or replace it
        # with an explicit wait for the CSV link to appear.
        sleep(50)

        # Locate the CSV link in the download tab and click it.
        csvElement = driver.find_element_by_xpath('// *[ @ id = "tab4download"] / a[2]')
        csvElement.click()

        # Leave time for the CSV download to finish.
        sleep(20)
    finally:
        # Always close the browser, even when a step above raised, so no
        # Chrome/chromedriver process is left running.
        driver.quit()

    return csvdir

# 下载影像

def MODISDown(FileDir):
    """Download every file listed in the CSV found in ``FileDir``.

    The first entry of ``FileDir`` is read as CSV, then that file and the
    directory are deleted. Each row is turned into a download link and a
    local path; files that already exist locally are skipped.

    Args:
        FileDir: Directory that contains the downloaded CSV listing.
    """
    # The directory is expected to hold only the CSV; an empty directory
    # raises IndexError here.
    csvfilename = os.listdir(FileDir)[0]
    csvfilepath = os.path.join(FileDir, csvfilename)
    csvvalues = pd.read_csv(csvfilepath).values

    # The listing is in memory now; remove the temporary file and directory.
    os.remove(csvfilepath)
    os.rmdir(FileDir)

    total = len(csvvalues)
    for file_count, cv in enumerate(csvvalues, start=1):
        # NOTE(review): assumes column 1 is a server-relative path whose 6th
        # and 8th '/'-separated parts are the year folder and the file name
        # -- confirm against a real CSV.
        parts = cv[1].split("/")

        # NOTE(review): the original link expression was truncated in this
        # source; the host prefix below is a reconstruction -- verify it.
        modislink = 'https://ladsweb.modaps.eosdis.nasa.gov' + cv[1]

        outdir = 'E:/***/MODIS/' + parts[5]
        # Create the output folder (and missing parents) if needed.
        os.makedirs(outdir, exist_ok=True)

        path = outdir + '/' + parts[7]
        if not os.path.exists(path):
            print("({0}/{1}) Downloading {2}".format(file_count, total, modislink.split("/")[-1]))
            with open(path, 'w+b') as out:
                geturl(modislink, out)

# 获取下载链接并下载影像数据

def geturl(url,out=None):
    """Fetch ``url`` with a bearer token, returning text or streaming to a file.

    Args:
        url: Address to request.
        out: Optional writable file object. When given, the response body is
            streamed into it with progress output; when omitted, the body is
            returned as text.

    Returns:
        The response body decoded as UTF-8 when ``out`` is None and the
        request succeeds; otherwise None (including after a successful
        download into ``out`` and after any reported error).
    """
    import ssl
    from urllib.error import HTTPError, URLError
    from urllib.request import Request, urlopen

    USERAGENT = 'tis/download.py_1.0--' + sys.version.replace('\n', '').replace('\r', '')
    headers = { 'user-agent' : USERAGENT }
    token = '******' # Your token; available in your Earthdata profile after logging in.
    headers['Authorization'] = 'Bearer ' + token

    # Only the context creation decides whether the curl fallback is needed;
    # keeping the try this narrow stops unrelated AttributeErrors raised
    # during the download from silently re-routing to curl.
    # NOTE(review): a context built this way does not verify certificates by
    # default; consider ssl.create_default_context() since a token is sent.
    try:
        CTX = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
    except AttributeError:
        CTX = None

    if CTX is None:
        # No TLS 1.2 support in this Python's ssl module: shell out to curl.
        import subprocess
        try:
            args = ['curl', '--fail', '-sS', '-L', '--get', url]
            for (k,v) in headers.items():
                args.extend(['-H', ': '.join([k, v])])
            if out is None:
                # check_output returns bytes on Python 3.
                result = subprocess.check_output(args)
                return result.decode('utf-8') if isinstance(result, bytes) else result
            subprocess.call(args, stdout=out)
        except subprocess.CalledProcessError as e:
            detail = getattr(e, 'message', None) or e.output
            if isinstance(detail, bytes):
                detail = detail.decode('utf-8', 'replace')
            print('curl GET error message: %s' % detail, file=sys.stderr)
        return None

    try:
        with urlopen(Request(url, headers=headers), context=CTX) as response:
            if out is None:
                return response.read().decode('utf-8')

            start = time.time()
            # Stream the body into ``out`` while reporting progress.
            downloaded = chunk_read(response, out, report_hook=chunk_report)
            elapsed = max(time.time() - start,1.0)
            # Average speed from the bytes actually written, so a missing
            # Content-Length header cannot break the arithmetic.
            rate = (downloaded / 1024 ** 2) / elapsed
            print("Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec".format(downloaded, elapsed, rate))
    except HTTPError as e:
        # HTTPError.code is an int attribute, not a method.
        print('HTTP GET error code: %d' % e.code, file=sys.stderr)
        print('HTTP GET error message: %s' % e.reason, file=sys.stderr)
    except URLError as e:
        print('Failed to make request: %s' % e.reason, file=sys.stderr)
    return None

# chunk_read was adapted from an external source; the reference URL was lost when this article was extracted.

def chunk_read( response, local_file, chunk_size=10240, report_hook=None):
    """Copy a response body into ``local_file`` in fixed-size chunks.

    Args:
        response: Object with a ``read(size)`` method; also passed to
            ``get_total_size`` to find the expected total size.
        local_file: Writable file object that receives the data.
        chunk_size: Number of bytes requested per read.
        report_hook: Optional callable invoked after each written chunk as
            ``report_hook(bytes_so_far, file_size)``.

    Returns:
        The number of bytes read from ``response`` and written.
    """
    # Expected total size (None when it cannot be determined).
    file_size = get_total_size(response)
    bytes_so_far = 0

    while True:
        try:
            chunk = response.read(chunk_size)
        except Exception:
            # Best effort: report and stop. Unlike a bare ``except``, this
            # lets KeyboardInterrupt / SystemExit propagate so a download
            # can still be aborted by the user.
            sys.stdout.write("\n > There was an error reading data. \n")
            break

        # An empty chunk marks the end of the stream.
        if not chunk:
            break

        try:
            local_file.write(chunk)
        except TypeError:
            # The target was opened in text mode; decode before writing.
            local_file.write(chunk.decode(local_file.encoding))

        bytes_so_far += len(chunk)

        if report_hook:
            report_hook(bytes_so_far, file_size)

    return bytes_so_far

def chunk_report( bytes_so_far, file_size):
    """Write a one-line download progress message to stdout.

    The message ends with a carriage return so successive calls overwrite
    each other on a terminal.

    Args:
        bytes_so_far: Number of bytes downloaded so far.
        file_size: Expected total size in bytes, or None/0 when unknown.
    """
    # A size of 0 is treated like an unknown size; dividing by it would
    # otherwise raise ZeroDivisionError.
    if file_size:
        percent = round(float(bytes_so_far) / file_size * 100, 2)
        sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" %
                         (bytes_so_far, file_size, percent))
    else:
        # We couldn't figure out the size.
        sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far))

def get_total_size(response):
    """Return the Content-Length of ``response`` as an int, or None.

    Two header accessors are tried in turn: ``response.info().getheader``
    and then ``response.getheader``. None is returned, after printing a
    short notice, when neither yields a value or the value is not a number.

    Args:
        response: Response object to inspect.

    Returns:
        The declared body size in bytes, or None when it is unavailable.
    """
    try:
        raw_size = response.info().getheader('Content-Length').strip()
    except AttributeError:
        # Either accessor missing or header absent (None has no .strip()).
        try:
            raw_size = response.getheader('Content-Length').strip()
        except AttributeError:
            print ("> Problem getting size")
            return None

    try:
        return int(raw_size)
    except ValueError:
        # Malformed header value: report it like a missing one rather than
        # letting the exception abort the download.
        print ("> Problem getting size")
        return None

if __name__ == "__main__":
    # What to download: product id, first and last day of the query.
    # Each value could instead be taken from sys.argv[1..4].
    ProductID, StartTime, EndTime = 'MOD021KM--61/', '2020-06-01', '2020-06-03'

    # Vector file that outlines the query area.
    search_file = r'E:\***\ 北京市 .shp'

    # Pipeline: build the search URL, fetch the CSV listing through the
    # browser, then download every file named in that listing.
    MODISDown(SearchFileList(GetURL(ProductID, StartTime, EndTime, search_file)))

版权声明
本文为[mb5f8e5f84eff49]所创,转载请带上原文链接,感谢
https://blog.51cto.com/u_14967986/2896085

  1. python —pandas库常用函数
  2. Python应用matplotlib绘图简介
  3. Python matplotlib高级绘图详解
  4. 入门训练 Fibonacci数列-python实现
  5. Python -二维数组定义
  6. python二进制相加
  7. Python文本处理:解析json格式的数据
  8. 查看python安装路径
  9. Python编程之计算生态
  10. Python-turtle标准库知识小结(python绘图工具)
  11. Python-time标准库知识小结
  12. Python-random标准库知识小结
  13. python安装第三方库的三种方法
  14. python程序的控制结构
  15. Python程序的函数和代码复用
  16. python之组合数据类型
  17. python【力扣LeetCode算法题库】300 最长上升子序列(动态规划)
  18. python【力扣LeetCode算法题库】695- 岛屿的最大面积(深搜)
  19. python【力扣LeetCode算法题库】面试题 01.06-字符串压缩
  20. python【力扣LeetCode算法题库】1160-拼写单词
  21. python【力扣LeetCode算法题库】836- 矩形重叠
  22. python【力扣LeetCode算法题库】409-最长回文串(数学 计数器)
  23. python【力扣LeetCode算法题库】面试题40- 最小的k个数
  24. python【力扣LeetCode算法题库】945- 使数组唯一的最小增量
  25. python【力扣LeetCode算法题库】365- 水壶问题(裴蜀等式)
  26. python【力扣LeetCode算法题库】876- 链表的中间结点
  27. python【力扣LeetCode算法题库】面试题 17.16- 按摩师(DP)
  28. python【力扣LeetCode算法题库】892-三维形体的表面积
  29. python【力扣LeetCode算法题库】999-车的可用捕获量(DFS)
  30. python【力扣LeetCode算法题库】914. 卡牌分组(reduce & collections.Counter)
  31. python【力扣LeetCode算法题库】820- 单词的压缩编码
  32. python【力扣LeetCode算法题库】1162- 地图分析(BFS)
  33. python【力扣LeetCode算法题库】面试题62- 圆圈中最后剩下的数字(约瑟夫环)
  34. python【力扣LeetCode算法题库】912- 排序数组
  35. python【力扣LeetCode算法题库】1111- 有效括号的嵌套深度
  36. python【力扣LeetCode算法题库】289- 生命游戏
  37. python【力扣LeetCode算法题库】12- 整数转罗马数字(打表 模拟)
  38. python【数据结构与算法】内置函数enumerate(枚举) 函数(看不懂你来打我)
  39. python【力扣LeetCode算法题库】13- 罗马数字转整数
  40. python【数据结构与算法】内置函数 zip() 函数(看不懂你来打我)
  41. python【力扣LeetCode算法题库】14-最长公共前缀(列表解压)
  42. python【蓝桥杯vip练习题库】ADV-281特等奖学金
  43. python【蓝桥杯vip练习题库】ADV-69质因数(数论)
  44. python爬不同图片分别保存在不同文件夹中
  45. python打印a-z
  46. python以16进制打印输出
  47. 每天好心情——Python画一棵樱花树
  48. 在终端输入命令打开mac自带的python工具IDLE
  49. Python3的安装(Windows)
  50. Python第一个爬虫项目
  51. Python模拟日志生成
  52. 【邵奈一】Python爬虫专栏(一)之Python爬虫热身
  53. 用 Python 画一棵圣诞树
  54. 你一定想不到,实现一个Python+Selenium的自动化测试框架就这么简单!
  55. 一文快速教你搭建Python+Selenium环境
  56. 一看就会:Python+Appium实现自动化测试
  57. 【邵奈一】Python爬虫专栏(三)之自动登录
  58. Python core developer: the retirement of the father of Python has no impact
  59. Python3 or Python2? Examples explain the differences between the two
  60. Analysis of Linux DHCP server IP allocation Python script