diff --git a/README.md b/README.md index 62e0817..dc593fc 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ http://doi.org/10.5281/zenodo.3233068 * [Installation](#installation) * [Getting started](#getting-started) * [porder Ordersv2 Simple Client](#porder-ordersv2-simple-client) + * [porder version](#porder-version) * [porder quota](#porder-quota) * [base64](#base64) * [shape to geojson](#shape-to-geojson) @@ -80,6 +81,13 @@ To obtain help for a specific functionality, simply call it with _help_ switch, ## porder Simple CLI for Planet ordersv2 API The tool is designed to simplify using the ordersv2 API and allows the user to chain together tools and operations for multiple item and asset types and perform these operations and download the assets locally. +### porder version +This prints the tool version and exits. Simple use would be + +``` +porder version +``` + ### porder quota Just a simple tool to print your planet subscription quota quickly. @@ -244,7 +252,7 @@ The allows you to multipart download the files in your order, this uses a multip ![porder_multipart](https://user-images.githubusercontent.com/28806922/53097736-2b042c80-34f0-11e9-9724-68e9ed356ab7.png) ### multiprocessing download -The uses the multiprocessing library to quickly download your files to a local folder. It uses the order url generated using the orders tool to access and download the files and includes an expotential rate limiting function to handle too many requests. To save on time it uses an extension filter so for example if you are using the zip operation you can use ".zip" and if you are downloading only images, udm and xml you can use ".tif" or ".xml" accordingly. +This uses the multiprocessing library to quickly download your files to a local folder. It uses the order url generated using the orders tool to access and download the files and includes an exponential rate limiting function to handle too many requests. 
To save on time it uses an extension filter so for example if you are using the zip operation you can use ".zip" and if you are downloading only images, udm and xml you can use ".tif" or ".xml" accordingly. For python 3.4 or higher, this switches to using a true async downloader instead of using multiprocessing. ![porder_multiprocessing](https://user-images.githubusercontent.com/28806922/53097786-4707ce00-34f0-11e9-9e79-78ba1d4ba27c.png) @@ -254,6 +262,12 @@ A simple setup would be ## Changelog +### v0.3.4 +- Added async downloader for python 3.4 +- Checks for existing files before spawning processes +- Better handling of multiprocessing output +- Added a quick version tool + ### v0.3.3 - Fixed issue with order name when no ops are used. - Used file basename for splitting the idlist. diff --git a/dist/porder-0.3.3.tar.gz b/dist/porder-0.3.3.tar.gz deleted file mode 100644 index 1103e65..0000000 Binary files a/dist/porder-0.3.3.tar.gz and /dev/null differ diff --git a/dist/porder-0.3.3-py2.py3-none-any.whl b/dist/porder-0.3.4-py2.py3-none-any.whl similarity index 50% rename from dist/porder-0.3.3-py2.py3-none-any.whl rename to dist/porder-0.3.4-py2.py3-none-any.whl index 0c115fb..93d6fe5 100644 Binary files a/dist/porder-0.3.3-py2.py3-none-any.whl and b/dist/porder-0.3.4-py2.py3-none-any.whl differ diff --git a/dist/porder-0.3.4.tar.gz b/dist/porder-0.3.4.tar.gz new file mode 100644 index 0000000..9c34d19 Binary files /dev/null and b/dist/porder-0.3.4.tar.gz differ diff --git a/porder/__init__.py b/porder/__init__.py index 7894574..0cf626d 100644 --- a/porder/__init__.py +++ b/porder/__init__.py @@ -2,4 +2,4 @@ __author__ = 'Samapriya Roy' __email__ = 'samapriya.roy@gmail.com' -__version__ = '0.3.3' +__version__ = '0.3.4' diff --git a/porder/async_down.py b/porder/async_down.py new file mode 100644 index 0000000..22b575b --- /dev/null +++ b/porder/async_down.py @@ -0,0 +1,113 @@ +import requests +import asyncio +import os +from concurrent.futures import 
ThreadPoolExecutor +from timeit import default_timer +from retrying import retry +from planet.api.auth import find_api_key + +#Get Planet API and Authenticate SESSION +try: + PL_API_KEY = find_api_key() +except: + print('Failed to get Planet Key') + sys.exit() +SESSION = requests.Session() +SESSION.auth = (PL_API_KEY, '') + + +@retry( + wait_exponential_multiplier=1000, + wait_exponential_max=10000) +def check_for_redirects(url): + try: + r = SESSION.get(url, allow_redirects=False, timeout=0.5) + if 300 <= r.status_code < 400: + return r.headers['location'] + elif r.status_code==429: + raise Exception("rate limit error") + except requests.exceptions.Timeout: + return '[timeout]' + except requests.exceptions.ConnectionError: + return '[connection error]' + except requests.HTTPError as e: + print(r.status_code) + if r.status_code == 429: # Too many requests + raise Exception("rate limit error") +START_TIME = default_timer() + +def fetch(session, url): + urlcheck=url.split('|')[0] + fullpath=url.split('|')[1] + [head,tail]=os.path.split(fullpath) + #print("Starting download of %s" % fullpath.split('/')[-1]) + if not os.path.exists(head): + os.makedirs(head) + os.chdir(head) + if not os.path.isfile(fullpath): + r = session.get(urlcheck, stream = True) + with open(fullpath, "wb") as f: + for ch in r: + f.write(ch) + elapsed = default_timer() - START_TIME + time_completed_at = "{:5.2f}s".format(elapsed) + print("{0:100} {1:20}".format(tail, time_completed_at)) + + return tail + +urls=[] +def funct(url,final,ext): + if not os.path.exists(final): + os.makedirs(final) + os.chdir(final) + response=SESSION.get(url).json() + print("Polling with exponential backoff..") + while response['state']=='running' or response['state']=='starting': + bar = progressbar.ProgressBar() + for z in bar(range(60)): + time.sleep(1) + response=SESSION.get(url).json() + if response['state']=='success': + for items in response['_links']['results']: + url=(items['location']) + url_to_check = url if 
url.startswith('https') else "http://%s" % url + redirect_url = check_for_redirects(url_to_check) + + if redirect_url.startswith('https'): + local_path=os.path.join(final,str(os.path.split(items['name'])[-1])) + if not os.path.isfile(local_path) and ext is None: + urls.append(str(redirect_url)+'|'+local_path) + if not os.path.isfile(local_path) and ext is not None: + if local_path.endswith(ext): + urls.append(str(redirect_url)+'|'+local_path) + else: + print('Order Failed with state: '+str(response['state'])) + print('Processing a url list with '+str(len(urls))+' items') + print('\n') + return urls + +async def get_data_asynchronous(url,final,ext): + urllist=funct(url=url,final=final,ext=ext) + print("{0:100} {1:20}".format("File", "Completed at")) + with ThreadPoolExecutor(max_workers=10) as executor: + with requests.Session() as session: + # Set any session parameters here before calling `fetch` + loop = asyncio.get_event_loop() + START_TIME = default_timer() + tasks = [ + loop.run_in_executor( + executor, + fetch, + *(session, url) # Allows us to pass in multiple arguments to `fetch` + ) + for url in urllist + ] + for response in await asyncio.gather(*tasks): + pass + +def downloader(url,final,ext): + loop = asyncio.get_event_loop() + future = asyncio.ensure_future(get_data_asynchronous(url,final,ext)) + loop.run_until_complete(future) + +#downloader(url='https://api.planet.com/compute/ops/orders/v2/bbccc868-bada-4a4c-8c1d-9d8ef81c1d75',final=r'C:\planet_demo\mp2',ext=None) diff --git a/porder/multiproc_pydl.py b/porder/multiproc_pydl.py index c1ea84d..bda8f75 100644 --- a/porder/multiproc_pydl.py +++ b/porder/multiproc_pydl.py @@ -28,6 +28,8 @@ import progressbar import json import sys +from threading import * +screen_lock = Semaphore(value=1) from retrying import retry from planet.api.utils import read_planet_json from planet.api.auth import find_api_key @@ -99,7 +101,9 @@ def worker(self, url): os.makedirs(head) os.chdir(head) if not os.path.isfile(fullpath): 
- print(msg, multiprocessing.current_process().name) + screen_lock.acquire() + print(str(msg)+' '+str(multiprocessing.current_process().name)) + screen_lock.release() r = requests.get(urlcheck) with open(fullpath, "wb") as f: f.write(r.content) @@ -129,13 +133,14 @@ def funct(url,final,ext): if redirect_url.startswith('https'): local_path=os.path.join(final,str(os.path.split(items['name'])[-1])) - if ext is None: + if not os.path.isfile(local_path) and ext is None: urls.append(str(redirect_url)+'|'+local_path) - elif ext is not None: + if not os.path.isfile(local_path) and ext is not None: if local_path.endswith(ext): urls.append(str(redirect_url)+'|'+local_path) else: print('Order Failed with state: '+str(response['state'])) + print('Downloading a total of '+str(len(urls))+' objects') downloader = MultiProcDownloader(urls) downloader.run() diff --git a/porder/porder.py b/porder/porder.py index 8ea7b50..0b1b9c4 100644 --- a/porder/porder.py +++ b/porder/porder.py @@ -25,6 +25,8 @@ import json import base64 import clipboard +import platform +import pkg_resources from .shp2geojson import shp2gj from .geojson_simplify import geosimple from .geojson2id import idl @@ -35,11 +37,19 @@ from .diffcheck import checker from .async_downloader import asyncdownload from .idcheck import idc +if str(platform.python_version()) > "3.3.0": + from .async_down import downloader os.chdir(os.path.dirname(os.path.realpath(__file__))) lpath=os.path.dirname(os.path.realpath(__file__)) sys.path.append(lpath) +# Get package version +def porder_version(): + print(pkg_resources.get_distribution("porder").version) +def version_from_parser(args): + porder_version() + #Get quota for your account def planet_quota(): try: @@ -151,16 +161,22 @@ def asyncdownload_from_parser(args): local=args.local, ext=args.ext) def multiproc_from_parser(args): - if args.ext==None: - subprocess.call("python multiproc_pydl.py "+args.url+" "+args.local+" ",shell=True) - else: - subprocess.call("python 
multiproc_pydl.py "+args.url+" "+args.local+" "+args.ext,shell=True) + if str(platform.python_version()) > "3.3.0": + downloader(url=args.url,final=args.local,ext=args.ext) + elif str(platform.python_version()) <= "3.3.0": + if args.ext==None: + subprocess.call("python multiproc_pydl.py "+args.url+" "+args.local+" ",shell=True) + else: + subprocess.call("python multiproc_pydl.py "+args.url+" "+args.local+" "+args.ext,shell=True) spacing=" " def main(args=None): parser = argparse.ArgumentParser(description='Ordersv2 Simple Client') subparsers = parser.add_subparsers() + parser_version = subparsers.add_parser('version', help='Prints porder version and exists') + parser_version.set_defaults(func=version_from_parser) + parser_planet_quota = subparsers.add_parser('quota', help='Prints your Planet Quota Details') parser_planet_quota.set_defaults(func=planet_quota_from_parser) @@ -263,9 +279,9 @@ def main(args=None): optional_named.add_argument('--ext', help="File Extension to download",default=None) parser_asyncdownload.set_defaults(func=asyncdownload_from_parser) - parser_multiproc = subparsers.add_parser('multiproc',help='''Multiprocess based downloader based on satlist''') - parser_multiproc.add_argument('--url',help='Ordersv2 order link') - parser_multiproc.add_argument('--local',help='Local Path to save files') + parser_multiproc = subparsers.add_parser('multiproc',help='Multiprocess based downloader to download for all files in your order') + parser_multiproc.add_argument('--url',help='order url you got for your order') + parser_multiproc.add_argument('--local',help='Output folder where ordered files will be exported') optional_named = parser_multiproc.add_argument_group('Optional named arguments') optional_named.add_argument('--ext', help="File Extension to download",default=None) parser_multiproc.set_defaults(func=multiproc_from_parser) diff --git a/setup.py b/setup.py index e1156bf..26d8100 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ def readme(): 
return f.read() setuptools.setup( name='porder', - version='0.3.3', + version='0.3.4', packages=['porder'], url='https://github.com/samapriya/porder', package_data={'': ['bundles.json']},