
Commit

updated v0.3.4
- Added async downloader for python 3.4
- Checks for existing files before spawning processes
- Better handling of multiprocessing output
- Added a quick version tool
samapriya committed May 29, 2019
1 parent 1ee1a28 commit 10a5b15
Showing 9 changed files with 161 additions and 13 deletions.
16 changes: 15 additions & 1 deletion README.md
@@ -21,6 +21,7 @@ http://doi.org/10.5281/zenodo.3233068
* [Installation](#installation)
* [Getting started](#getting-started)
* [porder Ordersv2 Simple Client](#porder-ordersv2-simple-client)
* [porder version](#porder-version)
* [porder quota](#porder-quota)
* [base64](#base64)
* [shape to geojson](#shape-to-geojson)
@@ -80,6 +81,13 @@ To obtain help for a specific functionality, simply call it with _help_ switch,
## porder Simple CLI for Planet ordersv2 API
The tool is designed to simplify use of the ordersv2 API, letting the user chain together tools and operations for multiple item and asset types, run those operations, and download the assets locally.

### porder version
This prints the tool version and exits. Simple usage:

```
porder version
```

### porder quota
A simple tool to quickly print your Planet subscription quota.

@@ -244,7 +252,7 @@ This allows you to download the files in your order in multiple parts; it uses a multipart
![porder_multipart](https://user-images.githubusercontent.com/28806922/53097736-2b042c80-34f0-11e9-9724-68e9ed356ab7.png)
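The chunked write underlying a multipart download can be sketched as follows. This is a minimal illustration, not the tool's actual implementation: `save_stream` is a hypothetical helper, and the in-memory byte chunks stand in for a streamed `requests.get(url, stream=True)` response.

```python
import os
import tempfile

def save_stream(chunks, fullpath):
    # Write an iterable of byte chunks to fullpath, creating parent
    # directories first -- `chunks` stands in for the parts yielded by a
    # streamed HTTP response.
    head = os.path.dirname(fullpath)
    if head and not os.path.exists(head):
        os.makedirs(head)
    with open(fullpath, "wb") as f:
        for ch in chunks:
            f.write(ch)
    return os.path.getsize(fullpath)

# Demo with in-memory "parts" instead of a network response
demo_path = os.path.join(tempfile.mkdtemp(), "scene", "demo.bin")
written = save_stream([b"part1-", b"part2"], demo_path)
```

Writing chunk by chunk keeps memory use flat no matter how large the asset is.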

### multiprocessing download
This uses the multiprocessing library to quickly download your files to a local folder. It uses the order URL generated by the orders tool to access and download the files, and includes an exponential rate-limiting function to handle too many requests. To save time it uses an extension filter: for example, if you are using the zip operation you can use ".zip", and if you are downloading only images, udm and xml files you can use ".tif" or ".xml" accordingly.
This uses the multiprocessing library to quickly download your files to a local folder. It uses the order URL generated by the orders tool to access and download the files, and includes an exponential rate-limiting function to handle too many requests. To save time it uses an extension filter: for example, if you are using the zip operation you can use ".zip", and if you are downloading only images, udm and xml files you can use ".tif" or ".xml" accordingly. For Python 3.4 or higher, this switches to a true async downloader instead of multiprocessing.

![porder_multiprocessing](https://user-images.githubusercontent.com/28806922/53097786-4707ce00-34f0-11e9-9e79-78ba1d4ba27c.png)
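The two ideas in the paragraph above, an exponential wait schedule for rate-limit errors and an extension filter over the result list, can be sketched as below. The function names here are illustrative, not the tool's API.

```python
def backoff_delays(attempts, base=1.0, cap=10.0):
    # Exponential wait schedule: double each retry, capped at `cap`.
    # This mirrors the retrying library's wait_exponential_multiplier=1000 /
    # wait_exponential_max=10000, expressed in seconds rather than milliseconds.
    return [min(base * (2 ** i), cap) for i in range(attempts)]

def filter_by_ext(paths, ext=None):
    # Keep every path when no filter is given, otherwise only matching ones.
    if ext is None:
        return list(paths)
    return [p for p in paths if p.endswith(ext)]

delays = backoff_delays(5)
tifs = filter_by_ext(["a.tif", "a.xml", "b.tif", "b_udm.tif"], ".tif")
```

Passing `ext=None` downloads everything, matching the tool's default when `--ext` is omitted.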

@@ -254,6 +262,12 @@ A simple setup would be

## Changelog

### v0.3.4
- Added async downloader for python 3.4
- Checks for existing files before spawning processes
- Better handling of multiprocessing output
- Added a quick version tool

### v0.3.3
- Fixed issue with order name when no ops are used.
- Used file basename for splitting the idlist.
Binary file removed dist/porder-0.3.3.tar.gz
Binary file added dist/porder-0.3.4.tar.gz
2 changes: 1 addition & 1 deletion porder/__init__.py
@@ -2,4 +2,4 @@

__author__ = 'Samapriya Roy'
__email__ = '[email protected]'
__version__ = '0.3.3'
__version__ = '0.3.4'
113 changes: 113 additions & 0 deletions porder/async_down.py
@@ -0,0 +1,113 @@
import requests
import asyncio
import os
import sys
import time
import progressbar
from concurrent.futures import ThreadPoolExecutor
from timeit import default_timer
from retrying import retry
from planet.api.auth import find_api_key

# Get Planet API key and authenticate session
try:
    PL_API_KEY = find_api_key()
except Exception:
    print('Failed to get Planet Key')
    sys.exit()
SESSION = requests.Session()
SESSION.auth = (PL_API_KEY, '')


@retry(
    wait_exponential_multiplier=1000,
    wait_exponential_max=10000)
def check_for_redirects(url):
    try:
        r = SESSION.get(url, allow_redirects=False, timeout=0.5)
        if 300 <= r.status_code < 400:
            return r.headers['location']
        elif r.status_code == 429:
            raise Exception("rate limit error")
    except requests.exceptions.Timeout:
        return '[timeout]'
    except requests.exceptions.ConnectionError:
        return '[connection error]'
    except requests.HTTPError:
        print(r.status_code)
        if r.status_code == 429:  # Too many requests
            raise Exception("rate limit error")

START_TIME = default_timer()

def fetch(session, url):
    urlcheck = url.split('|')[0]
    fullpath = url.split('|')[1]
    [head, tail] = os.path.split(fullpath)
    # print("Starting download of %s" % fullpath.split('/')[-1])
    if not os.path.exists(head):
        os.makedirs(head)
    os.chdir(head)
    if not os.path.isfile(fullpath):
        r = session.get(urlcheck, stream=True)
        with open(fullpath, "wb") as f:
            for ch in r:
                f.write(ch)
    elapsed = default_timer() - START_TIME
    time_completed_at = "{:5.2f}s".format(elapsed)
    print("{0:100} {1:20}".format(tail, time_completed_at))
    return tail

urls = []
def funct(url, final, ext):
    if not os.path.exists(final):
        os.makedirs(final)
    os.chdir(final)
    response = SESSION.get(url).json()
    print("Polling with exponential backoff..")
    while response['state'] == 'running' or response['state'] == 'starting':
        bar = progressbar.ProgressBar()
        for z in bar(range(60)):
            time.sleep(1)
        response = SESSION.get(url).json()
    if response['state'] == 'success':
        for items in response['_links']['results']:
            url = (items['location'])
            url_to_check = url if url.startswith('https') else "http://%s" % url
            redirect_url = check_for_redirects(url_to_check)
            if redirect_url.startswith('https'):
                local_path = os.path.join(final, str(os.path.split(items['name'])[-1]))
                if not os.path.isfile(local_path) and ext is None:
                    urls.append(str(redirect_url) + '|' + local_path)
                if not os.path.isfile(local_path) and ext is not None:
                    if local_path.endswith(ext):
                        urls.append(str(redirect_url) + '|' + local_path)
    else:
        print('Order Failed with state: ' + str(response['state']))
    print('Processing a url list with ' + str(len(urls)) + ' items')
    print('\n')
    return urls

async def get_data_asynchronous(url, final, ext):
    global START_TIME
    urllist = funct(url=url, final=final, ext=ext)
    print("{0:100} {1:20}".format("File", "Completed at"))
    with ThreadPoolExecutor(max_workers=10) as executor:
        with requests.Session() as session:
            # Set any session parameters here before calling `fetch`
            loop = asyncio.get_event_loop()
            START_TIME = default_timer()  # reset the timer used by `fetch`
            tasks = [
                loop.run_in_executor(
                    executor,
                    fetch,
                    *(session, url)  # pass multiple arguments to `fetch`
                )
                for url in urllist
            ]
            for response in await asyncio.gather(*tasks):
                pass

def downloader(url, final, ext):
    loop = asyncio.get_event_loop()
    future = asyncio.ensure_future(get_data_asynchronous(url, final, ext))
    loop.run_until_complete(future)

# downloader(url='https://api.planet.com/compute/ops/orders/v2/bbccc868-bada-4a4c-8c1d-9d8ef81c1d75', final=r'C:\planet_demo\mp2', ext=None)
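The download pattern in async_down.py, blocking `requests` calls fanned out over a thread pool from an asyncio event loop, reduces to the minimal sketch below. `blocking_fetch` is a stand-in for the real `fetch`, and `asyncio.run` replaces the `get_event_loop` calls used above.

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor

def blocking_fetch(item):
    # Stand-in for a blocking session.get(...) download of one file.
    return item.upper()

async def gather_all(items):
    # Offload each blocking call onto the pool and await them all at once.
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor(max_workers=4) as executor:
        tasks = [loop.run_in_executor(executor, blocking_fetch, it)
                 for it in items]
        return await asyncio.gather(*tasks)

results = asyncio.run(gather_all(["scene1.tif", "scene2.tif"]))
```

`asyncio.gather` preserves input order, so results line up with the URL list even though downloads finish out of order.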
11 changes: 8 additions & 3 deletions porder/multiproc_pydl.py
Expand Up @@ -28,6 +28,8 @@
import progressbar
import json
import sys
from threading import *
screen_lock = Semaphore(value=1)
from retrying import retry
from planet.api.utils import read_planet_json
from planet.api.auth import find_api_key
@@ -99,7 +101,9 @@ def worker(self, url):
            os.makedirs(head)
        os.chdir(head)
        if not os.path.isfile(fullpath):
            print(msg, multiprocessing.current_process().name)
            screen_lock.acquire()
            print(str(msg)+' '+str(multiprocessing.current_process().name))
            screen_lock.release()
            r = requests.get(urlcheck)
            with open(fullpath, "wb") as f:
                f.write(r.content)
@@ -129,13 +133,14 @@ def funct(url,final,ext):

            if redirect_url.startswith('https'):
                local_path=os.path.join(final,str(os.path.split(items['name'])[-1]))
                if ext is None:
                if not os.path.isfile(local_path) and ext is None:
                    urls.append(str(redirect_url)+'|'+local_path)
                elif ext is not None:
                if not os.path.isfile(local_path) and ext is not None:
                    if local_path.endswith(ext):
                        urls.append(str(redirect_url)+'|'+local_path)
    else:
        print('Order Failed with state: '+str(response['state']))
    print('Downloading a total of '+str(len(urls))+' objects')
    downloader = MultiProcDownloader(urls)
    downloader.run()

30 changes: 23 additions & 7 deletions porder/porder.py
@@ -25,6 +25,8 @@
import json
import base64
import clipboard
import platform
import pkg_resources
from .shp2geojson import shp2gj
from .geojson_simplify import geosimple
from .geojson2id import idl
@@ -35,11 +37,19 @@

from .diffcheck import checker
from .async_downloader import asyncdownload
from .idcheck import idc
if sys.version_info >= (3, 4):
    from .async_down import downloader
os.chdir(os.path.dirname(os.path.realpath(__file__)))
lpath = os.path.dirname(os.path.realpath(__file__))
sys.path.append(lpath)


# Get package version
def porder_version():
    print(pkg_resources.get_distribution("porder").version)

def version_from_parser(args):
    porder_version()

# Get quota for your account
def planet_quota():
    try:
@@ -151,16 +161,22 @@ def asyncdownload_from_parser(args):
        local=args.local,
        ext=args.ext)

def multiproc_from_parser(args):
    if sys.version_info >= (3, 4):
        downloader(url=args.url,final=args.local,ext=args.ext)
    else:
        if args.ext is None:
            subprocess.call("python multiproc_pydl.py "+args.url+" "+args.local+" ",shell=True)
        else:
            subprocess.call("python multiproc_pydl.py "+args.url+" "+args.local+" "+args.ext,shell=True)

spacing=" "

def main(args=None):
    parser = argparse.ArgumentParser(description='Ordersv2 Simple Client')
    subparsers = parser.add_subparsers()
    parser_version = subparsers.add_parser('version', help='Prints porder version and exits')
    parser_version.set_defaults(func=version_from_parser)

    parser_planet_quota = subparsers.add_parser('quota', help='Prints your Planet Quota Details')
    parser_planet_quota.set_defaults(func=planet_quota_from_parser)

@@ -263,9 +279,9 @@ def main(args=None):
    optional_named.add_argument('--ext', help="File Extension to download",default=None)
    parser_asyncdownload.set_defaults(func=asyncdownload_from_parser)

    parser_multiproc = subparsers.add_parser('multiproc',help='''Multiprocess based downloader based on satlist''')
    parser_multiproc.add_argument('--url',help='Ordersv2 order link')
    parser_multiproc.add_argument('--local',help='Local Path to save files')
    parser_multiproc = subparsers.add_parser('multiproc',help='Multiprocess based downloader to download all files in your order')
    parser_multiproc.add_argument('--url',help='order URL you got for your order')
    parser_multiproc.add_argument('--local',help='Output folder where ordered files will be exported')
    optional_named = parser_multiproc.add_argument_group('Optional named arguments')
    optional_named.add_argument('--ext', help="File Extension to download",default=None)
    parser_multiproc.set_defaults(func=multiproc_from_parser)
2 changes: 1 addition & 1 deletion setup.py
@@ -20,7 +20,7 @@ def readme():
        return f.read()

setuptools.setup(
    name='porder',
    version='0.3.3',
    version='0.3.4',
    packages=['porder'],
    url='https://github.com/samapriya/porder',
    package_data={'': ['bundles.json']},
