From 0014fd88cfce1c8eecb0b1ad94caef6256e82766 Mon Sep 17 00:00:00 2001 From: TechShreyash <82265247+TechShreyash@users.noreply.github.com> Date: Fri, 28 Jun 2024 10:44:09 +0530 Subject: [PATCH] techzdl v1.2.1 Release - Changed how techzdl was being before - Added method to stop the running download process - Added background downloading option - Added documentation --- DOCS.md | 134 ++++ README.md | 329 +-------- demos/background_download.py | 27 + demos/basic.py | 11 - demos/basic_usage.py | 16 + demos/custom_header.py | 10 +- demos/custom_progress_callback.py | 6 +- ...disable_debug_logs_and_default_progress.py | 6 +- demos/fixed_no_of_workers.py | 5 +- demos/force_stoping_download.py | 20 + demos/getting_file_info.py | 6 +- demos/setting_custom_file_and_folder_name.py | 6 +- demos/single_threaded_mode.py | 6 +- demos/timeout_and_max_retries.py | 4 +- setup.py | 2 +- techzdl/__init__.py | 647 +++++++++++++++++- techzdl/api.py | 93 --- techzdl/downloader.py | 494 ------------- techzdl/extra.py | 10 +- techzdl/logger.py | 2 +- 20 files changed, 898 insertions(+), 936 deletions(-) create mode 100644 DOCS.md create mode 100644 demos/background_download.py delete mode 100644 demos/basic.py create mode 100644 demos/basic_usage.py create mode 100644 demos/force_stoping_download.py delete mode 100644 techzdl/api.py delete mode 100644 techzdl/downloader.py diff --git a/DOCS.md b/DOCS.md new file mode 100644 index 0000000..f435748 --- /dev/null +++ b/DOCS.md @@ -0,0 +1,134 @@ +# TechZDL v1.2.1 Documentation + +## Installation + +You can install TechZDL using pip: + +```sh +pip install techzdl +``` + +To update TechZDL to the latest version, use: + +```sh +pip install --upgrade techzdl +``` + +**Note**: If it doesn't update to the latest version, use: + +```sh +pip install --upgrade --force-reinstall techzdl +``` + +## Usage + +Here's a basic example of how to use the TechZDL package: + +### Basic Usage + +```python +import asyncio +from techzdl import TechZDL + +async def 
main(): + downloader = TechZDL(url="https://link.testfile.org/bNYZFw") + await downloader.start() + +asyncio.run(main()) +``` + +https://github.com/TechShreyash/techzdl/assets/82265247/33267e71-2b41-4dd1-b306-c87a197a3b57 + +## The TechZDL Class + +You can import it using: + +```python +from techzdl import TechZDL +``` + +### Arguments + +Here is a list of arguments you can pass to the `TechZDL` class to modify your downloading process: + +- `url` `(str)`: URL of the file to download. +- `custom_headers` `(Optional[dict])`: Custom headers to send with the request. Defaults to None. +- `output_dir` `(Union[str, Path])`: Directory where the file will be saved. Defaults to "downloads". +- `filename` `(Optional[str])`: Name to save the file as (including extension). By default, this will be determined automatically. +- `workers` `(Optional[int])`: Number of fixed concurrent download workers. By default, this will be dynamically adjusted based on the download speed. Setting this will disable dynamic worker adjustment. +- `initial_dynamic_workers` `(int)`: Initial number of dynamic workers. Defaults to 2. +- `dynamic_workers_update_interval` `(int)`: Interval in seconds to update dynamic worker count. Defaults to 5. +- `debug` `(bool)`: Enable debug logs. Defaults to True. +- `progress` `(bool)`: Enable download progress display. Defaults to True. +- `progress_callback` `(Optional[Union[Callable[..., Any], Callable[..., Awaitable[Any]]]])`: Callback function for download progress updates. Can be sync or async. Defaults to None. Setting this disables tqdm progress. +- `progress_args` `(tuple)`: Additional arguments for `progress_callback`. Defaults to (). +- `progress_interval` `(int)`: Time interval for progress updates in seconds. Defaults to 1. +- `chunk_size` `(int)`: Size of each download chunk in bytes. Defaults to 5 MB. +- `single_threaded` `(bool)`: Force single-threaded download. Defaults to False. +- `max_retries` `(int)`: Maximum retries for each chunk/file download. Defaults to 3.
+- `timeout` `(int)`: Timeout for each request in seconds. Defaults to 60. + +### Attributes + +- `id` `(str)`: ID of the TechZDL downloader object, uniquely generated at the time of object creation. +- `is_running` `(bool)`: True if the download process is running, else False. + +```python +import asyncio +from techzdl import TechZDL + +async def main(): + downloader = TechZDL(url="https://link.testfile.org/bNYZFw") + print(downloader.id) + print(downloader.is_running) + +asyncio.run(main()) +``` + +> Note: You can read the above attributes directly from the downloader object, but modifying them is not recommended and may cause issues with the downloader. + +## Methods + +### TechZDL.start() + +Starts the download process. + +#### Args + +- `in_background` `(bool, optional)`: Run the download process in the background. Defaults to False. + +#### Returns + +- `filepath` `(Path)`: Path to the downloaded file. When `in_background` is True, the call returns immediately with `None`. + +> Note: `Path` here refers to the `Path` object from the `pathlib` library. + +```python +from pathlib import Path +``` + +### TechZDL.stop() + +Forcefully stops the download process. + +### TechZDL.get_file_info() + +Fetches file information from the server. + +#### Returns + +- `dict`: File information in the format `{"filename": str, "total_size": int}`. + + - `filename` `(str)`: Name as returned by the server or determined by the TechZDL package using response headers and download URL. + - `total_size` `(int)`: Total size of the file in bytes. + + + + +## Support + +For inquiries or support, join our [Telegram Support Group](https://telegram.me/TechZBots_Support) or email [techshreyash123@gmail.com](mailto:techshreyash123@gmail.com).
+ +## Stay Connected + +- Join our [Telegram Channel](https://telegram.me/TechZBots) diff --git a/README.md b/README.md index 8f51239..8a0439c 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,17 @@ -# TechZDL +# TechZDL v1.2.1 TechZDL is a powerful file downloader package for Python that supports multi-threaded downloads, dynamic worker adjustments based on network speed, custom headers, and more. Total Repo Views +## Features + +- **Multi-threaded downloads**: Efficiently download files using multiple threads. +- **Dynamic worker adjustments**: Automatically adjusts the number of workers based on network speed. +- **Custom headers**: Add custom headers to your download requests. +- **Error handling**: Robust error handling and retry mechanisms. +- **Asynchronous support**: Fully asynchronous for non-blocking operations. + ## Installation You can install TechZDL using pip: @@ -18,323 +26,38 @@ To update TechZDL to the latest version, use: pip install --upgrade techzdl ``` -## Usage - -Here's a basic example of how to use TechZDL: - -### Basic Usage - -Code from [demos/basic.py](demos/basic.py) file - -```python -import asyncio -from techzdl.api import TechZDL - - -async def main(): - techzdl = TechZDL() - downloader = techzdl.get_downloader(url="https://link.testfile.org/bNYZFw") - await downloader.start() - - -asyncio.run(main()) -``` - -https://github.com/TechShreyash/techzdl/assets/82265247/33267e71-2b41-4dd1-b306-c87a197a3b57 - -## Configuration Parameters - -The `get_downloader` method accepts several parameters to customize the download process: - -- **url (str)**: URL of the file to download. - -- **custom_headers (Optional[dict], optional)**: Custom headers to send with the request. Defaults to `None`. - -- **output_dir (Union[str, Path], optional)**: Directory where the file will be saved. Defaults to `"downloads"`. - -- **filename (Optional[str], optional)**: Name to save the file as (including extension). 
By default, this will be determined automatically based on the URL or Content-Disposition header. - -- **workers (Optional[int], optional)**: Number of fixed concurrent download workers. By default, this will be dynamically adjusted based on the download speed. Setting this will disable dynamic worker adjustment. - -> Workers here means the number of parallel connections that will be used to download the file. - -- **initial_dynamic_workers (int, optional)**: Initial number of dynamic workers. Defaults to `2`. - -- **dynamic_workers_update_interval (int, optional)**: Interval in seconds to update the dynamic worker count. Defaults to `5`. - -- **debug (bool, optional)**: Enable debug logs. Defaults to `True`. - -- **progress (bool, optional)**: Enable download progress display. Defaults to `True`. - -- **progress_callback (Optional[Union[Callable[..., Any], Callable[..., Awaitable[Any]]]], optional)**: - Callback function for download progress updates. Can be sync or async. Defaults to `None`. Setting this disables `tqdm` progress. - -- **progress_args (tuple, optional)**: Additional arguments for `progress_callback`. Defaults to `()`. - -- **progress_interval (int, optional)**: Time interval for progress updates in seconds. Defaults to `1`. - -- **chunk_size (int, optional)**: Size of each download chunk in bytes. Defaults to `5 MB`. - -- **single_threaded (bool, optional)**: Force single-threaded download. Defaults to `False`. - -- **max_retries (int, optional)**: Maximum retries for each chunk/file download. Defaults to `3`. - -- **timeout (int, optional)**: Timeout for each request in seconds. Defaults to `60`. - -## Examples / Demos - -### Getting File Info - -Code from [demos/getting_file_info.py](demos/getting_file_info.py) file - -```python -# This script demonstrates how to use the TechZDL package to fetch file information asynchronously. 
- -import asyncio -from techzdl.api import TechZDL - - -async def main(): - techzdl = TechZDL() - - downloader = techzdl.get_downloader(url="https://link.testfile.org/bNYZFw") - - # Retrieve file information asynchronously - file_info = await downloader.get_file_info() - - # Print the retrieved file information - print(f"Filename: {file_info['filename']}") - print(f"Total Size: {file_info['total_size']} bytes") - - -asyncio.run(main()) -``` - -#### Output - -![image](https://github.com/TechShreyash/techzdl/assets/82265247/01b5e894-eb1c-48be-8ad8-74c8cb4b6349) - -### Setting Custom File And Folder Name - -Code from [demos/setting_custom_file_and_folder_name.py](demos/setting_custom_file_and_folder_name.py) file - -```python -# By specifying the output directory and filename, you can organize your downloads and ensure files are saved with your preferred names. -# This is useful when you need to manage multiple downloads and want to store them in specific locations with specific names. - -import asyncio -from techzdl.api import TechZDL - - -async def main(): - techzdl = TechZDL() - - downloader = techzdl.get_downloader( - url="https://link.testfile.org/bNYZFw", - output_dir="my_files", # Custom directory where the file will be saved - filename="my_video.mp4", # Custom filename for the downloaded file - ) - await downloader.start() - - -asyncio.run(main()) -``` - -#### Output - -![image](https://github.com/TechShreyash/techzdl/assets/82265247/4c1c94d2-7d5a-4031-902b-555e4c80bf32) - -### Custom Headers - -Code from [demos/custom_header.py](demos/custom_header.py) file - -```python -# You can pass custom headers to the downloader by providing a dictionary to the 'custom_headers' parameter of the get_downloader method. -# This is useful when you need to include specific headers such as 'referer' or 'user-agent' to access the resource. 
- -import asyncio -from techzdl.api import TechZDL - - -async def main(): - techzdl = TechZDL() - headers = { - "referer": "https://testfile.org/", - "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", - } - downloader = techzdl.get_downloader( - url="https://link.testfile.org/bNYZFw", - custom_headers=headers, # Custom headers for the downloader - ) - await downloader.start() - - -asyncio.run(main()) -``` - -#### Output - -![image](https://github.com/TechShreyash/techzdl/assets/82265247/1de8ed93-2b4d-4f8e-a341-0a236c88dfe8) - -### Fixed No. Of Workers - -Code from [demos/fixed_no_of_workers.py](demos/fixed_no_of_workers.py) file - -```python -# You can set a fixed number of workers for the downloader by passing the 'workers' parameter to the get_downloader method. -# In this context, 'workers' refers to the number of parallel connections that will be used to download the file. -# This is useful when you want to limit the number of connections to the server. -# Note: Setting this parameter will disable dynamic worker adjustments based on download speed. -# For optimal performance, you can omit this parameter and allow the library to automatically determine the number of workers. - -import asyncio -from techzdl.api import TechZDL - - -async def main(): - techzdl = TechZDL() - downloader = techzdl.get_downloader( - url="https://link.testfile.org/bNYZFw", - workers=4, # Fixed number of workers for the downloader - ) - await downloader.start() - - -asyncio.run(main()) -``` - -#### Output - -![image](https://github.com/TechShreyash/techzdl/assets/82265247/87dcfee5-f4b2-4d42-af80-120cb06fdc2f) - -### Custom Progress Callback - -Code from [demos/custom_progress_callback.py](demos/custom_progress_callback.py) file - -```python -# This script demonstrates how to monitor the download progress by providing a custom callback function. 
-# By setting the 'progress_callback' parameter, the provided function will be called periodically with the current progress. -# This will disable the default progress bar and you can use your own progress bar or any other progress indicator. -# This is useful for updating a UI, logging progress, or executing other actions based on the download status. - - -import asyncio -from techzdl.api import TechZDL - -def progress_callback(description, done, total, arg1, arg2): - print(f"{description}: {done}/{total} bytes downloaded", arg1, arg2) - -async def main(): - techzdl = TechZDL() - - downloader = techzdl.get_downloader( - url="https://link.testfile.org/bNYZFw", # URL of the file to download - progress_callback=progress_callback, # Custom progress callback function - progress_args=("arg1", "arg2"), # Additional arguments to pass to the callback function - progress_interval=2, # Interval in seconds for calling the progress callback - ) - await downloader.start() - -asyncio.run(main()) -``` - -#### Output +**Note**: If it doesn't update to the latest version, use: -![image](https://github.com/TechShreyash/techzdl/assets/82265247/3098eb9b-0e04-45a5-8bd2-5e3ad46e53a6) - -### Timeouts And Max Retries - -Code from [demos/timeout_and_max_retries.py](demos/timeout_and_max_retries.py) file - -```python -# This script demonstrates how to configure the downloader to handle timeouts and retries. -# The 'timeout' parameter sets the maximum time (in seconds) to wait for a server response. -# The 'max_retries' parameter sets the maximum number of retry attempts for each chunk or file download. -# These settings are useful for handling unreliable network conditions or server issues. 
- -import asyncio -from techzdl.api import TechZDL - - -async def main(): - techzdl = TechZDL() - - downloader = techzdl.get_downloader( - url="https://link.testfile.org/bNYZFw", # URL of the file to download - timeout=30, # Timeout in seconds for each request (default: 60 seconds) - max_retries=5, # Maximum number of retries for each chunk/file download (default: 3) - ) - await downloader.start() - - -asyncio.run(main()) +```sh +pip install --upgrade --force-reinstall techzdl ``` -#### Output - -![image](https://github.com/TechShreyash/techzdl/assets/82265247/87dcfee5-f4b2-4d42-af80-120cb06fdc2f) +## Usage -### Single Threaded Mode +Here's a basic example of how to use the TechZDL package: -Code from [demos/single_threaded_mode.py](demos/single_threaded_mode.py) file +### Basic Usage ```python -# The 'single_threaded' parameter can be set to True to force the downloader to operate with a single connection. -# This is useful when you want to limit resource usage or when the server does not support multiple connections. -# Note that using a single-threaded approach may affect download speed, especially for large files. -# The single-threaded mode is automatically enabled when the 'workers' parameter is set to 1 or when the server does not support range requests. 
- import asyncio -from techzdl.api import TechZDL - +from techzdl import TechZDL async def main(): - techzdl = TechZDL() - - downloader = techzdl.get_downloader( - url="https://link.testfile.org/bNYZFw", - single_threaded=True, # Enable single-threaded mode - ) + downloader = TechZDL(url="https://link.testfile.org/bNYZFw") await downloader.start() - asyncio.run(main()) ``` -#### Output - -![image](https://github.com/TechShreyash/techzdl/assets/82265247/beb0574c-08e0-4903-92ce-d9202166880f) - -### Disable Debug Logs And Default Progress Bar - -Code from [demos/disable_debug_logs_and_default_progress.py](demos/disable_debug_logs_and_default_progress.py) file - -```python -# Setting 'debug' to False will disable detailed logging, which can be useful to reduce log clutter in production. -# Setting 'progress' to False will disable the tqdm progress bar by techzdl, which can be useful in environments where a progress bar is not needed, such as in automated scripts or background processes. -# Adding custom progress_callback will still work - -import asyncio -from techzdl.api import TechZDL - - -async def main(): - techzdl = TechZDL() - - downloader = techzdl.get_downloader( - url="https://link.testfile.org/bNYZFw", - debug=False, # Disable debug logs - progress=False, # Disable progress display - ) - await downloader.start() +https://github.com/TechShreyash/techzdl/assets/82265247/33267e71-2b41-4dd1-b306-c87a197a3b57 +## More Examples / Demos -asyncio.run(main()) -``` +Check the [demos](demos) folder for more examples and detailed demonstrations of file downloading using the TechZDL package. The demos include more information about the various features of TechZDL and how to use them effectively. -#### Output +## Documentation -![image](https://github.com/TechShreyash/techzdl/assets/82265247/81005cb5-5c46-4fe4-a14a-b8d2d3a52118) +Check [DOCS.md](DOCS.md) for detailed documentation of the TechZDL package. 
## License @@ -343,3 +66,11 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file ## Support For inquiries or support, join our [Telegram Support Group](https://telegram.me/TechZBots_Support) or email [techshreyash123@gmail.com](mailto:techshreyash123@gmail.com). + +## Acknowledgements + +Thanks to all contributors and users for their support and feedback. + +## Stay Connected + +- Join our [Telegram Channel](https://telegram.me/TechZBots) diff --git a/demos/background_download.py b/demos/background_download.py new file mode 100644 index 0000000..d905222 --- /dev/null +++ b/demos/background_download.py @@ -0,0 +1,27 @@ +# This includes starting the downloader in the background, performing other tasks, and waiting for the download to finish before showing a message. + +import asyncio +from techzdl import TechZDL + + +async def main(): + downloader = TechZDL(url="https://link.testfile.org/bNYZFw") + + # Start the download process in the background + await downloader.start(in_background=True) + + # Perform other tasks here, run your other code + + # For this demo, let's wait until the download starts, then show a message when it's finished + + await asyncio.sleep(5) # A sleep timeout to let the download start first + + # Check if the download is running + while downloader.is_running: + await asyncio.sleep(1) + + # After the download is finished + print("Downloading Finished") + + +asyncio.run(main()) diff --git a/demos/basic.py b/demos/basic.py deleted file mode 100644 index 841348a..0000000 --- a/demos/basic.py +++ /dev/null @@ -1,11 +0,0 @@ -import asyncio -from techzdl.api import TechZDL - - -async def main(): - techzdl = TechZDL() - downloader = techzdl.get_downloader(url="https://link.testfile.org/bNYZFw") - await downloader.start() - - -asyncio.run(main()) diff --git a/demos/basic_usage.py b/demos/basic_usage.py new file mode 100644 index 0000000..f1c8c3f --- /dev/null +++ b/demos/basic_usage.py @@ -0,0 +1,16 @@ +# This is a demo 
script to illustrate how to use the TechZDL library for downloading files asynchronously. + +import asyncio +from techzdl import TechZDL + + +async def main(): + # Initialize the downloader with a URL to download the file from + downloader = TechZDL(url="https://link.testfile.org/bNYZFw") + + # Start the download process + await downloader.start() + + +# Run the main function using asyncio +asyncio.run(main()) diff --git a/demos/custom_header.py b/demos/custom_header.py index 004e2d2..4a9cdea 100644 --- a/demos/custom_header.py +++ b/demos/custom_header.py @@ -2,18 +2,18 @@ # This is useful when you need to include specific headers such as 'referer' or 'user-agent' to access the resource. import asyncio -from techzdl.api import TechZDL +from techzdl import TechZDL async def main(): - techzdl = TechZDL() headers = { "referer": "https://testfile.org/", "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", - } - downloader = techzdl.get_downloader( + } # Custom headers for the downloader + + downloader = TechZDL( url="https://link.testfile.org/bNYZFw", - custom_headers=headers, # Custom headers for the downloader + custom_headers=headers, # Pass custom headers ) await downloader.start() diff --git a/demos/custom_progress_callback.py b/demos/custom_progress_callback.py index 74ff7b7..55bcce3 100644 --- a/demos/custom_progress_callback.py +++ b/demos/custom_progress_callback.py @@ -5,7 +5,7 @@ import asyncio -from techzdl.api import TechZDL +from techzdl import TechZDL def progress_callback(description, done, total, arg1, arg2): @@ -13,9 +13,7 @@ def progress_callback(description, done, total, arg1, arg2): async def main(): - techzdl = TechZDL() - - downloader = techzdl.get_downloader( + downloader = TechZDL( url="https://link.testfile.org/bNYZFw", # URL of the file to download progress_callback=progress_callback, # Custom progress callback function progress_args=( diff --git 
a/demos/disable_debug_logs_and_default_progress.py b/demos/disable_debug_logs_and_default_progress.py index 31bc9ef..03fb9e3 100644 --- a/demos/disable_debug_logs_and_default_progress.py +++ b/demos/disable_debug_logs_and_default_progress.py @@ -3,13 +3,11 @@ # Adding custom progress_callback will still work import asyncio -from techzdl.api import TechZDL +from techzdl import TechZDL async def main(): - techzdl = TechZDL() - - downloader = techzdl.get_downloader( + downloader = TechZDL( url="https://link.testfile.org/bNYZFw", debug=False, # Disable debug logs progress=False, # Disable progress display diff --git a/demos/fixed_no_of_workers.py b/demos/fixed_no_of_workers.py index 3593400..f10136e 100644 --- a/demos/fixed_no_of_workers.py +++ b/demos/fixed_no_of_workers.py @@ -5,12 +5,11 @@ # For optimal performance, you can omit this parameter and allow the library to automatically determine the number of workers. import asyncio -from techzdl.api import TechZDL +from techzdl import TechZDL async def main(): - techzdl = TechZDL() - downloader = techzdl.get_downloader( + downloader = TechZDL( url="https://link.testfile.org/bNYZFw", workers=4, # Fixed number of workers for the downloader ) diff --git a/demos/force_stoping_download.py b/demos/force_stoping_download.py new file mode 100644 index 0000000..cc16eb1 --- /dev/null +++ b/demos/force_stoping_download.py @@ -0,0 +1,20 @@ +# This includes starting the downloader in the background and stopping it after a specific duration. 
+ +import asyncio +from techzdl import TechZDL + + +async def main(): + downloader = TechZDL(url="https://mp4-download.com/8k-5-MP4") + + # Start the downloader in the background + await downloader.start(in_background=True) + + # Wait for 20 seconds + await asyncio.sleep(20) + + # Stop the downloader + await downloader.stop() + + +asyncio.run(main()) diff --git a/demos/getting_file_info.py b/demos/getting_file_info.py index d2da93e..62ef20a 100644 --- a/demos/getting_file_info.py +++ b/demos/getting_file_info.py @@ -1,13 +1,11 @@ # This script demonstrates how to use the TechZDL package to fetch file information asynchronously. import asyncio -from techzdl.api import TechZDL +from techzdl import TechZDL async def main(): - techzdl = TechZDL() - - downloader = techzdl.get_downloader(url="https://link.testfile.org/bNYZFw") + downloader = TechZDL(url="https://link.testfile.org/bNYZFw") # Retrieve file information asynchronously file_info = await downloader.get_file_info() diff --git a/demos/setting_custom_file_and_folder_name.py b/demos/setting_custom_file_and_folder_name.py index 086bfef..12ed984 100644 --- a/demos/setting_custom_file_and_folder_name.py +++ b/demos/setting_custom_file_and_folder_name.py @@ -2,13 +2,11 @@ # This is useful when you need to manage multiple downloads and want to store them in specific locations with specific names. 
import asyncio -from techzdl.api import TechZDL +from techzdl import TechZDL async def main(): - techzdl = TechZDL() - - downloader = techzdl.get_downloader( + downloader = TechZDL( url="https://link.testfile.org/bNYZFw", output_dir="my_files", # Custom directory where the file will be saved filename="my_video.mp4", # Custom filename for the downloaded file diff --git a/demos/single_threaded_mode.py b/demos/single_threaded_mode.py index 51c1b71..25bdd21 100644 --- a/demos/single_threaded_mode.py +++ b/demos/single_threaded_mode.py @@ -4,13 +4,11 @@ # The single-threaded mode is automatically enabled when the 'workers' parameter is set to 1 or when the server does not support range requests. import asyncio -from techzdl.api import TechZDL +from techzdl import TechZDL async def main(): - techzdl = TechZDL() - - downloader = techzdl.get_downloader( + downloader = TechZDL( url="https://link.testfile.org/bNYZFw", # URL of the file to download single_threaded=True, # Enable single-threaded mode ) diff --git a/demos/timeout_and_max_retries.py b/demos/timeout_and_max_retries.py index 1992eda..60c11f6 100644 --- a/demos/timeout_and_max_retries.py +++ b/demos/timeout_and_max_retries.py @@ -8,9 +8,7 @@ async def main(): - techzdl = TechZDL() - - downloader = techzdl.get_downloader( + downloader = TechZDL( url="https://link.testfile.org/bNYZFw", # URL of the file to download timeout=30, # Timeout in seconds for each request (default: 60 seconds) max_retries=5, # Maximum number of retries for each chunk/file download (default: 3) diff --git a/setup.py b/setup.py index a06fcc1..7e4326c 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name="techzdl", - version="1.1.7", + version="1.2.1", author="TechShreyash", author_email="techshreyash123@gmail.com", description="A simple yet powerfull file downloader package for python", diff --git a/techzdl/__init__.py b/techzdl/__init__.py index 306b8f8..bd729e2 100644 --- a/techzdl/__init__.py +++ b/techzdl/__init__.py @@ -1,7 
+1,650 @@ # Name: techzdl -# Version: 1.1.7 +# Version: 1.2.1 # Summary: A simple yet powerfull file downloader package for python # Home-page: https://github.com/TechShreyash/techzdl # Author: TechShreyash # Author-email: techshreyash123@gmail.com -# License: MIT \ No newline at end of file +# License: MIT + +import aiohttp +import aiofiles +import asyncio +import inspect +from tqdm import tqdm +from pathlib import Path +from techzdl.extra import ( + change_file_path_if_exist, + get_random_string, + AdjustableSemaphore, + get_filename, +) +from techzdl.logger import Logger +from typing import Callable, Any, Union, Awaitable, Optional +from curl_cffi.requests import AsyncSession + + +class TechZDL: + def __init__( + self, + url: str, + custom_headers: Optional[dict] = None, + output_dir: Union[str, Path] = Path("downloads"), + filename: Optional[str] = None, + workers: Optional[int] = None, + initial_dynamic_workers: int = 2, + dynamic_workers_update_interval: int = 5, + debug: bool = True, + progress: bool = True, + progress_callback: Optional[ + Union[Callable[..., Any], Callable[..., Awaitable[Any]]] + ] = None, + progress_args: tuple = (), + progress_interval: int = 1, + chunk_size: int = 5 * 1024 * 1024, + single_threaded: bool = False, + max_retries: int = 3, + timeout: int = 60, + ) -> None: + """ + Initialize the TechZDL object. + + #### Args: + - `url` `(str)`: URL of the file to download. + - `custom_headers` `(Optional[dict], optional)`: Custom headers to send with the request. Defaults to None. + - `output_dir` `(Union[str, Path], optional)`: Directory where the file will be saved. Defaults to "downloads". + - `filename` `(Optional[str], optional)`: Name to save the file as (including extension). By default, this will be determined automatically. + - `workers` `(Optional[int], optional)`: Number of fixed concurrent download workers. By default, this will be dynamically adjusted based on the download speed. 
Setting this will disable dynamic worker adjustment. + - `initial_dynamic_workers` `(int, optional)`: Initial number of dynamic workers. Defaults to 2. + - `dynamic_workers_update_interval` `(int, optional)`: Interval in seconds to update dynamic worker count. Defaults to 5. + - `debug` `(bool, optional)`: Enable debug logs. Defaults to True. + - `progress` `(bool, optional)`: Enable download progress display. Defaults to True. + - `progress_callback` `(Optional[Union[Callable[..., Any], Callable[..., Awaitable[Any]]]], optional)`: Callback function for download progress updates. Can be sync or async. Defaults to None. Setting this disables tqdm progress. + - `progress_args` `(tuple, optional)`: Additional arguments for progress_callback. Defaults to (). + - `progress_interval` `(int, optional)`: Time interval for progress updates in seconds. Defaults to 1. + - `chunk_size` `(int, optional)`: Size of each download chunk in bytes. Defaults to 5 MB. + - `single_threaded` `(bool, optional)`: Force single-threaded download. Defaults to False. + - `max_retries` `(int, optional)`: Maximum retries for each chunk/file download. Defaults to 3. + - `timeout` `(int, optional)`: Timeout for each request in seconds. Defaults to 60. 
+ + #### Examples: + ```python + import asyncio + from techzdl import TechZDL + + async def main(): + downloader = TechZDL(url="https://link.testfile.org/bNYZFw") + await downloader.start() + + asyncio.run(main()) + ``` + + For more examples and usage, check: [TechZDL Demos](https://github.com/TechShreyash/techzdl/tree/main/demos) + """ + + self.id = get_random_string(6) + self.url = url + self.custom_headers = custom_headers + self.output_dir = ( + Path(output_dir) if isinstance(output_dir, str) else output_dir + ) + self.output_dir.mkdir(parents=True, exist_ok=True) + self.output_path = None + self.filename = filename + self.workers = workers + self.debug = debug + self.logger = Logger(f"TechZDL - {self.id}") + self.progress = progress + self.progress_callback = progress_callback + self.progress_args = progress_args + self.chunk_size = chunk_size + self.progress_interval = progress_interval + self.single_threaded = single_threaded or workers == 1 + self.is_callback_async = inspect.iscoroutinefunction(progress_callback) + self.dynamic_workers = initial_dynamic_workers + self.dynamic_workers_update_interval = dynamic_workers_update_interval + self.curl_cffi_required = False + self.max_retries = max_retries + self.session = None + self.timeout = timeout + self.is_running = False + self.downloader_tasks = [] + self.temp_file_path = None + + self.logger.info(f"Created TechZ FileDownloader with ID: {self.id} URL: {url}") + + async def start(self, in_background: bool = False) -> Path: + """ + Starts the download process. + + #### Args + + - `in_background` `(bool, optional)`: Run the download process in the background. Defaults to False. + + #### Returns + + - `filepath` `(Path)`: Path to the downloaded file. + + > Note: `Path` here refers to the `Path` object from the `pathlib` library. 
async def get_file_info(self) -> dict:
    """
    Fetches file information from the server.

    Every attempt first queries the URL with aiohttp. If that fails for any
    reason (including a missing or zero `Content-Length`), the same attempt
    falls back to a curl_cffi `AsyncSession`. When both fail, the attempt is
    retried with exponential backoff, up to `max_retries` attempts in total.
    The session opened by an attempt is always closed before the next attempt
    starts or the method returns.

    #### Returns

    - `dict`: File information in the format `{"filename": str, "total_size": int}`.

        - `filename` `(str)`: Name as returned by the server or determined by the TechZDL package using response headers and download URL.
        - `total_size` `(int)`: Total size of the file in bytes.

    #### Raises

    - `Exception`: The error of the last attempt when no attempt succeeded, or
      when `max_retries` allows no attempt at all.
    """

    def read_info(response) -> dict:
        # Only the headers are needed; they stay readable after close().
        total_size = int(response.headers.get("Content-Length", 0))
        if total_size == 0:
            raise Exception("Content-Length header is missing or invalid")
        filename = get_filename(response.headers, response.url, self.id)
        return {"filename": str(filename), "total_size": total_size}

    for attempt in range(self.max_retries):
        session = None
        try:
            try:
                session = aiohttp.ClientSession(
                    timeout=aiohttp.ClientTimeout(total=self.timeout)
                )
                self._log(f"Fetching file info from {self.url}")
                response = await session.get(
                    url=self.url, headers=self.custom_headers
                )
                response.close()
                return read_info(response)
            except Exception as e:
                self._log(
                    f"Failed to get file info using aiohttp: {e}", level="error"
                )
                if session is not None:
                    await session.close()
                    session = None

                # Fall back to curl_cffi for servers that reject aiohttp.
                session = AsyncSession(timeout=self.timeout)
                response = await session.get(
                    url=self.url, headers=self.custom_headers, stream=True
                )
                response.close()
                return read_info(response)
        except Exception as e:
            self._log(f"Error getting file info: {e}", level="error")
            if attempt == self.max_retries - 1:
                raise
            self._log(
                f"Retrying getting file info ({attempt + 1}/{self.max_retries})",
                level="warning",
            )
            await asyncio.sleep(2**attempt)  # Exponential backoff
        finally:
            # Runs on success, failure and cancellation alike, so no attempt
            # leaves its session open.
            if session is not None:
                await session.close()

    # Only reachable when the loop body never ran.
    raise Exception("max_retries must be at least 1 to fetch file info")
async def _task_runner(self, tasks: list[Awaitable]) -> None:
    """
    Schedule the given awaitables together and wait for them to finish.

    The created tasks are registered in `self.downloader_tasks` while they
    are being awaited and removed again afterwards. As soon as one task
    fails, the tasks that are still pending are cancelled and the failure is
    logged and re-raised.

    Args:
        tasks (list[Awaitable]): Awaitables to run concurrently.
    """
    try:
        scheduled = [asyncio.create_task(job) for job in tasks]
        self.downloader_tasks.extend(scheduled)

        finished, unfinished = await asyncio.wait(
            scheduled, return_when=asyncio.FIRST_EXCEPTION
        )

        for job in scheduled:
            self.downloader_tasks.remove(job)

        for job in finished:
            if not job.exception():
                continue
            # One task failed: stop the rest before surfacing the error.
            for leftover in unfinished:
                leftover.cancel()
            await asyncio.gather(*unfinished, return_exceptions=True)
            raise job.exception()
    except Exception as error:
        self._log(
            f"Exception raised in task runner: {error}",
            level="error",
        )
        raise error
async def _load_chunk(
    self, start: int, end: int, semaphore: AdjustableSemaphore
) -> None:
    """
    Download one byte range and write it into the temp file at its offset.

    The range is requested through `self.session` and written to
    `self.temp_file_path` at position `start`. A failed attempt is retried
    with exponential backoff, up to `max_retries` attempts in total.

    Args:
        start (int): Start byte of the chunk.
        end (int): End byte of the chunk (inclusive).
        semaphore (AdjustableSemaphore): Semaphore to control concurrency.

    Raises:
        Exception: The error of the last attempt when every attempt failed.
    """
    await semaphore.acquire()
    try:
        for i in range(self.max_retries):
            try:
                headers = {"Range": f"bytes={start}-{end}"}
                if self.custom_headers:
                    headers.update(self.custom_headers)

                response = None
                try:
                    response = await self.session.get(url=self.url, headers=headers)
                    if self.curl_cffi_required:
                        # A non-streaming curl_cffi response exposes the
                        # body as bytes; there is no stream to read().
                        chunk = response.content
                    else:
                        chunk = await response.content.read()
                finally:
                    if response is not None:
                        response.close()

                async with aiofiles.open(self.temp_file_path, "r+b") as file:
                    await file.seek(start)
                    await file.write(chunk)

                self.size_done += len(chunk)
                break
            except Exception as e:
                self._log(
                    f"Error downloading chunk {start}-{end}: {e}", level="error"
                )
                if i == self.max_retries - 1:
                    raise
                self._log(
                    f"Retrying chunk {start}-{end} ({i + 1}/{self.max_retries})",
                    level="warning",
                )
                await asyncio.sleep(2**i)  # Exponential backoff
    except asyncio.CancelledError:
        # Kept from the original: a cancelled chunk ends quietly instead of
        # propagating the cancellation.
        pass
    except Exception as e:
        self._log(f"Failed to download chunk {start}-{end}: {e}", level="error")
        raise
    finally:
        await semaphore.release()
async def _single_threaded_download(self) -> None:
    """
    Perform a single-threaded download of the file.

    The whole response body is streamed into `self.output_path` through
    `self.session`, using the curl_cffi streaming API when
    `self.curl_cffi_required` is set and the aiohttp one otherwise. A failed
    attempt is retried with exponential backoff, up to `max_retries` attempts
    in total.

    Raises:
        Exception: The error of the last attempt when every attempt failed.
    """
    for attempt in range(self.max_retries):
        # Each attempt reopens the file with "wb" and starts again from
        # byte 0, so bytes counted by a failed attempt must be dropped.
        self.size_done = 0
        try:
            response = None
            try:
                if self.curl_cffi_required:
                    response = await self.session.get(
                        url=self.url, headers=self.custom_headers, stream=True
                    )
                    async with aiofiles.open(self.output_path, "wb") as output_file:
                        async for chunk in response.aiter_content():
                            await output_file.write(chunk)
                            self.size_done += len(chunk)
                else:
                    response = await self.session.get(
                        self.url, headers=self.custom_headers
                    )
                    async with aiofiles.open(self.output_path, "wb") as output_file:
                        while chunk := await response.content.read(self.chunk_size):
                            await output_file.write(chunk)
                            self.size_done += len(chunk)
            finally:
                # Release the connection before any backoff sleep.
                if response is not None:
                    response.close()
            break
        except Exception as e:
            self._log(f"Error downloading file: {e}", level="error")
            if attempt == self.max_retries - 1:
                raise
            self._log(
                f"Retrying download ({attempt + 1}/{self.max_retries})",
                level="warning",
            )
            await asyncio.sleep(2**attempt)  # Exponential backoff
async def _cleanup(self) -> None:
    """
    Remove leftover files and close the HTTP session.

    The output file is deleted only while `self.is_running` is set; the temp
    file is deleted whenever `self.temp_file_path` is set. Files that are
    already gone are ignored. The session is closed last, if there is one.
    """
    leftovers = []
    if self.is_running and self.output_path:
        leftovers.append(self.output_path)
    if self.temp_file_path:
        leftovers.append(self.temp_file_path)

    for leftover in leftovers:
        leftover.unlink(missing_ok=True)

    if not self.session:
        return
    await self.session.close()
header is missing or invalid") + + if not self.filename: + self.filename = get_filename( + response.headers, response.url, self.id + ) + accept_ranges = response.headers.get("Accept-Ranges") + break + except Exception as e: + try: + self._log( + f"Failed to get file info using aiohttp: {e}", level="error" + ) + await self.session.close() + + self.session = AsyncSession(timeout=self.timeout) + self.curl_cffi_required = True + + response = None + try: + response = await self.session.get( + url=self.url, headers=self.custom_headers, stream=True + ) + except Exception as e: + raise e + finally: + if response: + response.close() + + self.total_size = int(response.headers.get("Content-Length", 0)) + if self.total_size == 0: + raise Exception( + "Content-Length header is missing or invalid" + ) + + if not self.filename: + self.filename = get_filename( + response.headers, response.url, self.id + ) + accept_ranges = response.headers.get("Accept-Ranges") + break + except Exception as e: + self._log(f"Error getting file info: {e}", level="error") + if i == self.max_retries - 1: + if self.session: + await self.session.close() + raise e + self._log( + f"Retrying getting file info ({i + 1}/{self.max_retries})", + level="warning", + ) + await asyncio.sleep(2**i) # Exponential backoff + + self.output_path = change_file_path_if_exist( + self.output_dir / self.filename + ) + self.filename = self.output_path.name + + if accept_ranges != "bytes" or self.single_threaded: + if accept_ranges != "bytes": + self._log( + "Server does not support range requests. Multi-threaded download not supported.", + level="warning", + ) + self._log("Starting single-threaded download") + self._log(f"Downloading {self.filename}") + + await self._task_runner( + [ + self._single_threaded_download(), + self._show_progress("Downloading"), + ] + ) + else: + self._log( + "Server supports range requests. 
Starting multi-threaded download" + ) + await self._multi_threaded_download() + + self._log(f"Download completed: {self.filename}") + self.is_running = False + await self.session.close() + + return self.output_path + + except asyncio.CancelledError: + raise asyncio.CancelledError + + except Exception as e: + self._log(f"Error in download process: {e}", level="error") + await self._cleanup() + self.is_running = False + raise e diff --git a/techzdl/api.py b/techzdl/api.py deleted file mode 100644 index 38f9c51..0000000 --- a/techzdl/api.py +++ /dev/null @@ -1,93 +0,0 @@ -from techzdl.downloader import FileDownloader -from pathlib import Path -from techzdl.logger import Logger -from typing import Callable, Any, Union, Awaitable, Optional - - -class TechZDL: - def __init__(self) -> None: - """ - Initialize the TechZDL class. - """ - self.logger = Logger("TechZDL") - - def get_downloader( - self, - url: str, - custom_headers: Optional[dict] = None, - output_dir: Union[str, Path] = Path("downloads"), - filename: Optional[str] = None, - workers: Optional[int] = None, - initial_dynamic_workers: int = 2, - dynamic_workers_update_interval: int = 5, - debug: bool = True, - progress: bool = True, - progress_callback: Optional[ - Union[Callable[..., Any], Callable[..., Awaitable[Any]]] - ] = None, - progress_args: tuple = (), - progress_interval: int = 1, - chunk_size: int = 5 * 1024 * 1024, - single_threaded: bool = False, - max_retries: int = 3, - timeout: int = 60, - ) -> FileDownloader: - """ - Create a FileDownloader object with the specified parameters. - - Args: - url (str): URL of the file to download. - custom_headers (Optional[dict], optional): Custom headers to send with the request. Defaults to None. - output_dir (Union[str, Path], optional): Directory where the file will be saved. Defaults to "downloads". - filename (Optional[str], optional): Name to save the file as (including extension). By default, this will be determined automatically. 
- workers (Optional[int], optional): Number of fixed concurrent download workers. By default, this will be dynamically changed based on the download speed. Setting this will disable dynamic worker adjustment. - initial_dynamic_workers (int, optional): Initial number of dynamic workers. Defaults to 2. - dynamic_workers_update_interval (int, optional): Interval in seconds to update dynamic worker count. Defaults to 5. - debug (bool, optional): Enable debug logs. Defaults to True. - progress (bool, optional): Enable download progress display. Defaults to True. - progress_callback (Optional[Union[Callable[..., Any], Callable[..., Awaitable[Any]]]], optional): - Callback function for download progress updates. Can be sync or async. Defaults to None. Setting this disables tqdm progress. - progress_args (tuple, optional): Additional arguments for progress_callback. Defaults to (). - progress_interval (int, optional): Time interval for progress updates in seconds. Defaults to 1. - chunk_size (int, optional): Size of each download chunk in bytes. Defaults to 5 MB. - single_threaded (bool, optional): Force single-threaded download. Defaults to False. - max_retries (int, optional): Maximum retries for each chunk/file download. Defaults to 3. - timeout (int, optional): Timeout for each request in seconds. Defaults to 60. - - Returns: - FileDownloader: Instance of FileDownloader configured with the specified parameters. 
- - Example: - ```python - import asyncio - - async def main(): - techzdl = TechZDL() - downloader = techzdl.get_downloader("https://link.testfile.org/aXCg7h") - await downloader.start() - - asyncio.run(main()) - ``` - """ - if debug: - self.logger.info(f"Creating TechZ FileDownloader with URL: {url}") - - downloader = FileDownloader( - url, - custom_headers, - output_dir, - filename, - workers, - initial_dynamic_workers, - dynamic_workers_update_interval, - debug, - progress, - progress_callback, - progress_args, - progress_interval, - chunk_size, - single_threaded, - max_retries, - timeout, - ) - return downloader diff --git a/techzdl/downloader.py b/techzdl/downloader.py deleted file mode 100644 index a9cb6de..0000000 --- a/techzdl/downloader.py +++ /dev/null @@ -1,494 +0,0 @@ -import aiohttp -import aiofiles -import asyncio -import inspect -from tqdm import tqdm -from pathlib import Path -from techzdl.extra import ( - change_file_path_if_exist, - get_random_string, - AdjustableSemaphore, - get_filename, -) -from techzdl.logger import Logger -from typing import Callable, Any, Union, Awaitable, Optional -from curl_cffi.requests import AsyncSession - - -class FileDownloader: - def __init__( - self, - url: str, - custom_headers: Optional[dict] = None, - output_dir: Union[str, Path] = Path("downloads"), - filename: Optional[str] = None, - workers: Optional[int] = None, - initial_dynamic_workers: int = 2, - dynamic_workers_update_interval: int = 5, - debug: bool = True, - progress: bool = True, - progress_callback: Optional[ - Union[Callable[..., Any], Callable[..., Awaitable[Any]]] - ] = None, - progress_args: tuple = (), - progress_interval: int = 1, - chunk_size: int = 5 * 1024 * 1024, - single_threaded: bool = False, - max_retries: int = 3, - timeout: int = 60, - ) -> None: - """ - Initialize the FileDownloader object. - - Args: - url (str): URL of the file to download. - custom_headers (Optional[dict], optional): Custom headers to send with the request. 
Defaults to None. - output_dir (Union[str, Path], optional): Directory where the file will be saved. Defaults to "downloads". - filename (Optional[str], optional): Name to save the file as (including extension). By default, this will be determined automatically. - workers (Optional[int], optional): Number of fixed concurrent download workers. By default, this will be dynamically changed based on the download speed. Setting this will disable dynamic worker adjustment. - initial_dynamic_workers (int, optional): Initial number of dynamic workers. Defaults to 2. - dynamic_workers_update_interval (int, optional): Interval in seconds to update dynamic worker count. Defaults to 5. - debug (bool, optional): Enable debug logs. Defaults to True. - progress (bool, optional): Enable download progress display. Defaults to True. - progress_callback (Optional[Union[Callable[..., Any], Callable[..., Awaitable[Any]]]], optional): - Callback function for download progress updates. Can be sync or async. Defaults to None. Setting this disables tqdm progress. - progress_args (tuple, optional): Additional arguments for progress_callback. Defaults to (). - progress_interval (int, optional): Time interval for progress updates in seconds. Defaults to 1. - chunk_size (int, optional): Size of each download chunk in bytes. Defaults to 5 MB. - single_threaded (bool, optional): Force single-threaded download. Defaults to False. - max_retries (int, optional): Maximum retries for each chunk/file download. Defaults to 3. - timeout (int, optional): Timeout for each request in seconds. Defaults to 60. 
- """ - self.id = get_random_string(6) - self.url = url - self.custom_headers = custom_headers - self.output_dir = ( - Path(output_dir) if isinstance(output_dir, str) else output_dir - ) - self.output_dir.mkdir(parents=True, exist_ok=True) - self.filename = filename - self.workers = workers - self.debug = debug - self.logger = Logger(f"TechZDL - {self.id}") - self.progress = progress - self.progress_callback = progress_callback - self.progress_args = progress_args - self.chunk_size = chunk_size - self.progress_interval = progress_interval - self.single_threaded = single_threaded or workers == 1 - self.is_callback_async = inspect.iscoroutinefunction(progress_callback) - self.dynamic_workers = initial_dynamic_workers - self.dynamic_workers_update_interval = dynamic_workers_update_interval - self.curl_cffi_required = False - self.max_retries = max_retries - self.session = None - self.timeout = timeout - - def log(self, message: str, level: str = "info") -> None: - """ - Log a message with the specified level. - - Args: - message (str): Message to log. - level (str): Log level ('info', 'debug', 'warning', 'error'). Defaults to 'info'. - """ - if level == "info" and self.debug: - self.logger.info(message) - elif level == "warning": - self.logger.warning(message) - elif level == "error": - self.logger.error(message) - - async def task_runner(self, tasks: list[Awaitable]) -> None: - """ - Run a list of async tasks concurrently, handling exceptions and cancellations. - - Args: - tasks (list[Awaitable]): List of async tasks to run. 
- """ - new_tasks = [asyncio.create_task(task) for task in tasks] - tasks = new_tasks - done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_EXCEPTION) - - for task in done: - if task.exception(): - for pending_task in pending: - pending_task.cancel() - await asyncio.gather(*pending, return_exceptions=True) - self.log( - f"Exception raised in task runner: {task.exception()}", - level="error", - ) - raise task.exception() - - async def show_progress(self, description: str) -> None: - """ - Show download progress either via a callback or tqdm progress bar. - - Args: - description (str): Description for the progress display. - """ - if self.progress_callback: - while self.size_done < self.total_size: - if self.is_callback_async: - await self.progress_callback( - description, - self.size_done, - self.total_size, - *self.progress_args, - ) - else: - self.progress_callback( - description, - self.size_done, - self.total_size, - *self.progress_args, - ) - await asyncio.sleep(self.progress_interval) - if self.is_callback_async: - await self.progress_callback( - description, self.total_size, self.total_size, *self.progress_args - ) - else: - self.progress_callback( - description, self.total_size, self.total_size, *self.progress_args - ) - else: - if self.progress: - with tqdm( - total=self.total_size, - unit="B", - unit_scale=True, - unit_divisor=1024, - desc=description, - bar_format="{desc}: {percentage:3.0f}% |{bar}| {n_fmt}B/{total_fmt}B [{elapsed}<{remaining}, {rate_fmt}{postfix}]", - ) as pbar: - previous_size = 0 - while self.size_done < self.total_size: - pbar.update(self.size_done - previous_size) - previous_size = self.size_done - await asyncio.sleep(self.progress_interval) - pbar.update(self.total_size - previous_size) - - async def load_chunk( - self, temp_file_path: Path, start: int, end: int, semaphore: AdjustableSemaphore - ) -> None: - """ - Load a chunk of the file. - - Args: - temp_file_path (Path): Path to the temporary file. 
- start (int): Start byte of the chunk. - end (int): End byte of the chunk. - semaphore (AdjustableSemaphore): Semaphore to control concurrency. - """ - await semaphore.acquire() - try: - for i in range(self.max_retries): - try: - headers = {"Range": f"bytes={start}-{end}"} - if self.custom_headers: - headers.update(self.custom_headers) - - if self.curl_cffi_required: - response = await self.session.get(url=self.url, headers=headers) - chunk = response.content - else: - response = await self.session.get(url=self.url, headers=headers) - chunk = await response.content.read() - - async with aiofiles.open(temp_file_path, "r+b") as file: - await file.seek(start) - await file.write(chunk) - - self.size_done += len(chunk) - break - except Exception as e: - self.log( - f"Error downloading chunk {start}-{end}: {e}", level="error" - ) - if i == self.max_retries - 1: - raise e - self.log( - f"Retrying chunk {start}-{end} ({i + 1}/{self.max_retries})", - level="warning", - ) - await asyncio.sleep(2**i) # Exponential backoff - except asyncio.CancelledError: - pass - except Exception as e: - self.log(f"Failed to download chunk {start}-{end}: {e}", level="error") - raise e - finally: - await semaphore.release() - - async def temp_file_creator(self, temp_file_path: Path, total_chunks: int) -> None: - """ - Create a temporary file with the specified size. - - Args: - temp_file_path (Path): Path to the temporary file. - total_chunks (int): Total number of chunks. - """ - async with aiofiles.open(temp_file_path, "wb") as file: - for i in range(total_chunks): - start = i * self.chunk_size - end = min(start + self.chunk_size - 1, self.total_size - 1) - await file.write(b"\0" * (end - start + 1)) - self.size_done += end - start + 1 - - async def dynamic_worker_updater(self, semaphore: AdjustableSemaphore) -> None: - """ - Dynamically update the number of workers based on download speed. - - Args: - semaphore (AdjustableSemaphore): Semaphore to control concurrency. 
- """ - prev_downloaded = 0 - prev_speed = 0 - - while True: - if self.size_done >= self.total_size: - break - await asyncio.sleep(self.dynamic_workers_update_interval) - - speed = ( - self.size_done - prev_downloaded - ) / self.dynamic_workers_update_interval - - if speed > prev_speed: - self.dynamic_workers += 2 - await semaphore.set_limit(self.dynamic_workers) - elif speed < prev_speed: - self.dynamic_workers = max(2, self.dynamic_workers - 2) - await semaphore.set_limit(self.dynamic_workers) - - prev_downloaded = self.size_done - prev_speed = speed - - async def single_threaded_download(self) -> None: - """ - Perform a single-threaded download of the file. - """ - for i in range(self.max_retries): - try: - if self.curl_cffi_required: - response = await self.session.get( - url=self.url, headers=self.custom_headers, stream=True - ) - async with aiofiles.open(self.output_path, "wb") as output_file: - async for chunk in response.aiter_content(): - await output_file.write(chunk) - self.size_done += len(chunk) - else: - response = await self.session.get( - self.url, headers=self.custom_headers - ) - async with aiofiles.open(self.output_path, "wb") as output_file: - while chunk := await response.content.read(self.chunk_size): - await output_file.write(chunk) - self.size_done += len(chunk) - response.close() - break - except Exception as e: - self.log(f"Error downloading file: {e}", level="error") - if i == self.max_retries - 1: - raise e - self.log( - f"Retrying download ({i + 1}/{self.max_retries})", level="warning" - ) - await asyncio.sleep(2**i) # Exponential backoff - - async def multi_threaded_download(self) -> None: - """ - Perform a multi-threaded download of the file. 
- """ - total_chunks = (self.total_size + self.chunk_size - 1) // self.chunk_size - temp_file_path = self.output_dir / (self.output_path.stem + ".temp") - self.log( - f"Creating temp file {temp_file_path.name} of size {self.total_size} bytes" - ) - - await self.task_runner( - [ - self.temp_file_creator(temp_file_path, total_chunks), - self.show_progress("Creating Temp File"), - ] - ) - self.size_done = 0 - - semaphore = AdjustableSemaphore(self.dynamic_workers) - tasks = [] - - for i in range(total_chunks): - start = i * self.chunk_size - end = min(start + self.chunk_size - 1, self.total_size - 1) - task = self.load_chunk(temp_file_path, start, end, semaphore) - tasks.append(task) - - self.log(f"Starting download of {self.filename}") - - if self.workers: - self.dynamic_workers = self.workers - await semaphore.set_limit(self.dynamic_workers) - else: - tasks.append(self.dynamic_worker_updater(semaphore)) - - tasks.append(self.show_progress("Downloading")) - - await self.task_runner(tasks) - temp_file_path.rename(self.output_path) - - async def get_file_info(self) -> dict: - """ - Get file information from the server. - - Returns: - dict: File information. {"filename": str, "total_size": int} - total_size (int): Total size of the file in bytes. 
- """ - for i in range(self.max_retries): - try: - - session = aiohttp.ClientSession( - timeout=aiohttp.ClientTimeout(total=self.timeout) - ) - - self.log(f"Fetching file info from {self.url}") - response = await session.get(url=self.url, headers=self.custom_headers) - response.close() - total_size = int(response.headers.get("Content-Length", 0)) - if total_size == 0: - raise Exception("Content-Length header is missing or invalid") - - filename = get_filename(response.headers, response.url, self.id) - break - except Exception as e: - try: - self.log( - f"Failed to get file info using aiohttp: {e}", level="error" - ) - await session.close() - - session = AsyncSession(timeout=self.timeout) - - response = await session.get( - url=self.url, headers=self.custom_headers, stream=True - ) - response.close() - total_size = int(response.headers.get("Content-Length", 0)) - if total_size == 0: - raise Exception("Content-Length header is missing or invalid") - - filename = get_filename(response.headers, response.url, self.id) - break - except Exception as e: - self.log(f"Error getting file info: {e}", level="error") - if i == self.max_retries - 1: - await session.close() - raise e - self.log( - f"Retrying getting file info ({i + 1}/{self.max_retries})", - level="warning", - ) - await asyncio.sleep(2**i) # Exponential backoff - - await session.close() - return {"filename": str(filename), "total_size": total_size} - - async def start(self) -> Path: - """ - Start the download process. - - Returns: - Path: Path to the downloaded file. 
- """ - self.size_done = 0 - self.log("Initializing download process") - - for i in range(self.max_retries): - try: - if self.session: - await self.session.close() - self.session = aiohttp.ClientSession( - timeout=aiohttp.ClientTimeout(total=self.timeout) - ) - - self.log(f"Fetching file info from {self.url}") - response = await self.session.get( - url=self.url, headers=self.custom_headers - ) - response.close() - self.total_size = int(response.headers.get("Content-Length", 0)) - if self.total_size == 0: - raise Exception("Content-Length header is missing or invalid") - - if not self.filename: - self.filename = get_filename( - response.headers, response.url, self.id - ) - accept_ranges = response.headers.get("Accept-Ranges") - break - except Exception as e: - try: - self.log( - f"Failed to get file info using aiohttp: {e}", level="error" - ) - await self.session.close() - - self.session = AsyncSession(timeout=self.timeout) - self.curl_cffi_required = True - - response = await self.session.get( - url=self.url, headers=self.custom_headers, stream=True - ) - response.close() - self.total_size = int(response.headers.get("Content-Length", 0)) - if self.total_size == 0: - raise Exception("Content-Length header is missing or invalid") - - if not self.filename: - self.filename = get_filename( - response.headers, response.url, self.id - ) - accept_ranges = response.headers.get("Accept-Ranges") - break - except Exception as e: - self.log(f"Error getting file info: {e}", level="error") - if i == self.max_retries - 1: - await self.session.close() - raise e - self.log( - f"Retrying getting file info ({i + 1}/{self.max_retries})", - level="warning", - ) - await asyncio.sleep(2**i) # Exponential backoff - - self.output_path = change_file_path_if_exist(self.output_dir / self.filename) - self.filename = self.output_path.name - - if accept_ranges != "bytes" or self.single_threaded: - if accept_ranges != "bytes": - self.log( - "Server does not support range requests. 
Multi-threaded download not supported.", - level="warning", - ) - self.log("Starting single-threaded download") - self.log(f"Downloading {self.filename}") - - if self.progress: - await self.task_runner( - [ - self.single_threaded_download(), - self.show_progress("Downloading"), - ] - ) - else: - await self.single_threaded_download() - else: - self.log("Server supports range requests. Starting multi-threaded download") - await self.multi_threaded_download() - - self.log(f"Download completed: {self.filename}") - await self.session.close() - return self.output_path diff --git a/techzdl/extra.py b/techzdl/extra.py index 4e2f331..67d5cc5 100644 --- a/techzdl/extra.py +++ b/techzdl/extra.py @@ -1,7 +1,7 @@ import string import random import re -from pathlib import Path,PurePath +from pathlib import Path, PurePath import asyncio import re import urllib.parse @@ -83,11 +83,13 @@ async def set_limit(self, new_limit: int): self._value += new_limit - self._value self._condition.notify_all() + def sanitize_filename(filename): """ Replace invalid characters in filenames with an underscore. 
""" - return re.sub(r'[<>:"/\\|?*]', '_', filename) + return re.sub(r'[<>:"/\\|?*]', "_", filename) + def parse_content_disposition(content_disposition: str) -> Optional[str]: """ @@ -146,5 +148,5 @@ def get_filename(headers: Dict[str, str], url: str, id: str) -> str: else: filename = id - filename= filename.strip().replace("/", "_") - return PurePath(sanitize_filename(filename)) + filename = filename.strip().replace("/", "_") + return PurePath(sanitize_filename(filename)) \ No newline at end of file diff --git a/techzdl/logger.py b/techzdl/logger.py index e28e0ef..c3495d9 100644 --- a/techzdl/logger.py +++ b/techzdl/logger.py @@ -5,7 +5,7 @@ class Logger: def __init__(self, name, level=logging.DEBUG): self.logger = logging.getLogger(name) self.logger.setLevel(level) - self.formatter = logging.Formatter("%(name)s - %(message)s") + self.formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s") # StreamHandler for console output self.stream_handler = logging.StreamHandler()