-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimage_processing.py
162 lines (129 loc) · 8.16 KB
/
image_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import os
import json
import subprocess
import concurrent.futures
from typing import List, Tuple
from file_utils import estimate_scale_factor
def get_image_info(ffprobe_path: str, input_file: str, verbose: bool) -> Tuple[int, int, int]:
    """Return ``(width, height, size_in_bytes)`` for an image, as reported by FFprobe.

    Args:
        ffprobe_path: Path to the ``ffprobe`` executable.
        input_file: Path of the image to inspect.
        verbose: Accepted for signature compatibility; this function prints nothing.

    Returns:
        The width and height of the first stream that reports both, and the
        ``size`` field of the format section, all converted to ``int``.

    Raises:
        ValueError: If FFprobe exits with a non-zero status, prints output that
            is not valid JSON, reports no stream with dimensions, or reports no
            file size.
    """
    cmd = [
        ffprobe_path,
        "-v", "quiet",            # Keep FFprobe's own logging out of the way
        "-print_format", "json",  # Machine-readable output
        "-show_format",           # Needed for the file size
        "-show_streams",          # Needed for width / height
        input_file,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        # Fail here with a clear message instead of on a missing JSON key later.
        raise ValueError(
            f"ffprobe failed for {input_file} (exit code {result.returncode})"
        )

    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError as err:
        raise ValueError(
            f"ffprobe returned unparsable output for {input_file}"
        ) from err

    # The first stream is not guaranteed to carry dimensions, so use the first
    # one that actually does.
    stream = next(
        (s for s in data.get("streams", []) if "width" in s and "height" in s),
        None,
    )
    if stream is None:
        raise ValueError(
            f"ffprobe reported no stream with dimensions for {input_file}"
        )

    try:
        size = int(data["format"]["size"])
    except (KeyError, TypeError) as err:
        raise ValueError(
            f"ffprobe reported no file size for {input_file}"
        ) from err

    return int(stream["width"]), int(stream["height"]), size
def process_file_sizes(input_folder: str, target_size: int, verbose: bool) -> List[str]:
    """Collect the images in a folder whose on-disk size exceeds the target.

    Only direct entries of ``input_folder`` whose name ends in .png, .jpg,
    .jpeg or .gif (case-insensitive) are considered. Entries whose size cannot
    be read are reported on stdout and left out of the result.

    Args:
        input_folder: Directory to scan (not recursive).
        target_size: Size threshold in bytes; files strictly larger are returned.
        verbose: When true, print each skipped file and a final summary.

    Returns:
        Paths (``input_folder`` joined with the entry name) of the oversized
        images, in directory-listing order.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif')
    candidates = []
    for entry in os.listdir(input_folder):
        if entry.lower().endswith(image_suffixes):
            candidates.append(os.path.join(input_folder, entry))

    oversized = []
    for path in candidates:
        try:
            size_on_disk = os.path.getsize(path)
        except OSError as e:
            # Unreadable entries are reported and ignored, not fatal.
            print(f"Error accessing {path}: {e}")
            continue

        if size_on_disk > target_size:
            oversized.append(path)
            continue
        if verbose:
            print(f"Skipping {path}: current size {size_on_disk} bytes is within target size {target_size} bytes.")

    if verbose:
        print(f"\nFiles to process: {len(oversized)} out of {len(candidates)} total files.")
    return oversized
def process_file(ffprobe_path: str, input_file: str, target_size: int, verbose: bool) -> Tuple[str, Tuple[int, int], str, int]:
    """Build the resize plan for one image.

    Args:
        ffprobe_path: Path to the ``ffprobe`` executable, forwarded to
            ``get_image_info``.
        input_file: Path of the image to plan for.
        target_size: Size limit in bytes.
        verbose: When true, print a message for files already within the limit.

    Returns:
        ``(input_file, (width, height), output_file, target_size)``. The
        dimensions are ``(0, 0)`` when the file is already within
        ``target_size``; otherwise they are 95% of the probed width and height,
        truncated to ints. ``output_file`` is always ``out/resized_<basename>``.
    """
    size_on_disk = os.path.getsize(input_file)
    destination = os.path.join("out", f"resized_{os.path.basename(input_file)}")

    if size_on_disk > target_size:
        # Only oversized files are probed; the first attempt is a fixed 5% shrink.
        width, height, _ = get_image_info(ffprobe_path, input_file, verbose)
        starting_dimensions = (int(width * .95), int(height * .95))
        return input_file, starting_dimensions, destination, target_size

    if verbose:
        print(f"Skipping {input_file}: current size {size_on_disk} bytes is within target size {target_size} bytes.")
    # (0, 0) marks "nothing to do" for this file.
    return input_file, (0, 0), destination, target_size
def probe(ffprobe_path: str, files_to_process: List[str], target_size: int, verbose: bool) -> List[Tuple[str, Tuple[int, int], str, int]]:
    """Work out a resize plan for every file in ``files_to_process``.

    For each file the current dimensions come from ``get_image_info`` and the
    current size from the filesystem. Both dimensions are multiplied by the
    value returned by ``estimate_scale_factor(current_size, target_size)`` and
    truncated to ints.
    NOTE(review): ``estimate_scale_factor`` lives in ``file_utils``; its formula
    is not visible here.

    Args:
        ffprobe_path: Path to the ``ffprobe`` executable.
        files_to_process: Image paths to plan for.
        target_size: Size limit in bytes, passed through into each plan.
        verbose: When true, print progress and the computed values per file.

    Returns:
        One ``(input_file, (new_width, new_height), output_file, target_size)``
        tuple per input, in input order; ``output_file`` is
        ``out/resized_<basename>``.
    """
    plans = []
    for source in files_to_process:
        if verbose:
            print(f"Getting info for {source}...")

        width, height, _ = get_image_info(ffprobe_path, source, verbose)
        size_on_disk = os.path.getsize(source)
        factor = estimate_scale_factor(size_on_disk, target_size)
        destination = os.path.join("out", f"resized_{os.path.basename(source)}")
        scaled = (int(width * factor), int(height * factor))

        if verbose:
            report = (
                f"Processing {source}:",
                f" Current size: {size_on_disk} bytes",
                f" Target size: {target_size} bytes",
                f" Scale factor = {factor}",
                f" Output = {destination}",
            )
            for line in report:
                print(line)

        plans.append((source, scaled, destination, target_size))
    return plans
def resize_image(ffmpeg_path: str, input_file: str, output_file: str, new_dimensions: Tuple[int, int], target_size: int, verbose: bool) -> Tuple[str, int, int, float, int]:
    """Resize one image with FFmpeg and report the resulting file sizes.

    Args:
        ffmpeg_path: Path to the ``ffmpeg`` executable.
        input_file: Image to read.
        output_file: Where to write the resized image (overwritten via ``-y``).
        new_dimensions: ``(width, height)`` passed to FFmpeg's ``scale`` filter.
        target_size: Size limit in bytes. Accepted for interface compatibility;
            this single-pass implementation does not use it.
        verbose: When true, announce the resize before running FFmpeg.

    Returns:
        ``(output_file, initial_size, final_size, compression_ratio,
        iteration_count)``. When ``new_dimensions`` is not strictly positive,
        nothing is resized: the ratio and iteration count are 0 and
        ``final_size`` is the size of a pre-existing ``output_file`` or 0 if
        there is none.

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
        OSError: If ``input_file`` (or the freshly written output) cannot be
            stat'ed.
    """
    if new_dimensions[0] <= 0 or new_dimensions[1] <= 0:
        print(f"Invalid dimensions {new_dimensions} for {input_file}. Skipping resizing.")
        initial_size = os.path.getsize(input_file)
        # Nothing is written on this path, so the output file may not exist;
        # report 0 instead of failing with FileNotFoundError.
        try:
            final_size = os.path.getsize(output_file)
        except OSError:
            final_size = 0
        return output_file, initial_size, final_size, 0, 0

    cmd = [
        ffmpeg_path,
        "-i", input_file,
        "-vf", f"scale={new_dimensions[0]}:{new_dimensions[1]}",
        "-y",  # Overwrite an existing output without prompting
        output_file,
    ]
    if verbose:
        print(f"Resizing {input_file} to {output_file} with dimensions {new_dimensions}...")
    subprocess.run(cmd, check=True, capture_output=True)

    initial_size = os.path.getsize(input_file)
    final_size = os.path.getsize(output_file)
    compression_ratio = initial_size / final_size if final_size != 0 else 0
    iteration_count = 1  # Exactly one FFmpeg pass is performed

    # Report the dimensions that were actually applied to the output.
    print(f"Iteration {iteration_count}: Resized {input_file} to {output_file} with dimensions {new_dimensions}.")
    return output_file, initial_size, final_size, compression_ratio, iteration_count
def process_images(ffmpeg_path: str, probe_results: List[Tuple[str, Tuple[int, int], str, int]], verbose: bool) -> None:
    """Resize every planned image on a thread pool and print a summary.

    Args:
        ffmpeg_path: Path to the ``ffmpeg`` executable, forwarded to
            ``resize_image``.
        probe_results: ``(input_file, new_dimensions, output_file, target_size)``
            tuples, one per image.
        verbose: Forwarded to ``resize_image``.

    The ``out`` directory is created if missing. An exception raised by any
    resize task propagates to the caller when its result is collected.
    """
    os.makedirs("out", exist_ok=True)

    summary_rows = []
    with concurrent.futures.ThreadPoolExecutor() as pool:
        pending = []
        for source, dimensions, destination, size_limit in probe_results:
            # resize_image takes the output path before the dimensions.
            task = pool.submit(resize_image, ffmpeg_path, source, destination, dimensions, size_limit, verbose)
            pending.append(task)

        # Collect in completion order, so summary order may differ from input order.
        for done in concurrent.futures.as_completed(pending):
            destination, before, after, ratio, iterations = done.result()
            summary_rows.append((os.path.basename(destination), before, after, ratio, iterations))

    print("\nCompression Summary:")
    for row in summary_rows:
        name, before, after, ratio, iterations = row
        print(f"{name}: Initial size: {before} bytes, Final size: {after} bytes, Compression ratio: {ratio:.2f}, Iterations: {iterations}")
    print("Image processing completed.")