# -*- coding: utf-8 -*-
"""AutoCropper_15May.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1NX3nKLD4ik3syVKj33B-Nl8aZUeOLG_A
Cell 1: Installing necessary libraries
"""
# Run these in a Colab cell or shell before executing the script:
# !pip install pytube
# !pip install opencv-python
# !pip install openai
# !pip install youtube-transcript-api
"""Cell 2: Importing libraries and setting OpenAI API key"""
from pytube import YouTube
import cv2
import subprocess
import openai
import numpy as np
import json
import math
import pdb
from youtube_transcript_api import YouTubeTranscriptApi
openai.api_key = '' # Replace with your actual OpenAI API key
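# A common alternative to hardcoding the key (assumes the OPENAI_API_KEY
# environment variable is set):
#   import os
#   openai.api_key = os.environ["OPENAI_API_KEY"]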
"""Cell 3: Download YouTube Video function"""
def download_video(url, filename):
    yt = YouTube(url)
    video = yt.streams.filter(file_extension='mp4').first()

    # Download the video
    video.download(filename=filename)
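# Usage sketch (illustrative; the URL is the one used in main() below):
#   download_video('https://www.youtube.com/watch?v=92nse3cvG_Y', 'input_video.mp4')
# Note: .first() may pick a low-resolution stream. A common alternative, assuming a
# progressive (audio + video) mp4 is wanted, is:
#   yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()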
#Segment Video function
def segment_video(response):
    for i, segment in enumerate(response):
        start_time = math.floor(float(segment.get("start_time", 0)))
        end_time = math.ceil(float(segment.get("end_time", 0))) + 2
        output_file = f"output{str(i).zfill(3)}.mp4"
        command = f"ffmpeg -i input_video.mp4 -ss {start_time} -to {end_time} -c copy {output_file}"
        subprocess.call(command, shell=True)
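# Note: with stream copy (-c copy), ffmpeg can only cut on keyframes, so segment
# boundaries may drift by a second or two. A hedged alternative, assuming
# re-encoding is acceptable in exchange for frame-accurate cuts, would be:
#   ffmpeg -i input_video.mp4 -ss {start_time} -to {end_time} -c:v libx264 -c:a aac {output_file}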
#Face Detection function
def detect_faces(video_file):
    # Load the pre-trained Haar cascade classifier for face detection
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # Load the video
    cap = cv2.VideoCapture(video_file)

    faces = []

    # Detect and store unique faces (stop after 5, or when the video ends)
    while len(faces) < 5:
        ret, frame = cap.read()
        if not ret:
            # No more frames to read
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        detected_faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

        # Iterate through the detected faces
        for face in detected_faces:
            # Check if the face is already in the list of faces
            if not any(np.array_equal(face, f) for f in faces):
                faces.append(face)

        # Print the number of unique faces detected so far
        print(f"Number of unique faces detected: {len(faces)}")

    # Release the video capture object
    cap.release()

    # If faces were detected, return the list of faces
    if len(faces) > 0:
        return faces

    # If no faces were detected, return None
    return None
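# Illustrative usage (the file name is just an example): detect_faces returns a
# list of OpenCV (x, y, w, h) boxes, or None.
#   faces = detect_faces("output000.mp4")
#   if faces is not None:
#       x, y, w, h = faces[0]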
#Crop Video function
def crop_video(faces, input_file, output_file):
    try:
        if faces is not None and len(faces) > 0:
            # Constants for cropping
            CROP_RATIO = 0.9  # Controls how much of the face is visible in the cropped video
            VERTICAL_RATIO = 9 / 16  # Aspect ratio for the vertical video

            # Read the input video
            cap = cv2.VideoCapture(input_file)

            # Get the frame dimensions
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            # Calculate the target width and height for cropping (vertical format)
            target_height = int(frame_height * CROP_RATIO)
            target_width = int(target_height * VERTICAL_RATIO)

            # Create a VideoWriter object to save the output video
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            output_video = cv2.VideoWriter(output_file, fourcc, 30.0, (target_width, target_height))

            # Loop through each frame of the input video
            while True:
                ret, frame = cap.read()

                # If no more frames, break out of the loop
                if not ret:
                    break

                # Iterate through each detected face
                for face in faces:
                    # Unpack the face coordinates
                    x, y, w, h = face

                    # Calculate the crop coordinates, centering the face
                    crop_x = max(0, x + (w - target_width) // 2)
                    crop_y = max(0, y + (h - target_height) // 2)
                    crop_x2 = min(crop_x + target_width, frame_width)
                    crop_y2 = min(crop_y + target_height, frame_height)

                    # Crop the frame based on the calculated crop coordinates
                    cropped_frame = frame[crop_y:crop_y2, crop_x:crop_x2]

                    # Resize the cropped frame to the target dimensions
                    resized_frame = cv2.resize(cropped_frame, (target_width, target_height))

                    # Write the resized frame to the output video
                    output_video.write(resized_frame)

            # Release the input and output video objects
            cap.release()
            output_video.release()

            print("Video cropped successfully.")
        else:
            print("No faces detected in the video.")
    except Exception as e:
        print(f"Error during video cropping: {str(e)}")
def crop_video2(faces, input_file, output_file):
    try:
        if faces is not None and len(faces) > 0:
            # Constants for cropping
            CROP_RATIO = 0.9  # Controls how much of the face is visible in the cropped video
            VERTICAL_RATIO = 9 / 16  # Aspect ratio for the vertical video
            BATCH_DURATION = 5  # Duration of each batch in seconds

            # Read the input video
            cap = cv2.VideoCapture(input_file)

            # Get the frame dimensions
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            # Calculate the target width and height for cropping (vertical format)
            target_height = int(frame_height * CROP_RATIO)
            target_width = int(target_height * VERTICAL_RATIO)

            # Calculate the number of frames per batch
            frames_per_batch = int(cap.get(cv2.CAP_PROP_FPS) * BATCH_DURATION)

            # Create a VideoWriter object to save the output video
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            output_video = cv2.VideoWriter(output_file, fourcc, 30.0, (target_width, target_height))

            # Frames accumulated for the current batch, used for motion analysis
            batch_frames = []

            # Loop through the frames, analyzing speaking activity batch by batch
            while True:
                ret, frame = cap.read()

                # If no more frames, break out of the loop
                if not ret:
                    break

                # Note: cv2.VideoCapture already returns frames in BGR order,
                # so no color conversion is needed here.

                # Iterate through each detected face
                for face in faces:
                    # Unpack the face coordinates
                    x, y, w, h = face

                    # Calculate the crop coordinates, centering the face
                    crop_x = max(0, x + (w - target_width) // 2)
                    crop_y = max(0, y + (h - target_height) // 2)
                    crop_x2 = min(crop_x + target_width, frame_width)
                    crop_y2 = min(crop_y + target_height, frame_height)

                    # Crop the frame based on the calculated crop coordinates
                    cropped_frame = frame[crop_y:crop_y2, crop_x:crop_x2]

                    # Resize the cropped frame to the target dimensions
                    resized_frame = cv2.resize(cropped_frame, (target_width, target_height))

                    # Write the resized frame to the output video
                    output_video.write(resized_frame)

                # Collect the last cropped frame of this iteration for batch analysis
                batch_frames.append(resized_frame)

                # Once a full batch has accumulated, analyze it
                if cap.get(cv2.CAP_PROP_POS_FRAMES) % frames_per_batch == 0:
                    # Analyze the lip movement or facial muscle activity within the batch
                    is_talking = is_talking_in_batch(batch_frames)
                    # Adjust the focus based on the speaking activity
                    adjust_focus(resized_frame, is_talking)
                    batch_frames = []

            # Release the input and output video objects
            cap.release()
            output_video.release()

            print("Video cropped successfully.")
        else:
            print("No faces detected in the video.")
    except Exception as e:
        print(f"Error during video cropping: {str(e)}")
def is_talking_in_batch(frames):
    # Calculate the motion between consecutive frames
    motion_scores = []
    for i in range(len(frames) - 1):
        frame1 = frames[i]
        frame2 = frames[i + 1]
        motion_score = calculate_motion_score(frame1, frame2)
        motion_scores.append(motion_score)

    # Determine if talking behavior is present based on motion scores
    threshold = 0.5  # Mean optical-flow magnitude, in pixels; adjust as needed
    talking = any(score > threshold for score in motion_scores)

    return talking
def calculate_motion_score(frame1, frame2):
    # Convert frames to grayscale
    gray1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

    # Calculate dense optical flow
    flow = cv2.calcOpticalFlowFarneback(gray1, gray2, None, 0.5, 3, 15, 3, 5, 1.2, 0)

    # Calculate magnitude of optical flow vectors
    magnitude = np.sqrt(flow[..., 0] ** 2 + flow[..., 1] ** 2)

    # Calculate motion score as the average magnitude of optical flow vectors
    motion_score = np.mean(magnitude)

    return motion_score
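# A minimal sketch (not part of the original notebook) showing how the optical-flow
# score behaves; the _demo_motion_score name and the synthetic frames are
# illustrative assumptions, not project code.
def _demo_motion_score():
    # A black frame with a white square, and the same square shifted 5 px right
    frame1 = np.zeros((64, 64, 3), dtype=np.uint8)
    cv2.rectangle(frame1, (10, 10), (30, 30), (255, 255, 255), -1)
    frame2 = np.roll(frame1, 5, axis=1)

    print(calculate_motion_score(frame1, frame2))  # > 0: motion present
    print(calculate_motion_score(frame1, frame1))  # ~0: no motion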
def adjust_focus(frame, talking):
    if talking:
        # Apply visual effects or adjustments to emphasize the speaker.
        # For example, draw a bounding box or overlay text indicating that the
        # speaker is talking; resizing or repositioning the frame to focus on
        # the talking person is another option.

        # Example: Draw a bounding box around the face region
        face_coordinates = get_face_coordinates(frame)
        if face_coordinates is not None:
            x, y, w, h = face_coordinates
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

    return frame
def get_face_coordinates(frame):
    # Load the pre-trained Haar cascade classifier for face detection
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # Convert frame to grayscale for face detection
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect faces in the frame
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    if len(faces) > 0:
        # Return the coordinates of the first detected face
        x, y, w, h = faces[0]
        return x, y, w, h

    # If no face detected, return None
    return None
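# Performance note (an observation, not original code): this reloads the Haar
# cascade on every call, once per analyzed frame; constructing the classifier
# once at module level and reusing it is a common optimization.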
def get_transcript(video_id):
    # Get the transcript for the given YouTube video ID
    transcript = YouTubeTranscriptApi.get_transcript(video_id)

    # Format the transcript for feeding into GPT-4
    formatted_transcript = ''
    for entry in transcript:
        start_time = "{:.2f}".format(entry['start'])
        end_time = "{:.2f}".format(entry['start'] + entry['duration'])
        text = entry['text']
        formatted_transcript += f"{start_time} --> {end_time} : {text}\n"

    return formatted_transcript
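# Illustrative example of the formatting above (values are made up): an entry like
#   {'text': 'hello there', 'start': 1.0, 'duration': 2.5}
# becomes the line
#   "1.00 --> 3.50 : hello there"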
#Analyze transcript with GPT-4 function
# Example response format for the model; "duration" is the clip length in seconds.
response_obj = '''[
  {
    "start_time": 97.19,
    "end_time": 127.43,
    "description": "Spoken text here",
    "duration": 36
  },
  {
    "start_time": 169.58,
    "end_time": 199.10,
    "description": "Spoken text here",
    "duration": 33
  }
]'''
def analyze_transcript(transcript):
    prompt = (
        "This is a transcript of a video. Please identify the 3 most viral sections "
        "from the whole, make sure they are more than 30 seconds in duration, and "
        "provide extremely accurate timestamps. Respond only in this format: "
        f"{response_obj}\nHere is the transcription:\n{transcript}"
    )
    messages = [
        {"role": "system", "content": "You are ViralGPT, a helpful assistant. You are a master at reading YouTube transcripts and identifying the most interesting and viral content."},
        {"role": "user", "content": prompt}
    ]

    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
        max_tokens=512,
        n=1,
        stop=None
    )

    return response.choices[0]['message']
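# A hedged helper (assumption: the model reply may wrap the JSON in extra prose),
# sketching a more defensive version of the json.loads call in main(); the
# parse_segments name is illustrative, not part of the original notebook.
def parse_segments(content):
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        # Fall back to extracting the outermost JSON array, if one is present
        start, end = content.find('['), content.rfind(']') + 1
        if start != -1 and end > start:
            return json.loads(content[start:end])
        raise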
"""Main function and execution"""
interseting_seg='''[{'text': 'happiness through Curiosity on Dr', 'start': 0.0, 'duration': 4.82}, {'text': 'eclipse', 'start': 2.28, 'duration': 2.54}, {'text': 'little rookie question for you okay and', 'start': 6.899, 'duration': 4.021}, {'text': "I'm asking this on behalf of mainstream", 'start': 9.24, 'duration': 3.6}, {'text': 'media how do you feel when you see', 'start': 10.92, 'duration': 5.4}, {'text': 'movies like pathan or tiger or any', 'start': 12.84, 'duration': 5.939}, {'text': "Indian I think we haven't got the art of", 'start': 16.32, 'duration': 4.5}, {'text': 'doing those movies you think they can be', 'start': 18.779, 'duration': 4.321}, {'text': 'done better oh yes I mean they can be', 'start': 20.82, 'duration': 3.42}, {'text': 'realistic', 'start': 23.1, 'duration': 3.12}, {'text': "okay we're not realistic what you see", 'start': 24.24, 'duration': 4.32}, {'text': 'what is not realistic about them huh', 'start': 26.22, 'duration': 4.219}, {'text': "it's not realistic", 'start': 28.56, 'duration': 4.38}, {'text': "you're trying to make a James Bond movie", 'start': 30.439, 'duration': 5.741}, {'text': 'which is also not realistic okay', 'start': 32.94, 'duration': 5.88}, {'text': 'then you have this story of the isi girl', 'start': 36.18, 'duration': 4.74}, {'text': 'in the raw man', 'start': 38.82, 'duration': 4.86}, {'text': 'living happily ever after I mean', 'start': 40.92, 'duration': 4.639}, {'text': 'take a break', 'start': 43.68, 'duration': 7.08}, {'text': 'has that ever happened not really right', 'start': 45.559, 'duration': 7.48}, {'text': 'no the whole atmospherics of the whole', 'start': 50.76, 'duration': 3.54}, {'text': 'thing you know', 'start': 53.039, 'duration': 3.36}, {'text': "I haven't seen batana and I won't see it", 'start': 54.3, 'duration': 5.099}, {'text': "because I don't think it is an accurate", 'start': 56.399, 'duration': 4.98}, {'text': "depiction it's not an accurate I'm not", 'start': 59.399, 'duration': 4.941}, {'text': 'going to waste my time', 'start': 61.379, 'duration': 2.961}, {'text': 'and I laughed and I enjoyed that because', 'start': 65.18, 'duration': 6.28}, {'text': 'it was so quaint', 'start': 68.04, 'duration': 5.7}, {'text': 'not because it was defeating anything', 'start': 71.46, 'duration': 3.659}, {'text': 'yeah', 'start': 73.74, 'duration': 5.4}, {'text': 'like you had that other movie of um', 'start': 75.119, 'duration': 7.5}, {'text': 'war that they can no this was this', 'start': 79.14, 'duration': 5.82}, {'text': 'fellow Salman Khan going under a tunnel', 'start': 82.619, 'duration': 5.281}, {'text': 'into Pakistan to deliver a girl who had', 'start': 84.96, 'duration': 4.88}, {'text': 'got legendary', 'start': 87.9, 'duration': 4.14}, {'text': 'but whatever', 'start': 89.84, 'duration': 4.86}, {'text': 'I mean', 'start': 92.04, 'duration': 2.66}, {'text': 'could I exaggerated okay this is not you', 'start': 95.46, 'duration': 5.4}, {'text': 'have to have entertainment which is fun', 'start': 99.0, 'duration': 4.079}, {'text': 'and realistic you should see that movie', 'start': 100.86, 'duration': 3.36}, {'text': 'The', 'start': 103.079, 'duration': 4.86}, {'text': 'Bridge of spies hey that is a real movie', 'start': 104.22, 'duration': 6.78}, {'text': 'okay that is how real spy movies are', 'start': 107.939, 'duration': 5.521}, {'text': 'made what does a real spy movie', 'start': 111.0, 'duration': 5.46}, {'text': 'constitute it means dealing with actual', 'start': 113.46, 'duration': 5.64}, {'text': 'facts 
no no blonde round no nothing', 'start': 116.46, 'duration': 4.74}, {'text': "around it's okay living a lonely life", 'start': 119.1, 'duration': 4.799}, {'text': "you're living on by yourself living your", 'start': 121.2, 'duration': 6.0}, {'text': 'cover story he able uh', 'start': 123.899, 'duration': 5.821}, {'text': 'with goldfish was actually a notice so', 'start': 127.2, 'duration': 3.839}, {'text': 'he was doing paintings he used to make', 'start': 129.72, 'duration': 3.78}, {'text': 'him make money out of it and but he was', 'start': 131.039, 'duration': 5.161}, {'text': 'doing this other job also so running is', 'start': 133.5, 'duration': 5.099}, {'text': 'espionage ring', 'start': 136.2, 'duration': 4.92}, {'text': 'and they show all that how a documents', 'start': 138.599, 'duration': 5.22}, {'text': 'are exchanged or document information is', 'start': 141.12, 'duration': 4.86}, {'text': 'exchanged you have things called letter', 'start': 143.819, 'duration': 5.941}, {'text': 'dead letter boxes a dead letter box in', 'start': 145.98, 'duration': 7.2}, {'text': 'in Espionage is a place it could be a', 'start': 149.76, 'duration': 6.42}, {'text': "book let's say or or that statue I put", 'start': 153.18, 'duration': 6.48}, {'text': 'my UBS under it', 'start': 156.18, 'duration': 5.279}, {'text': 'and leave it', 'start': 159.66, 'duration': 5.46}, {'text': 'and leave a sign outside on some tree or', 'start': 161.459, 'duration': 4.801}, {'text': 'a wall', 'start': 165.12, 'duration': 5.759}, {'text': "that I've I've fed the the dead litter", 'start': 166.26, 'duration': 6.42}, {'text': 'box okay so the other chap comes and', 'start': 170.879, 'duration': 3.661}, {'text': 'picks it up and takes it away the two', 'start': 172.68, 'duration': 4.26}, {'text': 'never meet based on the true nature of', 'start': 174.54, 'duration': 3.72}, {'text': 'espionage', 'start': 176.94, 'duration': 4.2}, {'text': "which Indian actor's style would be best", 'start': 178.26, 'duration': 7.259}, {'text': 'suited to portray the character of a spy', 'start': 181.14, 'duration': 6.84}, {'text': 'you know I I saw um', 'start': 185.519, 'duration': 4.921}, {'text': 'three three three actors were three or', 'start': 187.98, 'duration': 4.679}, {'text': 'four actors were very good this kind of', 'start': 190.44, 'duration': 3.299}, {'text': 'a thing', 'start': 192.659, 'duration': 3.901}, {'text': 'who could fit into these kind Sorrows', 'start': 193.739, 'duration': 6.481}, {'text': 'not giving any order of preference but', 'start': 196.56, 'duration': 7.02}, {'text': 'I like nawazuddin Siddiqui I used to', 'start': 200.22, 'duration': 5.599}, {'text': 'like Imran Khan', 'start': 203.58, 'duration': 4.439}, {'text': 'Irfan Khan sorry', 'start': 205.819, 'duration': 6.28}, {'text': 'and he was he was a consummate actor', 'start': 208.019, 'duration': 8.821}, {'text': 'Anup anupam care and', 'start': 212.099, 'duration': 8.241}, {'text': 'these two actors', 'start': 216.84, 'duration': 3.5}, {'text': 'the one who played family man um', 'start': 220.62, 'duration': 6.96}, {'text': 'very good okay they could fit into the', 'start': 224.84, 'duration': 8.02}, {'text': 'room and Mishra pankaj Mishra foreign', 'start': 227.58, 'duration': 5.28}, {'text': '[Music]', 'start': 233.72, 'duration': 3.11}, {'text': "spy all right it's a cold war story", 'start': 259.699, 'duration': 6.461}, {'text': "about the the it's actually based on", 'start': 263.52, 'duration': 5.179}, {'text': 'this Cambridge 5.', 'start': 
266.16, 'duration': 5.9}, {'text': 'you know the Cambridge five those', 'start': 268.699, 'duration': 6.341}, {'text': 'Kim philby and others who were spying', 'start': 272.06, 'duration': 6.1}, {'text': 'for who were actually with the MI6 but', 'start': 275.04, 'duration': 6.0}, {'text': 'it was actually a KGB agent okay the', 'start': 278.16, 'duration': 5.58}, {'text': 'real mole and he would have been Chief', 'start': 281.04, 'duration': 4.08}, {'text': 'maybe one day', 'start': 283.74, 'duration': 4.08}, {'text': 'at the not been caught out', 'start': 285.12, 'duration': 7.579}, {'text': 'so on that is made a novel Tinker spy', 'start': 287.82, 'duration': 7.26}, {'text': "it's beautifully done the book is", 'start': 292.699, 'duration': 6.241}, {'text': 'marvelous and the acting and the', 'start': 295.08, 'duration': 3.86}, {'text': 'you should watch it okay and watch this', 'start': 302.78, 'duration': 6.04}, {'text': 'uh Bridge of spies if you enjoyed this', 'start': 305.88, 'duration': 5.9}, {'text': 'video subscribe TRS clips for more', 'start': 308.82, 'duration': 15.86}, {'text': '[Music]', 'start': 311.78, 'duration': 15.33}, {'text': 'thank you', 'start': 324.68, 'duration': 8.55}, {'text': '[Music]', 'start': 327.11, 'duration': 6.12}]''';
def main():
    video_id = '92nse3cvG_Y'
    url = 'https://www.youtube.com/watch?v=' + video_id  # Replace with your video's URL
    filename = 'input_video.mp4'
    download_video(url, filename)

    transcript = get_transcript(video_id)
    print(transcript)
    interesting_segment = analyze_transcript(transcript)
    print(interesting_segment)
    content = interesting_segment["content"]
    parsed_content = json.loads(content)
    print(parsed_content)
    #pdb.set_trace()
    segment_video(parsed_content)

    # Loop through each segment and produce a cropped vertical version
    for i in range(len(parsed_content)):
        input_file = f'output{str(i).zfill(3)}.mp4'
        output_file = f'output_cropped{str(i).zfill(3)}.mp4'

        faces = detect_faces(input_file)
        crop_video(faces, input_file, output_file)

# Run the main function
main()