Skip to content

Commit

Permalink
Hey, we got a UI now
Browse files Browse the repository at this point in the history
Got a Gradio UI, thanks Opus. It's partially broken, but it's something.
  • Loading branch information
rmusser01 committed May 9, 2024
1 parent 0aa1d9d commit 8f4f5be
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 7 deletions.
Binary file modified .gitignore
Binary file not shown.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Original: `YouTube contains an incredible amount of knowledge, much of which is
* `python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s`
- **Download Audio+Video from URL -> Transcribe audio from Video:**
* `python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s`
- **Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` i.e. llama.cpp/`ooba`/`kobold`/`tabby`) API:**
- **Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` (llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:**
* `python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s -api <your choice of API>`
- **Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
* `python summarize.py ./local/file_on_your/system --api_name <API_name>`
Expand All @@ -25,8 +25,9 @@ Original: `YouTube contains an incredible amount of knowledge, much of which is
- [Credits](#credits)

### <a name="what"></a>What?
- Use the script to transcribe a local file or remote url.
* Any url youtube-dl supports _should_ work.
- Use the script to (download->)transcribe(->summarize) a local file or remote url.
* Any YouTube video. (For playlists, run `Get_Playlist_URLs.py <Playlist URL>`; it creates a text file containing the URL of every video in the playlist, which you can then pass as input so that all of them are downloaded. Pull requests are welcome.)
* Any url youtube-dl supports _should_ work.
* If you pass an API name (anthropic/cohere/groq/openai) as a second argument, and add your API key to the config file, you can have your resulting transcriptions summarized as well.
* Alternatively, you can pass `llama`/`ooba`/`kobold`/`tabby` and have the script perform a request to your local API endpoint for summarization. You will need to modify the `llama_api_IP` value in the `config.txt` to reflect the `IP:Port` of your local server.
* Or pass the `--api_url` argument with the `IP:Port` to avoid making changes to the `config.txt` file.
Expand Down
78 changes: 74 additions & 4 deletions summarize.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#!/usr/bin/env python3
import gradio as gr
import argparse, configparser, datetime, json, logging, os, platform, requests, shutil, subprocess, sys, time, unicodedata
import zipfile
from datetime import datetime
import contextlib
import ffmpeg # Used for issuing commands to underlying ffmpeg executable, pip package ffmpeg is from 2018
import ffmpeg
import torch
import yt_dlp

Expand Down Expand Up @@ -1101,6 +1102,71 @@ def save_summary_to_file(summary, file_path):



####################################################################################################################################
# Gradio UI
#

def process_url(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False):
    """Run ``main`` for the Gradio UI and shape its first result for display.

    All arguments are forwarded unchanged to ``main`` as keyword arguments
    (``input_path`` positionally).

    Returns:
        A 4-tuple ``(json_data, summary, json_file_path, summary_file_path)``:

        * ``json_data`` - parsed contents of the ``.segments.json`` file that
          sits next to the first result's ``audio_file``.
        * ``summary`` - the first result's ``'summary'`` entry, or ``''`` when
          the key is absent.
        * ``json_file_path`` - path of the segments file that was read.
        * ``summary_file_path`` - the matching ``_summary.txt`` path, or
          ``None`` when no such file exists on disk.

        When ``main`` returns nothing, or any exception is raised, the tuple is
        ``(None, <message>, None, None)`` so the UI can show the message in
        the summary box instead of crashing.

    Only ``results[0]`` is used; additional results are ignored.
    """
    try:
        results = main(input_path, api_name=api_name, api_key=api_key, num_speakers=num_speakers, whisper_model=whisper_model, offset=offset, vad_filter=vad_filter, download_video_flag=download_video_flag)

        if not results:
            return None, "No results found.", None, None

        transcription_result = results[0]
        # Same '.wav' -> '.segments.json' substitution as before; it has to
        # match however the transcription step named the file it wrote.
        json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
        with open(json_file_path, 'r') as file:
            json_data = json.load(file)

        summary = transcription_result.get('summary', '')

        # Only hand a summary path to the UI when the file is really there
        # (e.g. no summary is produced if summarization was not requested);
        # a File output given a nonexistent path fails instead of rendering.
        summary_file_path = json_file_path.replace('.segments.json', '_summary.txt')
        if not os.path.isfile(summary_file_path):
            summary_file_path = None

        return json_data, summary, json_file_path, summary_file_path
    except Exception as e:
        # UI boundary: report the failure to the user, but keep the traceback
        # in the log so the root cause is not lost.
        logging.exception("process_url failed for input %r", input_path)
        error_message = f"An error occurred: {str(e)}"
        return None, error_message, None, None



def launch_ui():
    """Build the Gradio interface around ``process_url`` and launch it.

    The form collects the same options ``process_url`` accepts (URL, API name,
    API key, number of speakers, Whisper model, offset, VAD filter, download
    video) and shows four outputs: the transcription text, the summary, and
    download links for the segments JSON and the summary file.

    Blocks in ``iface.launch()`` until the server is stopped.
    """
    def process_transcription(json_data):
        """Turn parsed segment data into plain text, one segment per line."""
        if not json_data:
            return ""
        try:
            # assumes each segment is a dict with a "text" entry - TODO confirm
            return "\n".join(item["text"] for item in json_data)
        except (KeyError, TypeError):
            # Unexpected segment layout: show the raw JSON rather than fail.
            return json.dumps(json_data, indent=2)

    def run_pipeline(input_path, api_name, api_key, num_speakers, whisper_model, offset, vad_filter, download_video_flag):
        """Call ``process_url`` and convert its JSON payload to display text."""
        json_data, summary, json_file_path, summary_file_path = process_url(
            input_path,
            api_name=api_name,
            api_key=api_key,
            num_speakers=num_speakers,
            whisper_model=whisper_model,
            offset=offset,
            vad_filter=vad_filter,
            download_video_flag=download_video_flag,
        )
        # The first output is a Textbox, so it needs a string, not the
        # parsed JSON structure that process_url returns.
        return process_transcription(json_data), summary, json_file_path, summary_file_path

    iface = gr.Interface(
        fn=run_pipeline,
        inputs=[
            gr.components.Textbox(label="URL"),
            gr.components.Dropdown(choices=["openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], label="API Name"),
            gr.components.Textbox(label="API Key"),
            # precision=0 makes the component deliver ints, matching the
            # integer values the command-line path supplies.
            gr.components.Number(value=2, label="Number of Speakers", precision=0),
            gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model"),
            gr.components.Number(value=0, label="Offset", precision=0),
            gr.components.Checkbox(value=False, label="VAD Filter"),
            gr.components.Checkbox(value=False, label="Download Video")
        ],
        outputs=[
            gr.components.Textbox(label="Transcription", value=lambda: "", max_lines=10),
            gr.components.Textbox(label="Summary"),
            gr.components.File(label="Download Transcription JSON"),
            gr.components.File(label="Download Summary")
        ],
        title="Video Transcription and Summarization",
        description="Submit a video URL for transcription and summarization.",
        allow_flagging="never"
    )
    iface.launch()

#
#
####################################################################################################################################







####################################################################################################################################
# Main()
Expand Down Expand Up @@ -1238,12 +1304,16 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
parser.add_argument('-off', '--offset', type=int, default=0, help='Offset in seconds (default: 0)')
parser.add_argument('-vad', '--vad_filter', action='store_true', help='Enable VAD filter')
parser.add_argument('-log', '--log_level', type=str, default='INFO', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Log level (default: INFO)')
parser.add_argument('-ui', '--user_interface', action='store_true', help='Launch the Gradio user interface')
#parser.add_argument('--log_file', action=str, help='Where to save logfile (non-default)')
args = parser.parse_args()

if args.input_path is None:
parser.print_help()
sys.exit(1)
if args.user_interface:
launch_ui()
else:
if args.input_path is None:
parser.print_help()
sys.exit(1)

logging.basicConfig(level=getattr(logging, args.log_level), format='%(asctime)s - %(levelname)s - %(message)s')

Expand Down

0 comments on commit 8f4f5be

Please sign in to comment.