Hey, we got a UI now

Got a Gradio UI, thanks Opus. It's partially broken, but it's something.
rmusser01 · May 9, 2024 · 8f4f5be · 8f4f5be
1 parent 0aa1d9d
commit 8f4f5be
Show file tree

Hide file tree

Showing 3 changed files with 78 additions and 7 deletions.
diff --git a/.gitignore b/.gitignore
diff --git a/README.md b/README.md
@@ -11,7 +11,7 @@ Original: `YouTube contains an incredible amount of knowledge, much of which is
   * `python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s`
 - **Download Audio+Video from URL -> Transcribe audio from Video:**
   * `python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s`
-- **Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` i.e. llama.cpp/`ooba`/`kobold`/`tabby`) API:**
+- **Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` (llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:**
   * `python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s -api <your choice of API>`
 - **Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
   * `python summarize.py ./local/file_on_your/system --api_name <API_name>`
@@ -25,8 +25,9 @@ Original: `YouTube contains an incredible amount of knowledge, much of which is
 - [Credits](#credits)
 
 ### <a name="what"></a>What?
-- Use the script to transcribe a local file or remote url. 
-  * Any url youtube-dl supports _should_ work.
+- Use the script to (download->)transcribe(->summarize) a local file or remote url. 
+  * Any YouTube video. (For playlists, run `Get_Playlist_URLs.py <Playlist URL>`; it will create a text file containing the URL of each video, which you can then pass as input so that they are all downloaded. Pull requests are welcome.)
+    * Any url youtube-dl supports _should_ work.
   * If you pass an API name (anthropic/cohere/groq/openai) as a second argument, and add your API key to the config file, you can have your resulting transcriptions summarized as well. 
     * Alternatively, you can pass `llama`/`ooba`/`kobold`/`tabby` and have the script perform a request to your local API endpoint for summarization. You will need to modify the `llama_api_IP` value in the `config.txt` to reflect the `IP:Port` of your local server.
     * Or pass the `--api_url` argument with the `IP:Port` to avoid making changes to the `config.txt` file.

diff --git a/summarize.py b/summarize.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python3
+import gradio as gr
 import argparse, configparser, datetime, json, logging, os, platform, requests, shutil, subprocess, sys, time, unicodedata
 import zipfile
 from datetime import datetime
 import contextlib
-import ffmpeg # Used for issuing commands to underlying ffmpeg executable, pip package ffmpeg is from 2018
+import ffmpeg
 import torch
 import yt_dlp
 
@@ -1101,6 +1102,71 @@ def save_summary_to_file(summary, file_path):
 
 
 
+####################################################################################################################################
+# Gradio UI
+#
+
+def process_url(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model="small.en", offset=0, vad_filter=False, download_video_flag=False):
+    try:
+        results = main(input_path, api_name=api_name, api_key=api_key, num_speakers=num_speakers, whisper_model=whisper_model, offset=offset, vad_filter=vad_filter, download_video_flag=download_video_flag)
+
+        if results:
+            transcription_result = results[0]
+            json_file_path = transcription_result['audio_file'].replace('.wav', '.segments.json')
+            with open(json_file_path, 'r') as file:
+                json_data = json.load(file)
+
+            summary = transcription_result.get('summary', '')
+
+            return json_data, summary, json_file_path, json_file_path.replace('.segments.json', '_summary.txt')
+        else:
+            return None, "No results found.", None, None
+    except Exception as e:
+        error_message = f"An error occurred: {str(e)}"
+        return None, error_message, None, None
+
+
+
+def launch_ui():
+    def process_transcription(json_data):
+        if json_data:
+            return "\n".join([item["text"] for item in json_data])
+        else:
+            return ""
+
+    iface = gr.Interface(
+        fn=process_url,
+        inputs=[
+            gr.components.Textbox(label="URL"),
+            gr.components.Dropdown(choices=["openai", "anthropic", "cohere", "groq", "llama", "kobold", "ooba"], label="API Name"),
+            gr.components.Textbox(label="API Key"),
+            gr.components.Number(value=2, label="Number of Speakers"),
+            gr.components.Dropdown(choices=whisper_models, value="small.en", label="Whisper Model"),
+            gr.components.Number(value=0, label="Offset"),
+            gr.components.Checkbox(value=False, label="VAD Filter"),
+            gr.components.Checkbox(value=False, label="Download Video")
+        ],
+        outputs=[
+            gr.components.Textbox(label="Transcription", value=lambda: "", max_lines=10),
+            gr.components.Textbox(label="Summary"),
+            gr.components.File(label="Download Transcription JSON"),
+            gr.components.File(label="Download Summary")
+        ],
+        title="Video Transcription and Summarization",
+        description="Submit a video URL for transcription and summarization.",
+        allow_flagging="never"
+    )
+    iface.launch()
+
+#
+#
+####################################################################################################################################
+
+
+
+
+
+
 
 ####################################################################################################################################
 # Main()
@@ -1238,12 +1304,16 @@ def main(input_path, api_name=None, api_key=None, num_speakers=2, whisper_model=
     parser.add_argument('-off', '--offset', type=int, default=0, help='Offset in seconds (default: 0)')
     parser.add_argument('-vad', '--vad_filter', action='store_true', help='Enable VAD filter')
     parser.add_argument('-log', '--log_level', type=str, default='INFO', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Log level (default: INFO)')
+    parser.add_argument('-ui', '--user_interface', action='store_true', help='Launch the Gradio user interface')
     #parser.add_argument('--log_file', action=str, help='Where to save logfile (non-default)')
     args = parser.parse_args()
 
-    if args.input_path is None:
-        parser.print_help()
-        sys.exit(1)
+    if args.user_interface:
+        launch_ui()
+    else:
+        if args.input_path is None:
+            parser.print_help()
+            sys.exit(1)
 
     logging.basicConfig(level=getattr(logging, args.log_level), format='%(asctime)s - %(levelname)s - %(message)s')