Skip to content

Commit

Permalink
Merge pull request #36 from rmusser01/main
Browse files Browse the repository at this point in the history
Fixes for transcription pipeline; Transcription display on the audio tab; RAG Libary -> RAG Library; checkbox for VAD
  • Loading branch information
rmusser01 authored Oct 14, 2024
2 parents 9503e7f + 4e560c6 commit 08cebf5
Show file tree
Hide file tree
Showing 12 changed files with 65 additions and 26 deletions.
13 changes: 9 additions & 4 deletions App_Function_Libraries/Audio/Audio_Files.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,18 +268,23 @@ def process_audio_files(audio_urls, audio_file, whisper_model, api_name, api_key
progress = []
all_transcriptions = []
all_summaries = []

#v2
def format_transcription_with_timestamps(segments):
if keep_timestamps:
formatted_segments = []
for segment in segments:
start = segment.get('Time_Start', 0)
end = segment.get('Time_End', 0)
text = segment.get('Text', '')
text = segment.get('Text', '').strip() # Ensure text is stripped of leading/trailing spaces

# Add the formatted timestamp and text to the list; the newline separators are inserted by the join below
formatted_segments.append(f"[{start:.2f}-{end:.2f}] {text}")
return " ".join(formatted_segments)

# Join the segments with a newline to ensure proper formatting
return "\n".join(formatted_segments)
else:
return " ".join([segment.get('Text', '') for segment in segments])
# Join the text without timestamps
return "\n".join([segment.get('Text', '').strip() for segment in segments])

def update_progress(message):
progress.append(message)
Expand Down
13 changes: 8 additions & 5 deletions App_Function_Libraries/Audio/Audio_Transcription_Lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import gc
import json
import logging
import multiprocessing
import os
import queue
import sys
Expand Down Expand Up @@ -45,7 +46,7 @@
whisper_model_instance = None
config = load_comprehensive_config()
processing_choice = config.get('Processing', 'processing_choice', fallback='cpu')

total_thread_count = multiprocessing.cpu_count()


class WhisperModel(OriginalWhisperModel):
Expand All @@ -55,7 +56,7 @@ class WhisperModel(OriginalWhisperModel):
valid_model_sizes = [
"tiny.en", "tiny", "base.en", "base", "small.en", "small", "medium.en", "medium",
"large-v1", "large-v2", "large-v3", "large", "distil-large-v2", "distil-medium.en",
"distil-small.en", "distil-large-v3"
"distil-small.en", "distil-large-v3",
]

def __init__(
Expand All @@ -64,7 +65,7 @@ def __init__(
device: str = processing_choice,
device_index: Union[int, List[int]] = 0,
compute_type: str = "default",
cpu_threads: int = 16,
cpu_threads: int = 0,#total_thread_count, FIXME - I think this should be 0
num_workers: int = 1,
download_root: Optional[str] = None,
local_files_only: bool = False,
Expand Down Expand Up @@ -199,11 +200,13 @@ def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='me
return segments

logging.info('speech-to-text: Starting transcription...')
options = dict(language=selected_source_lang, beam_size=5, best_of=5, vad_filter=vad_filter)
# FIXME - revisit this
options = dict(language=selected_source_lang, beam_size=10, best_of=10, vad_filter=vad_filter)
transcribe_options = dict(task="transcribe", **options)
# use function and config at top of file
logging.debug("speech-to-text: Using whisper model: %s", whisper_model)
whisper_model_instance = get_whisper_model(whisper_model, processing_choice)
# The faster_whisper transcription happens here - FIXME: test batching
segments_raw, info = whisper_model_instance.transcribe(audio_file_path, **transcribe_options)

segments = []
Expand All @@ -216,7 +219,7 @@ def speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='me
logging.debug("Segment: %s", chunk)
segments.append(chunk)
# Log each segment to verify it's working
print(f"{segment_chunk.start:.2f}s - {segment_chunk.end:.2f}s | {segment_chunk.text}")
logging.info(f"{segment_chunk.start:.2f}s - {segment_chunk.end:.2f}s | {segment_chunk.text}")

# Log it as well.
logging.debug(
Expand Down
2 changes: 1 addition & 1 deletion App_Function_Libraries/Gradio_UI/RAG_Chat_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#
# Local Imports

from App_Function_Libraries.RAG.RAG_Libary_2 import enhanced_rag_pipeline
from App_Function_Libraries.RAG.RAG_Library_2 import enhanced_rag_pipeline
#
########################################################################################################################
#
Expand Down
2 changes: 1 addition & 1 deletion App_Function_Libraries/Gradio_UI/RAG_QA_Chat_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from App_Function_Libraries.Books.Book_Ingestion_Lib import read_epub
from App_Function_Libraries.DB.DB_Manager import DatabaseError, get_paginated_files, add_media_with_keywords
from App_Function_Libraries.PDF.PDF_Ingestion_Lib import extract_text_and_format_from_pdf
from App_Function_Libraries.RAG.RAG_Libary_2 import generate_answer, enhanced_rag_pipeline
from App_Function_Libraries.RAG.RAG_Library_2 import generate_answer, enhanced_rag_pipeline
from App_Function_Libraries.RAG.RAG_QA_Chat import search_database, rag_qa_chat
# Eventually... FIXME
from App_Function_Libraries.RAG.RAG_QA_Chat import load_chat_history, save_chat_history
Expand Down
44 changes: 34 additions & 10 deletions App_Function_Libraries/Gradio_UI/Video_transcription_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def create_video_transcription_tab():
lines=5)
video_file_input = gr.File(label="Upload Video File (Optional)", file_types=["video/*"])
diarize_input = gr.Checkbox(label="Enable Speaker Diarization", value=False)
vad_checkbox = gr.Checkbox(label="Enable Voice-Audio-Detection(VAD)", value=True)
whisper_model_input = gr.Dropdown(choices=whisper_models, value="medium", label="Whisper Model")

with gr.Row():
Expand Down Expand Up @@ -185,7 +186,7 @@ def update_prompts(preset_name):
download_summary = gr.File(label="Download All Summaries as Text")

@error_handler
def process_videos_with_error_handling(inputs, start_time, end_time, diarize, whisper_model,
def process_videos_with_error_handling(inputs, start_time, end_time, diarize, vad_use, whisper_model,
custom_prompt_checkbox, custom_prompt, chunking_options_checkbox,
chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
use_multi_level_chunking, chunk_language, api_name,
Expand Down Expand Up @@ -301,7 +302,7 @@ def process_videos_with_error_handling(inputs, start_time, end_time, diarize, wh
input_item, 2, whisper_model,
custom_prompt,
start_seconds, api_name, api_key,
False, False, False, False, 0.01, None, keywords, None, diarize,
vad_use, False, False, False, 0.01, None, keywords, None, diarize,
end_time=end_seconds,
include_timestamps=timestamp_option,
metadata=video_metadata,
Expand Down Expand Up @@ -425,7 +426,7 @@ def process_videos_with_error_handling(inputs, start_time, end_time, diarize, wh
None
)

def process_videos_wrapper(url_input, video_file, start_time, end_time, diarize, whisper_model,
def process_videos_wrapper(url_input, video_file, start_time, end_time, diarize, vad_use, whisper_model,
custom_prompt_checkbox, custom_prompt, chunking_options_checkbox,
chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
use_multi_level_chunking, chunk_language, summarize_recursively, api_name,
Expand Down Expand Up @@ -460,7 +461,7 @@ def process_videos_wrapper(url_input, video_file, start_time, end_time, diarize,
raise ValueError("No input provided. Please enter URLs or upload a video file.")

result = process_videos_with_error_handling(
inputs, start_time, end_time, diarize, whisper_model,
inputs, start_time, end_time, diarize, vad_use, whisper_model,
custom_prompt_checkbox, custom_prompt, chunking_options_checkbox,
chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
use_multi_level_chunking, chunk_language, api_name,
Expand Down Expand Up @@ -507,6 +508,7 @@ def process_url_with_metadata(input_item, num_speakers, whisper_model, custom_pr
try:
logging.info(f"Starting process_url_metadata for URL: {input_item}")
# Create download path

download_path = create_download_directory("Video_Downloads")
logging.info(f"Download path created at: {download_path}")

Expand Down Expand Up @@ -743,15 +745,37 @@ def toggle_confabulation_output(checkbox_value):
inputs=[confab_checkbox],
outputs=[confabulation_output]
)

process_button.click(
fn=process_videos_wrapper,
inputs=[
url_input, video_file_input, start_time_input, end_time_input, diarize_input, whisper_model_input,
custom_prompt_checkbox, custom_prompt_input, chunking_options_checkbox,
chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
use_multi_level_chunking, chunk_language, summarize_recursively, api_name_input, api_key_input,
keywords_input, use_cookies_input, cookies_input, batch_size_input,
timestamp_option, keep_original_video, confab_checkbox, overwrite_checkbox
url_input,
video_file_input,
start_time_input,
end_time_input,
diarize_input,
vad_checkbox,
whisper_model_input,
custom_prompt_checkbox,
custom_prompt_input,
chunking_options_checkbox,
chunk_method,
max_chunk_size,
chunk_overlap,
use_adaptive_chunking,
use_multi_level_chunking,
chunk_language,
summarize_recursively,
api_name_input,
api_key_input,
keywords_input,
use_cookies_input,
cookies_input,
batch_size_input,
timestamp_option,
keep_original_video,
confab_checkbox,
overwrite_checkbox
],
outputs=[progress_output, error_output, results_output, download_transcription, download_summary, confabulation_output]
)
File renamed without changes.
2 changes: 1 addition & 1 deletion App_Function_Libraries/RAG/RAG_QA_Chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#
# Local Imports
from App_Function_Libraries.DB.DB_Manager import db, search_db, DatabaseError, get_media_content
from App_Function_Libraries.RAG.RAG_Libary_2 import generate_answer
from App_Function_Libraries.RAG.RAG_Library_2 import generate_answer
#
########################################################################################################################
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1151,7 +1151,7 @@ def perform_transcription(video_path, offset, whisper_model, vad_filter, diarize

return audio_file_path, diarized_segments

# Non-diarized transcription (existing functionality)
# Non-diarized transcription
if os.path.exists(segments_json_path):
logging.info(f"Segments file already exists: {segments_json_path}")
try:
Expand Down
7 changes: 7 additions & 0 deletions Docs/Issues/ISSUES.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,10 @@ Writing
https://github.com/EMNLP-2024-CritiCS/Collective-Critics-for-Creative-Story-Generation


Update model suggestions for RAG vs Chatting/General use
https://huggingface.co/THUDM/glm-4-9b-chat/blob/main/README_en.md
https://huggingface.co/byroneverson/glm-4-9b-chat-abliterated-gguf/tree/main

Whisper pipeline
https://huggingface.co/spaces/aadnk/faster-whisper-webui
https://huggingface.co/spaces/zhang082799/openai-whisper-large-v3-turbo
2 changes: 1 addition & 1 deletion Tests/RAG/test_RAG_Library_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
sys.path.append(parent_dir)

# Import the functions to test
from App_Function_Libraries.RAG.RAG_Libary_2 import (
from App_Function_Libraries.RAG.RAG_Library_2 import (
fetch_relevant_media_ids,
perform_vector_search,
perform_full_text_search
Expand Down
2 changes: 1 addition & 1 deletion Tests/RAG/test_enhanced_rag_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
sys.path.append(parent_dir)

# Now import the necessary modules
from App_Function_Libraries.RAG.RAG_Libary_2 import enhanced_rag_pipeline
from App_Function_Libraries.RAG.RAG_Library_2 import enhanced_rag_pipeline
from App_Function_Libraries.RAG.Embeddings_Create import create_embeddings_batch
from App_Function_Libraries.RAG.ChromaDB_Library import vector_search

Expand Down
2 changes: 1 addition & 1 deletion summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
#
# Global variables
whisper_models = ["small", "medium", "small.en", "medium.en", "medium", "large", "large-v1", "large-v2", "large-v3",
"distil-large-v2", "distil-medium.en", "distil-small.en"]
"distil-large-v2", "distil-medium.en", "distil-small.en", ]
server_mode = False
share_public = False

Expand Down

0 comments on commit 08cebf5

Please sign in to comment.