Spaces:
Runtime error
Runtime error
Upload Summarization_General_Lib.py
Browse files
App_Function_Libraries/Summarization_General_Lib.py
CHANGED
|
@@ -20,6 +20,7 @@ import json
|
|
| 20 |
import logging
|
| 21 |
import os
|
| 22 |
import time
|
|
|
|
| 23 |
|
| 24 |
import requests
|
| 25 |
from requests import RequestException
|
|
@@ -30,9 +31,9 @@ from App_Function_Libraries.Chunk_Lib import semantic_chunking, rolling_summariz
|
|
| 30 |
from App_Function_Libraries.Diarization_Lib import combine_transcription_and_diarization
|
| 31 |
from App_Function_Libraries.Local_Summarization_Lib import summarize_with_llama, summarize_with_kobold, \
|
| 32 |
summarize_with_oobabooga, summarize_with_tabbyapi, summarize_with_vllm, summarize_with_local_llm
|
| 33 |
-
from App_Function_Libraries.DB_Manager import add_media_to_database
|
| 34 |
# Import Local
|
| 35 |
-
from App_Function_Libraries.Utils import load_and_log_configs, load_comprehensive_config, sanitize_filename, \
|
| 36 |
clean_youtube_url, create_download_directory, is_valid_url
|
| 37 |
from App_Function_Libraries.Video_DL_Ingestion_Lib import download_video, extract_video_info
|
| 38 |
|
|
@@ -43,6 +44,55 @@ from App_Function_Libraries.Video_DL_Ingestion_Lib import download_video, extrac
|
|
| 43 |
config = load_comprehensive_config()
|
| 44 |
openai_api_key = config.get('API', 'openai_api_key', fallback=None)
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
def extract_text_from_segments(segments):
|
| 47 |
logging.debug(f"Segments received: {segments}")
|
| 48 |
logging.debug(f"Type of segments: {type(segments)}")
|
|
@@ -65,18 +115,18 @@ def extract_text_from_segments(segments):
|
|
| 65 |
|
| 66 |
def summarize_with_openai(api_key, input_data, custom_prompt_arg, temp=None, system_message=None):
|
| 67 |
loaded_config_data = load_and_log_configs()
|
| 68 |
-
|
| 69 |
try:
|
| 70 |
# API key validation
|
| 71 |
-
if api_key
|
| 72 |
logging.info("OpenAI: #1 API key not provided as parameter")
|
| 73 |
logging.info("OpenAI: Attempting to use API key from config file")
|
| 74 |
api_key = loaded_config_data['api_keys']['openai']
|
| 75 |
|
| 76 |
-
if api_key
|
| 77 |
logging.error("OpenAI: #2 API key not found or is empty")
|
| 78 |
return "OpenAI: API Key Not Provided/Found in Config file or is empty"
|
| 79 |
|
|
|
|
| 80 |
logging.debug(f"OpenAI: Using API Key: {api_key[:5]}...{api_key[-5:]}")
|
| 81 |
|
| 82 |
# Input data handling
|
|
@@ -121,7 +171,6 @@ def summarize_with_openai(api_key, input_data, custom_prompt_arg, temp=None, sys
|
|
| 121 |
else:
|
| 122 |
raise ValueError(f"OpenAI: Invalid input data format: {type(data)}")
|
| 123 |
|
| 124 |
-
openai_model = loaded_config_data['models']['openai'] or "gpt-4o"
|
| 125 |
logging.debug(f"OpenAI: Extracted text (first 500 chars): {text[:500]}...")
|
| 126 |
logging.debug(f"OpenAI: Custom prompt: {custom_prompt_arg}")
|
| 127 |
|
|
@@ -205,6 +254,7 @@ def summarize_with_anthropic(api_key, input_data, custom_prompt_arg, temp=None,
|
|
| 205 |
if not anthropic_api_key or not anthropic_api_key.strip():
|
| 206 |
logging.error("Anthropic: No valid API key available")
|
| 207 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
|
|
|
| 208 |
# For example: raise ValueError("No valid Anthropic API key available")
|
| 209 |
|
| 210 |
|
|
@@ -344,6 +394,7 @@ def summarize_with_cohere(api_key, input_data, custom_prompt_arg, temp=None, sys
|
|
| 344 |
if not cohere_api_key or not cohere_api_key.strip():
|
| 345 |
logging.error("Cohere: No valid API key available")
|
| 346 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
|
|
|
| 347 |
# For example: raise ValueError("No valid Cohere API key available")
|
| 348 |
|
| 349 |
if custom_prompt_arg is None:
|
|
@@ -455,6 +506,7 @@ def summarize_with_groq(api_key, input_data, custom_prompt_arg, temp=None, syste
|
|
| 455 |
if not groq_api_key or not groq_api_key.strip():
|
| 456 |
logging.error("Anthropic: No valid API key available")
|
| 457 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
|
|
|
| 458 |
# For example: raise ValueError("No valid Groq API key available")
|
| 459 |
|
| 460 |
logging.debug(f"Groq: Using API Key: {groq_api_key[:5]}...{groq_api_key[-5:]}")
|
|
@@ -683,6 +735,7 @@ def summarize_with_huggingface(api_key, input_data, custom_prompt_arg, temp=None
|
|
| 683 |
if not huggingface_api_key or not huggingface_api_key.strip():
|
| 684 |
logging.error("HuggingFace: No valid API key available")
|
| 685 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
|
|
|
| 686 |
# For example: raise ValueError("No valid HuggingFace API key available")
|
| 687 |
|
| 688 |
|
|
@@ -733,7 +786,7 @@ def summarize_with_huggingface(api_key, input_data, custom_prompt_arg, temp=None
|
|
| 733 |
response = requests.post(API_URL, headers=headers, json=data)
|
| 734 |
|
| 735 |
if response.status_code == 200:
|
| 736 |
-
summary = response.json()[0]['
|
| 737 |
logging.debug("huggingface: Summarization successful")
|
| 738 |
print("Summarization successful.")
|
| 739 |
return summary
|
|
@@ -772,6 +825,7 @@ def summarize_with_deepseek(api_key, input_data, custom_prompt_arg, temp=None, s
|
|
| 772 |
if not deepseek_api_key or not deepseek_api_key.strip():
|
| 773 |
logging.error("DeepSeek: No valid API key available")
|
| 774 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
|
|
|
| 775 |
# For example: raise ValueError("No valid deepseek API key available")
|
| 776 |
|
| 777 |
|
|
@@ -877,6 +931,7 @@ def summarize_with_mistral(api_key, input_data, custom_prompt_arg, temp=None, sy
|
|
| 877 |
if not mistral_api_key or not mistral_api_key.strip():
|
| 878 |
logging.error("Mistral: No valid API key available")
|
| 879 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
|
|
|
| 880 |
# For example: raise ValueError("No valid Mistral API key available")
|
| 881 |
|
| 882 |
|
|
@@ -1145,39 +1200,14 @@ def save_transcription_and_summary(transcription_text, summary_text, download_pa
|
|
| 1145 |
def summarize_chunk(api_name, text, custom_prompt_input, api_key, temp=None, system_message=None):
|
| 1146 |
logging.debug("Entered 'summarize_chunk' function")
|
| 1147 |
try:
|
| 1148 |
-
|
| 1149 |
-
|
| 1150 |
-
|
| 1151 |
-
return summarize_with_anthropic(api_key, text, custom_prompt_input, temp, system_message)
|
| 1152 |
-
elif api_name.lower() == "cohere":
|
| 1153 |
-
return summarize_with_cohere(api_key, text, custom_prompt_input, temp, system_message)
|
| 1154 |
-
elif api_name.lower() == "groq":
|
| 1155 |
-
return summarize_with_groq(api_key, text, custom_prompt_input, temp, system_message)
|
| 1156 |
-
elif api_name.lower() == "openrouter":
|
| 1157 |
-
return summarize_with_openrouter(api_key, text, custom_prompt_input, temp, system_message)
|
| 1158 |
-
elif api_name.lower() == "deepseek":
|
| 1159 |
-
return summarize_with_deepseek(api_key, text, custom_prompt_input, temp, system_message)
|
| 1160 |
-
elif api_name.lower() == "mistral":
|
| 1161 |
-
return summarize_with_mistral(api_key, text, custom_prompt_input, temp, system_message)
|
| 1162 |
-
elif api_name.lower() == "llama.cpp":
|
| 1163 |
-
return summarize_with_llama(text, custom_prompt_input, temp, system_message)
|
| 1164 |
-
elif api_name.lower() == "kobold":
|
| 1165 |
-
return summarize_with_kobold(text, api_key, custom_prompt_input, temp, system_message)
|
| 1166 |
-
elif api_name.lower() == "ooba":
|
| 1167 |
-
return summarize_with_oobabooga(text, api_key, custom_prompt_input, temp, system_message)
|
| 1168 |
-
elif api_name.lower() == "tabbyapi":
|
| 1169 |
-
return summarize_with_tabbyapi(text, custom_prompt_input, temp, system_message)
|
| 1170 |
-
elif api_name.lower() == "vllm":
|
| 1171 |
-
return summarize_with_vllm(text, custom_prompt_input, temp, system_message)
|
| 1172 |
-
elif api_name.lower() == "local-llm":
|
| 1173 |
-
return summarize_with_local_llm(text, custom_prompt_input, temp, system_message)
|
| 1174 |
-
elif api_name.lower() == "huggingface":
|
| 1175 |
-
return summarize_with_huggingface(api_key, text, custom_prompt_input, temp, )#system_message)
|
| 1176 |
-
else:
|
| 1177 |
-
logging.warning(f"Unsupported API: {api_name}")
|
| 1178 |
return None
|
|
|
|
|
|
|
| 1179 |
except Exception as e:
|
| 1180 |
-
logging.error(f"Error in summarize_chunk with {api_name}: {str(e)}")
|
| 1181 |
return None
|
| 1182 |
|
| 1183 |
|
|
|
|
| 20 |
import logging
|
| 21 |
import os
|
| 22 |
import time
|
| 23 |
+
from typing import Optional
|
| 24 |
|
| 25 |
import requests
|
| 26 |
from requests import RequestException
|
|
|
|
| 31 |
from App_Function_Libraries.Diarization_Lib import combine_transcription_and_diarization
|
| 32 |
from App_Function_Libraries.Local_Summarization_Lib import summarize_with_llama, summarize_with_kobold, \
|
| 33 |
summarize_with_oobabooga, summarize_with_tabbyapi, summarize_with_vllm, summarize_with_local_llm
|
| 34 |
+
from App_Function_Libraries.DB.DB_Manager import add_media_to_database
|
| 35 |
# Import Local
|
| 36 |
+
from App_Function_Libraries.Utils.Utils import load_and_log_configs, load_comprehensive_config, sanitize_filename, \
|
| 37 |
clean_youtube_url, create_download_directory, is_valid_url
|
| 38 |
from App_Function_Libraries.Video_DL_Ingestion_Lib import download_video, extract_video_info
|
| 39 |
|
|
|
|
| 44 |
config = load_comprehensive_config()
|
| 45 |
openai_api_key = config.get('API', 'openai_api_key', fallback=None)
|
| 46 |
|
| 47 |
+
|
| 48 |
+
def summarize(
    input_data: str,
    custom_prompt_arg: Optional[str],
    api_name: str,
    api_key: Optional[str],
    temp: Optional[float],
    system_message: Optional[str]
) -> str:
    """Dispatch a summarization request to the backend selected by ``api_name``.

    The backend name is matched case-insensitively against the supported
    identifiers ("openai", "anthropic", "cohere", "groq", "huggingface",
    "openrouter", "deepseek", "mistral", "llama.cpp", "kobold", "ooba",
    "tabbyapi", "vllm", "local-llm").

    Args:
        input_data: Content to summarize; forwarded unchanged to the backend.
        custom_prompt_arg: Prompt forwarded to the backend.
        api_name: Name of the backend to use.
        api_key: Key forwarded to the backends that accept one.
        temp: Temperature forwarded to the backends that accept one.
        system_message: System message forwarded to the backends that accept one.

    Returns:
        Whatever the selected backend returns. If ``api_name`` matches no
        backend, the string ``"Error: Invalid API Name <api_name>"``. If any
        exception is raised while dispatching or inside the backend, the
        exception is logged and ``"Error: <exception text>"`` is returned
        instead of propagating.
    """
    try:
        logging.debug(f"api_name type: {type(api_name)}, value: {api_name}")
        # Normalize once. A non-string api_name raises here and is reported by
        # the handler below, exactly as the per-branch .lower() calls would.
        provider = api_name.lower()

        if provider == "openai":
            return summarize_with_openai(api_key, input_data, custom_prompt_arg, temp, system_message)
        elif provider == "anthropic":
            return summarize_with_anthropic(api_key, input_data, custom_prompt_arg, temp, system_message)
        elif provider == "cohere":
            return summarize_with_cohere(api_key, input_data, custom_prompt_arg, temp, system_message)
        elif provider == "groq":
            return summarize_with_groq(api_key, input_data, custom_prompt_arg, temp, system_message)
        elif provider == "huggingface":
            # system_message is intentionally not forwarded to this backend.
            return summarize_with_huggingface(api_key, input_data, custom_prompt_arg, temp)
        elif provider == "openrouter":
            return summarize_with_openrouter(api_key, input_data, custom_prompt_arg, temp, system_message)
        elif provider == "deepseek":
            return summarize_with_deepseek(api_key, input_data, custom_prompt_arg, temp, system_message)
        elif provider == "mistral":
            return summarize_with_mistral(api_key, input_data, custom_prompt_arg, temp, system_message)
        elif provider == "llama.cpp":
            return summarize_with_llama(input_data, custom_prompt_arg, temp, system_message)
        elif provider == "kobold":
            return summarize_with_kobold(input_data, api_key, custom_prompt_arg, temp, system_message)
        elif provider == "ooba":
            return summarize_with_oobabooga(input_data, api_key, custom_prompt_arg, temp, system_message)
        elif provider == "tabbyapi":
            return summarize_with_tabbyapi(input_data, custom_prompt_arg, temp, system_message)
        elif provider == "vllm":
            # NOTE(review): the third argument is None here, whereas the other
            # local backends receive temp -- confirm against
            # summarize_with_vllm's signature.
            return summarize_with_vllm(input_data, custom_prompt_arg, None, system_message)
        elif provider == "local-llm":
            return summarize_with_local_llm(input_data, custom_prompt_arg, temp, system_message)
        else:
            return f"Error: Invalid API Name {api_name}"

    except Exception as e:
        logging.error(f"Error in summarize function: {str(e)}", exc_info=True)
        return f"Error: {str(e)}"
|
| 94 |
+
|
| 95 |
+
|
| 96 |
def extract_text_from_segments(segments):
|
| 97 |
logging.debug(f"Segments received: {segments}")
|
| 98 |
logging.debug(f"Type of segments: {type(segments)}")
|
|
|
|
| 115 |
|
| 116 |
def summarize_with_openai(api_key, input_data, custom_prompt_arg, temp=None, system_message=None):
|
| 117 |
loaded_config_data = load_and_log_configs()
|
|
|
|
| 118 |
try:
|
| 119 |
# API key validation
|
| 120 |
+
if not api_key or api_key.strip() == "":
|
| 121 |
logging.info("OpenAI: #1 API key not provided as parameter")
|
| 122 |
logging.info("OpenAI: Attempting to use API key from config file")
|
| 123 |
api_key = loaded_config_data['api_keys']['openai']
|
| 124 |
|
| 125 |
+
if not api_key or api_key.strip() == "":
|
| 126 |
logging.error("OpenAI: #2 API key not found or is empty")
|
| 127 |
return "OpenAI: API Key Not Provided/Found in Config file or is empty"
|
| 128 |
|
| 129 |
+
openai_api_key = api_key
|
| 130 |
logging.debug(f"OpenAI: Using API Key: {api_key[:5]}...{api_key[-5:]}")
|
| 131 |
|
| 132 |
# Input data handling
|
|
|
|
| 171 |
else:
|
| 172 |
raise ValueError(f"OpenAI: Invalid input data format: {type(data)}")
|
| 173 |
|
|
|
|
| 174 |
logging.debug(f"OpenAI: Extracted text (first 500 chars): {text[:500]}...")
|
| 175 |
logging.debug(f"OpenAI: Custom prompt: {custom_prompt_arg}")
|
| 176 |
|
|
|
|
| 254 |
if not anthropic_api_key or not anthropic_api_key.strip():
|
| 255 |
logging.error("Anthropic: No valid API key available")
|
| 256 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
| 257 |
+
#FIXME
|
| 258 |
# For example: raise ValueError("No valid Anthropic API key available")
|
| 259 |
|
| 260 |
|
|
|
|
| 394 |
if not cohere_api_key or not cohere_api_key.strip():
|
| 395 |
logging.error("Cohere: No valid API key available")
|
| 396 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
| 397 |
+
# FIXME
|
| 398 |
# For example: raise ValueError("No valid Cohere API key available")
|
| 399 |
|
| 400 |
if custom_prompt_arg is None:
|
|
|
|
| 506 |
if not groq_api_key or not groq_api_key.strip():
|
| 507 |
logging.error("Anthropic: No valid API key available")
|
| 508 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
| 509 |
+
# FIXME
|
| 510 |
# For example: raise ValueError("No valid Groq API key available")
|
| 511 |
|
| 512 |
logging.debug(f"Groq: Using API Key: {groq_api_key[:5]}...{groq_api_key[-5:]}")
|
|
|
|
| 735 |
if not huggingface_api_key or not huggingface_api_key.strip():
|
| 736 |
logging.error("HuggingFace: No valid API key available")
|
| 737 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
| 738 |
+
# FIXME
|
| 739 |
# For example: raise ValueError("No valid HuggingFace API key available")
|
| 740 |
|
| 741 |
|
|
|
|
| 786 |
response = requests.post(API_URL, headers=headers, json=data)
|
| 787 |
|
| 788 |
if response.status_code == 200:
|
| 789 |
+
summary = response.json()[0]['generated_text'].strip()
|
| 790 |
logging.debug("huggingface: Summarization successful")
|
| 791 |
print("Summarization successful.")
|
| 792 |
return summary
|
|
|
|
| 825 |
if not deepseek_api_key or not deepseek_api_key.strip():
|
| 826 |
logging.error("DeepSeek: No valid API key available")
|
| 827 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
| 828 |
+
# FIXME
|
| 829 |
# For example: raise ValueError("No valid deepseek API key available")
|
| 830 |
|
| 831 |
|
|
|
|
| 931 |
if not mistral_api_key or not mistral_api_key.strip():
|
| 932 |
logging.error("Mistral: No valid API key available")
|
| 933 |
# You might want to raise an exception here or handle this case as appropriate for your application
|
| 934 |
+
# FIXME
|
| 935 |
# For example: raise ValueError("No valid Mistral API key available")
|
| 936 |
|
| 937 |
|
|
|
|
| 1200 |
def summarize_chunk(api_name, text, custom_prompt_input, api_key, temp=None, system_message=None):
    """Summarize one chunk of text through ``summarize`` and normalize failures to ``None``.

    Args:
        api_name: Backend name handed to ``summarize``.
        text: Chunk content to summarize.
        custom_prompt_input: Prompt handed to ``summarize``.
        api_key: Key handed to ``summarize``.
        temp: Optional temperature handed to ``summarize``.
        system_message: Optional system message handed to ``summarize``.

    Returns:
        The summary returned by ``summarize``, or ``None`` when that result is
        ``None``, starts with ``"Error:"``, or any exception is raised.
    """
    logging.debug("Entered 'summarize_chunk' function")
    try:
        summary = summarize(text, custom_prompt_input, api_name, api_key, temp, system_message)

        # Success means a non-None result without the "Error:" prefix.
        succeeded = summary is not None and not summary.startswith("Error:")
        if succeeded:
            logging.info(f"Summarization with {api_name} successful")
            return summary

        logging.warning(f"Summarization with {api_name} failed: {summary}")
        return None
    except Exception as e:
        # Best-effort contract: callers receive None instead of an exception.
        logging.error(f"Error in summarize_chunk with {api_name}: {str(e)}", exc_info=True)
        return None
|
| 1212 |
|
| 1213 |
|