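"""Dash web app that turns a meeting recording into minutes.

Workflow: upload an audio or video file, extract and transcribe the audio with
OpenAI Whisper, then generate structured minutes or a speaker-diarized
transcript with the selected provider (OpenAI, Google Gemini, Anthropic Claude,
or a Grok model served through the Groq API), and download the results as
.docx files.

Configuration comes from environment variables: OPENAI_API_KEY (required for
transcription), plus GOOGLE_API_KEY, ANTHROPIC_API_KEY and GROK_API_KEY
(optional; the corresponding model is disabled when missing). Running this
file directly starts the server on 0.0.0.0:7860.
"""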
import dash
from dash import dcc, html, Input, Output, State, callback_context
import dash_bootstrap_components as dbc
import os
import tempfile
import base64
import openai
import docx
from datetime import datetime
import threading
import time
import google.generativeai as genai
from anthropic import Anthropic
import requests
import uuid
import flask
import shutil
import logging
from collections import defaultdict
from moviepy import VideoFileClip

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    logging.warning("OPENAI_API_KEY not set. Transcription will fail.")

google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key:
    try:
        genai.configure(api_key=google_api_key)
    except Exception as e:
        logging.error(f"Failed to configure Google Gemini: {e}")
        genai = None
else:
    genai = None
    logging.warning("GOOGLE_API_KEY not set. Gemini model will not be available.")

anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
if anthropic_api_key:
    try:
        anthropic = Anthropic(api_key=anthropic_api_key)
    except Exception as e:
        logging.error(f"Failed to initialize Anthropic client: {e}")
        anthropic = None
else:
    anthropic = None
    logging.warning("ANTHROPIC_API_KEY not set. Claude model will not be available.")

grok_api_key = os.getenv("GROK_API_KEY")
if not grok_api_key:
    logging.warning("GROK_API_KEY not set. Grok model (served via the Groq API) will not be available.")

server = flask.Flask(__name__)
app = dash.Dash(__name__, server=server, external_stylesheets=[dbc.themes.BOOTSTRAP], suppress_callback_exceptions=True)

session_data = defaultdict(lambda: {"audio_path": None, "transcript": None, "minutes": None, "diarized": None, "temp_dir": None, "original_filename": None})
session_locks = defaultdict(threading.Lock)


def get_session_dir(session_id):
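    """Return the session's temporary directory, creating it on first use."""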
    if session_data[session_id]["temp_dir"] is None or not os.path.exists(session_data[session_id]["temp_dir"]):
        session_specific_dir = tempfile.mkdtemp(prefix=f"session_{session_id}_")
        session_data[session_id]["temp_dir"] = session_specific_dir
        logging.info(f"Created temp directory for session {session_id}: {session_specific_dir}")
    return session_data[session_id]["temp_dir"]


def cleanup_session(session_id):
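    """Delete the session's temp directory and drop its in-memory state and lock."""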
    with session_locks[session_id]:
        logging.info(f"Cleaning up session: {session_id}")
        session_dir = session_data[session_id].get("temp_dir")
        if session_dir and os.path.exists(session_dir):
            try:
                shutil.rmtree(session_dir)
                logging.info(f"Removed temp directory: {session_dir}")
            except Exception as e:
                logging.error(f"Error removing directory {session_dir}: {e}")
        if session_id in session_data:
            del session_data[session_id]
        if session_id in session_locks:
            del session_locks[session_id]
        logging.info(f"Session data cleared for {session_id}")


def save_base64_data(content_string, file_path):
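    """Decode a 'type,base64' upload string and write the bytes to file_path; return the path or None on failure."""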
    try:
        logging.info(f"Decoding base64 data for {file_path}")
        content_type, content_string = content_string.split(',')
        data_bytes = base64.b64decode(content_string)
        with open(file_path, 'wb') as f:
            f.write(data_bytes)
        logging.info(f"Saved uploaded data to {file_path}")
        return file_path
    except base64.binascii.Error as e:
        # Checked before ValueError because binascii.Error is a ValueError subclass.
        logging.error(f"Error decoding base64: {e}")
        return None
    except ValueError as e:
        logging.error(f"Error splitting content string: {e}. String might not be in 'type,base64_data' format.")
        return None
    except Exception as e:
        logging.error(f"Error saving base64 data: {e}")
        return None


def extract_audio_from_video(video_path, audio_output_path):
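    """Extract the audio track of a video file to an MP3 via moviepy; return the output path or None on failure."""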
    try:
        logging.info(f"Extracting audio from {video_path} to {audio_output_path}")
        video = VideoFileClip(video_path)
        video.audio.write_audiofile(audio_output_path, codec='mp3')
        video.close()
        logging.info(f"Successfully extracted audio to {audio_output_path}")
        return audio_output_path
    except Exception as e:
        logging.error(f"Error extracting audio from {video_path}: {e}")
        if os.path.exists(audio_output_path):
            os.remove(audio_output_path)
        if 'video' in locals() and hasattr(video, 'close'):
            video.close()
        return None


def transcribe_audio(file_path):
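    """Transcribe an audio file with OpenAI Whisper (whisper-1); return the text or an 'Error: ...' string."""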
logging.info(f"Starting transcription for {file_path}") |
|
if not openai.api_key: |
|
return "Error: OpenAI API key not configured." |
|
if not os.path.exists(file_path): |
|
logging.error(f"Transcription failed: File not found at {file_path}") |
|
return "Error: Audio file not found for transcription." |
|
try: |
|
with open(file_path, "rb") as audio_file: |
|
client = openai.OpenAI() |
|
transcript = client.audio.transcriptions.create( |
|
model="whisper-1", |
|
file=audio_file, |
|
response_format="text" |
|
) |
|
logging.info(f"Transcription successful for {file_path}") |
|
if isinstance(transcript, str): |
|
return transcript |
|
elif hasattr(transcript, 'text'): |
|
return transcript.text |
|
else: |
|
logging.error(f"Unexpected transcription response format: {type(transcript)}") |
|
return "Error: Could not extract transcript text from OpenAI response." |
|
except openai.BadRequestError as e: |
|
logging.error(f"OpenAI API Bad Request Error (possibly file format/size issue): {e}") |
|
error_message = f"Error during transcription: {e}" |
|
if "Invalid file format" in str(e): |
|
error_message = "Error: Invalid audio file format. Supported formats include mp3, mp4, mpeg, mpga, m4a, wav, and webm." |
|
elif "maximum file size" in str(e): |
|
error_message = "Error: Audio file exceeds the maximum size limit (25MB) for direct upload." |
|
return error_message |
|
except openai.AuthenticationError: |
|
logging.error("OpenAI API Authentication Error: Check your API key.") |
|
return "Error: OpenAI API Authentication Failed. Check API Key." |
|
except Exception as e: |
|
logging.error(f"An unexpected error occurred during transcription: {e}") |
|
return f"Error during transcription: An unexpected error occurred." |
|
|
|
def generate_minutes_ai(transcript, model_name, session_id): |
|
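    """Generate structured meeting minutes from the transcript with the selected provider; return text or an 'Error: ...' string."""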
logging.info(f"Generating minutes using {model_name} for session {session_id}") |
|
if not transcript or "Error:" in transcript: |
|
return "Error: Cannot generate minutes from invalid or missing transcript." |
|
with session_locks[session_id]: |
|
try: |
|
if model_name == 'openai': |
|
if not openai.api_key: return "Error: OpenAI API key not configured." |
|
client = openai.OpenAI() |
|
response = client.chat.completions.create( |
|
model="gpt-3.5-turbo", |
|
messages=[ |
|
{"role": "system", "content": "You are a professional assistant tasked with creating structured meeting minutes, including sections like Attendees, Agenda, Discussion Points, Action Items, and Decisions Made."}, |
|
{"role": "user", "content": f"Generate detailed meeting minutes from this transcript:\n\n{transcript}"} |
|
], |
|
timeout=120 |
|
) |
|
logging.info(f"OpenAI minutes generation successful for session {session_id}") |
|
return response.choices[0].message.content |
|
elif model_name == 'gemini': |
|
if not genai: return "Error: Google Gemini API not configured or key missing." |
|
model = genai.GenerativeModel('gemini-1.5-flash-latest') |
|
response = model.generate_content( |
|
f"Generate detailed meeting minutes from this transcript, including sections like Attendees, Agenda, Discussion Points, Action Items, and Decisions Made:\n\n{transcript}", |
|
request_options={'timeout': 120} |
|
) |
|
logging.info(f"Gemini minutes generation successful for session {session_id}") |
|
if response.parts: |
|
return response.text |
|
else: |
|
logging.warning(f"Gemini response blocked or empty for session {session_id}. Reason: {response.prompt_feedback}") |
|
return f"Error: Gemini response blocked or empty. Reason: {response.prompt_feedback}" |
|
elif model_name == 'anthropic': |
|
if not anthropic: return "Error: Anthropic API not configured or key missing." |
|
response = anthropic.messages.create( |
|
model="claude-3-5-haiku-20241022", |
|
max_tokens=2000, |
|
messages=[ |
|
{ |
|
"role": "user", |
|
"content": f"Generate detailed meeting minutes from this transcript, including sections like Attendees, Agenda, Discussion Points, Action Items, and Decisions Made:\n\n{transcript}" |
|
} |
|
], |
|
timeout=120 |
|
) |
|
logging.info(f"Anthropic minutes generation successful for session {session_id}") |
|
if response.content and isinstance(response.content, list) and hasattr(response.content[0], 'text'): |
|
return response.content[0].text |
|
else: |
|
logging.error(f"Could not extract content from Anthropic response: {response}") |
|
return "Error: Could not extract content from Anthropic response." |
|
elif model_name == 'grok': |
|
if not grok_api_key: return "Error: Grok API key (via Groq) not configured." |
|
groq_url = "https://api.groq.com/openai/v1/chat/completions" |
|
headers = { |
|
"Authorization": f"Bearer {grok_api_key}", |
|
"Content-Type": "application/json" |
|
} |
|
data = { |
|
"model": "grok-3-mini-fast-beta", |
|
"messages": [ |
|
{"role": "system", "content": "You are a professional assistant tasked with creating structured meeting minutes, including sections like Attendees, Agenda, Discussion Points, Action Items, and Decisions Made."}, |
|
{"role": "user", "content": f"Generate detailed meeting minutes from this transcript:\n\n{transcript}"} |
|
], |
|
"max_tokens": 2000, |
|
"temperature": 0.7 |
|
} |
|
response = requests.post(groq_url, headers=headers, json=data, timeout=120) |
|
response.raise_for_status() |
|
logging.info(f"Groq ({data['model']}) minutes generation successful for session {session_id}") |
|
return response.json()["choices"][0]["message"]["content"] |
|
else: |
|
logging.warning(f"Invalid model selection: {model_name}") |
|
return "Error: Invalid model selection" |
|
except requests.exceptions.Timeout: |
|
logging.error(f"API Request Timeout for {model_name} on session {session_id}") |
|
return f"Error: Request to {model_name} API timed out." |
|
except requests.exceptions.RequestException as e: |
|
logging.error(f"API Request Error for {model_name}: {e}") |
|
if model_name == 'grok' and e.response is not None: |
|
if e.response.status_code == 429: |
|
logging.warning(f"Groq Rate Limit hit for session {session_id}") |
|
return "Error: Groq API rate limit exceeded. Please try again later." |
|
elif e.response.status_code == 404: |
|
logging.error(f"Model {data['model']} not found via Groq API. Status: {e.response.status_code}. Response: {e.response.text}") |
|
return f"Error: Model '{data['model']}' not found or accessible via Groq API. Please check model availability." |
|
elif e.response.status_code >= 400: |
|
logging.error(f"Groq API error. Status: {e.response.status_code}. Response: {e.response.text}") |
|
return f"Error communicating with Groq API: {e.response.status_code}" |
|
return f"Error communicating with {model_name} API: {e}" |
|
except (genai.types.generation_types.BlockedPromptException, genai.types.generation_types.StopCandidateException) as e: |
|
logging.error(f"Gemini content generation issue for session {session_id}: {e}") |
|
return f"Error: Gemini generation failed or was blocked. {e}" |
|
except Exception as e: |
|
logging.error(f"Error generating minutes with {model_name} for session {session_id}: {e}", exc_info=True) |
|
if model_name == 'anthropic' and 'Could not find model' in str(e): |
|
return f"Error: Anthropic model '{response.model if 'response' in locals() else 'claude-3-5-haiku-20241022'}' not found or accessible. Check model name and API key permissions." |
|
elif model_name == 'gemini' and 'model not found' in str(e).lower(): |
|
return f"Error: Gemini model '{model.model_name if 'model' in locals() else 'gemini-1.5-flash-latest'}' not found or accessible. Check model name and API key permissions." |
|
return f"Error generating minutes using {model_name}: An unexpected error occurred." |
|
|
|
def diarize_transcript_ai(transcript, model_name, session_id): |
|
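    """Ask the selected provider to assign speaker labels (Speaker 1, Speaker 2, ...) to the transcript."""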
logging.info(f"Generating diarized transcript using {model_name} for session {session_id}") |
|
if not transcript or "Error:" in transcript: |
|
return "Error: Cannot diarize invalid or missing transcript." |
|
diarization_prompt = ( |
|
"Analyze the given transcript to identify distinct speakers without labeled identifiers. " |
|
"Create unique speaker embeddings based on individual speech patterns, vocabulary choices, and linguistic styles. " |
|
"Examine the context and content of each utterance to detect likely speaker changes. " |
|
"Recognize typical conversation structures and turn-taking behaviors to differentiate between speakers. " |
|
"Finally, use topic modeling to identify shifts in subject matter and areas of expertise, associating certain topics with specific speakers. " |
|
"Based on this analysis, assign speaker labels (e.g., Speaker 1, Speaker 2) to each utterance in the transcript." |
|
"\n\nTranscript:\n" + transcript |
|
) |
|
with session_locks[session_id]: |
|
try: |
|
if model_name == 'openai': |
|
if not openai.api_key: return "Error: OpenAI API key not configured." |
|
client = openai.OpenAI() |
|
response = client.chat.completions.create( |
|
model="gpt-3.5-turbo", |
|
messages=[ |
|
{"role": "system", "content": "You are a professional assistant skilled in speaker diarization and transcript formatting."}, |
|
{"role": "user", "content": diarization_prompt} |
|
], |
|
timeout=120 |
|
) |
|
logging.info(f"OpenAI diarization successful for session {session_id}") |
|
return response.choices[0].message.content |
|
elif model_name == 'gemini': |
|
if not genai: return "Error: Google Gemini API not configured or key missing." |
|
model = genai.GenerativeModel('gemini-1.5-flash-latest') |
|
response = model.generate_content( |
|
diarization_prompt, |
|
request_options={'timeout': 120} |
|
) |
|
logging.info(f"Gemini diarization successful for session {session_id}") |
|
if response.parts: |
|
return response.text |
|
else: |
|
logging.warning(f"Gemini diarization response blocked or empty for session {session_id}. Reason: {response.prompt_feedback}") |
|
return f"Error: Gemini response blocked or empty. Reason: {response.prompt_feedback}" |
|
elif model_name == 'anthropic': |
|
if not anthropic: return "Error: Anthropic API not configured or key missing." |
|
response = anthropic.messages.create( |
|
model="claude-3-5-haiku-20241022", |
|
max_tokens=2000, |
|
messages=[ |
|
{ |
|
"role": "user", |
|
"content": diarization_prompt |
|
} |
|
], |
|
timeout=120 |
|
) |
|
logging.info(f"Anthropic diarization successful for session {session_id}") |
|
if response.content and isinstance(response.content, list) and hasattr(response.content[0], 'text'): |
|
return response.content[0].text |
|
else: |
|
logging.error(f"Could not extract content from Anthropic diarization response: {response}") |
|
return "Error: Could not extract content from Anthropic response." |
|
elif model_name == 'grok': |
|
if not grok_api_key: return "Error: Grok API key (via Groq) not configured." |
|
groq_url = "https://api.groq.com/openai/v1/chat/completions" |
|
headers = { |
|
"Authorization": f"Bearer {grok_api_key}", |
|
"Content-Type": "application/json" |
|
} |
|
data = { |
|
"model": "grok-3-mini-fast-beta", |
|
"messages": [ |
|
{"role": "system", "content": "You are a professional assistant skilled in speaker diarization and transcript formatting."}, |
|
{"role": "user", "content": diarization_prompt} |
|
], |
|
"max_tokens": 2000, |
|
"temperature": 0.7 |
|
} |
|
response = requests.post(groq_url, headers=headers, json=data, timeout=120) |
|
response.raise_for_status() |
|
logging.info(f"Groq ({data['model']}) diarization successful for session {session_id}") |
|
return response.json()["choices"][0]["message"]["content"] |
|
else: |
|
logging.warning(f"Invalid model selection for diarization: {model_name}") |
|
return "Error: Invalid model selection" |
|
except Exception as e: |
|
logging.error(f"Error during diarization with {model_name} for session {session_id}: {e}", exc_info=True) |
|
return f"Error generating diarized transcript using {model_name}: An unexpected error occurred." |
|
|
|
def save_to_word(content, filename): |
|
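    """Write the content into a single-paragraph .docx file; return the filename or None on failure."""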
    try:
        doc = docx.Document()
        doc.add_paragraph(content)
        doc.save(filename)
        logging.info(f"Saved content to Word document: {filename}")
        return filename
    except Exception as e:
        logging.error(f"Error saving to Word document {filename}: {e}")
        return None


ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.m4a', '.webm', '.mp4', '.mpeg', '.mpga']
ALLOWED_VIDEO_EXTENSIONS = ['.mp4', '.mov', '.avi', '.webm', '.mkv', '.flv']
ALLOWED_UPLOAD_EXTENSIONS = ALLOWED_AUDIO_EXTENSIONS + ALLOWED_VIDEO_EXTENSIONS

app.layout = dbc.Container([
    dcc.Store(id='session-id', storage_type='local'),
    dcc.Store(id='session-state-trigger'),
    dcc.Download(id="download-transcript"),
    dcc.Download(id="download-audio"),
    dcc.Download(id="download-minutes"),
    dcc.Download(id="download-diarized"),
    dbc.Row([
        dbc.Col(dbc.Card(
            dbc.CardBody([
                html.H4("Controls", className="card-title"),
                html.Div("Upload meeting audio or video file:"),
                dcc.Upload(
                    id='audio-uploader',
                    children=html.Div([
                        'Drag and Drop or ',
                        html.A('Select Audio/Video File')
                    ]),
                    style={
                        'width': '100%',
                        'height': '60px',
                        'lineHeight': '60px',
                        'borderWidth': '1px',
                        'borderStyle': 'dashed',
                        'borderRadius': '5px',
                        'textAlign': 'center',
                        'margin': '10px 0'
                    },
                    multiple=False,
                    accept='audio/*,video/*'
                ),
                html.Div(id='upload-status', children='Status: Ready to Upload', className="mt-2"),
                html.H5("Select AI Model", className="mt-4"),
                dcc.Dropdown(
                    id='model-selection',
                    options=[
                        {'label': 'OpenAI GPT-3.5 Turbo', 'value': 'openai', 'disabled': not openai.api_key},
                        {'label': 'Google Gemini 1.5 Flash', 'value': 'gemini', 'disabled': not genai},
                        {'label': 'Anthropic Claude 3.5 Haiku', 'value': 'anthropic', 'disabled': not anthropic},
                        {'label': 'Grok 3 Mini', 'value': 'grok', 'disabled': not grok_api_key}
                    ],
                    value='openai' if openai.api_key else ('gemini' if genai else ('anthropic' if anthropic else ('grok' if grok_api_key else None))),
                    clearable=False,
                    className="mt-2",
                    disabled=not (openai.api_key or genai or anthropic or grok_api_key)
                ),
                dbc.Button("Generate Minutes", id="minutes-btn", color="secondary", className="mt-4 w-100", disabled=True),
                dbc.Button("Diarize Transcript", id="diarize-btn", color="secondary", className="mt-2 w-100", disabled=True),
                html.H5("Downloads", className="mt-4"),
                dbc.Button("Download Transcript (.docx)", id="download-transcript-btn", color="info", className="w-100 mb-2", disabled=True),
                dbc.Button("Download Minutes (.docx)", id="download-minutes-btn", color="info", className="w-100 mb-2", disabled=True),
                dbc.Button("Download Processed Audio", id="download-audio-btn", color="info", className="w-100 mb-2", disabled=True),
                dbc.Button("Download Diarized Transcript (.docx)", id="download-diarized-btn", color="info", className="w-100 mb-2", disabled=True),
                dbc.Button("Delete Session Data", id="delete-btn", color="warning", className="mt-4 w-100", disabled=True),
            ]),
            style={'height': '80vh', 'overflowY': 'auto'}
        ), width=12, lg=4),
        dbc.Col(dbc.Card(
            dbc.CardBody([
                dcc.Loading(
                    id="loading",
                    type="default",
                    parent_style={'position': 'relative', 'height': '100%'},
                    style={'position': 'absolute', 'top': '50%', 'left': '50%', 'transform': 'translate(-50%, -50%)', 'zIndex': '1000'},
                    children=[
                        html.Div([
                            html.H4("Output", className="card-title"),
                            html.Div(id="status", children="Status: Idle", className="mb-2"),
                            html.H5("Transcript / Minutes / Diarization"),
                            html.Div(id="transcript-preview", style={
                                "height": "400px",
                                "overflowY": "scroll",
                                "border": "1px solid #ccc",
                                "padding": "10px",
                                "whiteSpace": "pre-wrap",
                                "wordWrap": "break-word",
                                "backgroundColor": "#f9f9f9"
                            }),
                        ])
                    ]
                ),
                html.Div(id="loading-output", style={"height": "0px", "visibility": "hidden"}),
            ]),
            style={'height': '80vh', 'overflowY': 'auto', 'position': 'relative'}
        ), width=12, lg=8),
    ])
], fluid=True)


@app.callback(
    Output('session-id', 'data'),
    Input('session-id', 'data'),
    prevent_initial_call=False
)
def manage_session_id(existing_session_id):
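    """Resolve the client's session ID from the dcc.Store or the 'dash-session-id' cookie,
    generating and persisting a new UUID when neither is usable."""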
    session_cookie = flask.request.cookies.get('dash-session-id')
    ctx = dash.callback_context
    final_session_id = None
    source = "none"
    if existing_session_id and not ctx.triggered:
        final_session_id = existing_session_id
        source = "store (initial)"
    elif existing_session_id and session_cookie == existing_session_id:
        final_session_id = existing_session_id
        source = "store/cookie match"
    elif session_cookie:
        final_session_id = session_cookie
        source = "cookie"
    else:
        final_session_id = str(uuid.uuid4())
        source = "new generation"
    if final_session_id not in session_data:
        logging.info(f"Initializing server-side session for ID: {final_session_id} (Source: {source})")
        get_session_dir(final_session_id)
    logging.info(f"Manage Session ID - Final ID: {final_session_id}, Source: {source}, Store Input: {existing_session_id}, Cookie Input: {session_cookie}")
    response = dash.callback_context.response
    if source == "new generation" or (session_cookie != final_session_id):
        logging.info(f"Setting session cookie for ID: {final_session_id}")
        response.set_cookie('dash-session-id', final_session_id, max_age=60*60*24*7)
    return final_session_id


@app.callback(
    [
        Output("status", "children"),
        Output("transcript-preview", "children"),
        Output("minutes-btn", "disabled"),
        Output("diarize-btn", "disabled"),
        Output("download-transcript-btn", "disabled"),
        Output("download-minutes-btn", "disabled"),
        Output("download-audio-btn", "disabled"),
        Output("download-diarized-btn", "disabled"),
        Output("delete-btn", "disabled"),
        Output("loading-output", "children"),
        Output("upload-status", "children")
    ],
    [
        Input('audio-uploader', 'contents'),
        Input("minutes-btn", "n_clicks"),
        Input("diarize-btn", "n_clicks"),
        Input("delete-btn", "n_clicks")
    ],
    [
        State("session-id", "data"),
        State("model-selection", "value"),
        State("transcript-preview", "children"),
        State('audio-uploader', 'filename')
    ],
    prevent_initial_call=True
)
def handle_actions(upload_contents, minutes_clicks, diarize_clicks, delete_clicks, session_id, selected_model, existing_preview, filename):
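    """Single callback driving the UI.

    Handles uploads (audio extraction + transcription), minutes generation,
    diarization, and session deletion, and keeps the status text, preview
    pane, and button disabled-states in sync with the server-side session.
    """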
    if not session_id:
        logging.warning("Session ID missing in handle_actions.")
        return "Status: Error - Session ID missing", "", True, True, True, True, True, True, True, None, "Status: Error"
    ctx = dash.callback_context
    triggered_id = ctx.triggered_id if hasattr(ctx, 'triggered_id') else (ctx.triggered[0]['prop_id'].split('.')[0] if ctx.triggered else None)
    current_transcript = session_data[session_id].get("transcript", "")
    current_minutes = session_data[session_id].get("minutes", "")
    current_diarized = session_data[session_id].get("diarized", "")
    current_audio_path = session_data[session_id].get("audio_path", None)
    original_filename = session_data[session_id].get("original_filename", None)
    output_text = ""

    if current_diarized and "Error:" not in current_diarized:
        output_text = current_diarized
    elif current_minutes and "Error:" not in current_minutes:
        output_text = current_minutes
    elif current_transcript and "Error:" not in current_transcript:
        output_text = current_transcript
    else:
        output_text = "Upload an audio or video file to begin."
    status_msg = "Status: Idle"
    if current_diarized and "Error:" not in current_diarized:
        status_msg = "Status: Session restored. Diarized transcript loaded."
    elif current_minutes and "Error:" not in current_minutes:
        status_msg = "Status: Session restored. Minutes loaded."
    elif current_transcript and "Error:" not in current_transcript:
        status_msg = "Status: Session restored. Transcript loaded. Ready for Minutes Generation."
    elif current_audio_path and os.path.exists(current_audio_path):
        status_msg = f"Status: Session restored. Processed audio loaded ({os.path.basename(original_filename if original_filename else 'file')}). Ready for transcription/minutes."
    elif original_filename:
        status_msg = f"Status: Session restored. Previous upload ({original_filename}) might have had issues."
    minutes_disabled = not bool(current_transcript and "Error:" not in current_transcript)
    diarize_disabled = not bool(current_transcript and "Error:" not in current_transcript)
    dl_transcript_disabled = not bool(current_transcript and "Error:" not in current_transcript)
    dl_minutes_disabled = not bool(current_minutes and "Error:" not in current_minutes)
    dl_audio_disabled = not bool(current_audio_path and os.path.exists(current_audio_path))
    dl_diarized_disabled = not bool(current_diarized and "Error:" not in current_diarized)
    delete_disabled = not bool(session_data.get(session_id, {}).get("temp_dir"))
    loading_output = None
    upload_status_msg = f"Status: {'Loaded: ' + original_filename if original_filename else 'Ready to Upload'}"
    start_time = time.time()
    if triggered_id == 'audio-uploader' and upload_contents is not None and filename is not None:
        logging.info(f"File uploaded for session {session_id}, filename: {filename}")
        session_data[session_id]["original_filename"] = filename
        upload_status_msg = f"Status: Processing Uploaded File ({filename})..."
        status_msg = "Status: Processing Upload..."
        loading_output = "Processing Upload..."
        session_dir = get_session_dir(session_id)
        _, f_ext = os.path.splitext(filename)
        f_ext_lower = f_ext.lower()
        if f_ext_lower not in ALLOWED_UPLOAD_EXTENSIONS:
            status_msg = f"Status: Error - Invalid file type ({f_ext}). Please upload audio or video."
            output_text = f"Error: Invalid file type ({f_ext}). Allowed types: {', '.join(ALLOWED_UPLOAD_EXTENSIONS)}"
            upload_status_msg = f"Status: Invalid File Type ({filename})"
            session_data[session_id]["audio_path"] = None
            session_data[session_id]["transcript"] = None
            session_data[session_id]["minutes"] = None
            session_data[session_id]["diarized"] = None
            session_data[session_id]["original_filename"] = None
            minutes_disabled = True
            diarize_disabled = True
            dl_transcript_disabled = True
            dl_minutes_disabled = True
            dl_diarized_disabled = True
            dl_audio_disabled = True
            delete_disabled = False
            return status_msg, output_text, minutes_disabled, diarize_disabled, dl_transcript_disabled, dl_minutes_disabled, dl_audio_disabled, dl_diarized_disabled, delete_disabled, None, upload_status_msg
        safe_upload_filename = f"uploaded_file{f_ext}"
        upload_file_path = os.path.join(session_dir, safe_upload_filename)
        saved_upload_path = save_base64_data(upload_contents, upload_file_path)
        if saved_upload_path:
            audio_path_for_transcription = None
            is_video = f_ext_lower in ALLOWED_VIDEO_EXTENSIONS
            if is_video:
                status_msg = "Status: Extracting audio from video..."
                upload_status_msg = "Status: Extracting Audio..."
                loading_output = "Extracting Audio..."
                extracted_audio_filename = os.path.join(session_dir, f"extracted_audio_{uuid.uuid4()}.mp3")
                extracted_audio_path = extract_audio_from_video(saved_upload_path, extracted_audio_filename)
                if extracted_audio_path:
                    audio_path_for_transcription = extracted_audio_path
                    session_data[session_id]["audio_path"] = extracted_audio_path
                    dl_audio_disabled = False
                    try:
                        os.remove(saved_upload_path)
                        logging.info(f"Removed original video file: {saved_upload_path}")
                    except Exception as e:
                        logging.warning(f"Could not remove original video file {saved_upload_path}: {e}")
                else:
                    status_msg = "Status: Error - Failed to extract audio from video."
                    output_text = "Error: Failed to extract audio from video file. Check if the file is valid."
                    upload_status_msg = f"Status: Error Extracting Audio ({filename})"
                    session_data[session_id]["audio_path"] = None
                    minutes_disabled = True
                    diarize_disabled = True
                    dl_transcript_disabled = True
                    dl_minutes_disabled = True
                    dl_diarized_disabled = True
                    dl_audio_disabled = True
                    delete_disabled = False
                    return status_msg, output_text, minutes_disabled, diarize_disabled, dl_transcript_disabled, dl_minutes_disabled, dl_audio_disabled, dl_diarized_disabled, delete_disabled, None, upload_status_msg
            else:
                audio_path_for_transcription = saved_upload_path
                session_data[session_id]["audio_path"] = saved_upload_path
                dl_audio_disabled = False
            if audio_path_for_transcription:
                logging.info(f"Audio path set for session {session_id}: {audio_path_for_transcription}. Starting transcription.")
                status_msg = "Status: Transcribing..."
                upload_status_msg = f"Status: Transcribing ({filename})..."
                loading_output = "Transcribing..."
                transcript_text = transcribe_audio(audio_path_for_transcription)
                session_data[session_id]["transcript"] = transcript_text
                session_data[session_id]["minutes"] = None
                session_data[session_id]["diarized"] = None
                if "Error:" in transcript_text:
                    status_msg = f"Status: Transcription Failed - {transcript_text}"
                    output_text = transcript_text
                    minutes_disabled = True
                    diarize_disabled = True
                    dl_transcript_disabled = True
                    dl_minutes_disabled = True
                    dl_diarized_disabled = True
                    delete_disabled = False
                    upload_status_msg = f"Status: Transcription Failed. ({filename})"
                else:
                    status_msg = "Status: Transcription Complete. Ready for Minutes/Diarization."
                    output_text = transcript_text
                    minutes_disabled = False
                    diarize_disabled = False
                    dl_transcript_disabled = False
                    dl_minutes_disabled = True
                    dl_diarized_disabled = True
                    delete_disabled = False
                    upload_status_msg = f"Status: Processed & Transcribed: {filename}"
                processing_time = time.time() - start_time
                logging.info(f"File processing and transcription took {processing_time:.2f} seconds for session {session_id}")
        else:
            status_msg = "Status: Error - Failed to save uploaded file data."
            output_text = "Failed to save uploaded file data."
            upload_status_msg = "Status: Error Saving Upload"
            session_data[session_id]["audio_path"] = None
            session_data[session_id]["original_filename"] = None
            minutes_disabled = True
            diarize_disabled = True
            dl_transcript_disabled = True
            dl_minutes_disabled = True
            dl_diarized_disabled = True
            dl_audio_disabled = True
            delete_disabled = False
elif triggered_id == "minutes-btn" and minutes_clicks: |
|
logging.info(f"Generate Minutes button clicked for session {session_id}") |
|
current_transcript = session_data[session_id].get("transcript", "") |
|
if current_transcript and "Error:" not in current_transcript: |
|
status_msg = f"Status: Generating Minutes ({selected_model})..." |
|
loading_output = "Generating Minutes..." |
|
minutes_text = generate_minutes_ai(current_transcript, selected_model, session_id) |
|
session_data[session_id]["minutes"] = minutes_text |
|
|
|
if session_data[session_id].get("diarized") and "Error:" not in session_data[session_id]["diarized"]: |
|
output_text = session_data[session_id]["diarized"] |
|
else: |
|
output_text = minutes_text |
|
if "Error:" in minutes_text: |
|
status_msg = f"Status: Minutes Generation Failed - {minutes_text}" |
|
else: |
|
status_msg = "Status: Minutes Generation Complete." |
|
processing_time = time.time() - start_time |
|
logging.info(f"Minutes generation took {processing_time:.2f} seconds for session {session_id}") |
|
minutes_disabled = False |
|
diarize_disabled = False |
|
dl_transcript_disabled = False |
|
dl_audio_disabled = not bool(session_data.get(session_id, {}).get("audio_path") and os.path.exists(session_data.get(session_id, {}).get("audio_path", ""))) |
|
dl_minutes_disabled = not (minutes_text and "Error:" not in minutes_text) |
|
dl_diarized_disabled = not (session_data[session_id].get("diarized") and "Error:" not in session_data[session_id].get("diarized")) |
|
delete_disabled = False |
|
upload_status_msg = f"Status: Processed & Transcribed: {session_data[session_id].get('original_filename', 'File')}" |
|
else: |
|
status_msg = "Status: Cannot generate minutes - No valid transcript available." |
|
output_text = existing_preview |
|
minutes_disabled = True |
|
elif triggered_id == "diarize-btn" and diarize_clicks: |
|
logging.info(f"Diarize button clicked for session {session_id}") |
|
current_transcript = session_data[session_id].get("transcript", "") |
|
if current_transcript and "Error:" not in current_transcript: |
|
status_msg = f"Status: Diarizing Transcript ({selected_model})..." |
|
loading_output = "Diarizing Transcript..." |
|
diarized_text = diarize_transcript_ai(current_transcript, selected_model, session_id) |
|
session_data[session_id]["diarized"] = diarized_text |
|
if "Error:" in diarized_text: |
|
status_msg = f"Status: Diarization Failed - {diarized_text}" |
|
else: |
|
status_msg = "Status: Diarization Complete." |
|
output_text = diarized_text |
|
minutes_disabled = False |
|
diarize_disabled = False |
|
dl_transcript_disabled = False |
|
dl_audio_disabled = not bool(session_data.get(session_id, {}).get("audio_path") and os.path.exists(session_data.get(session_id, {}).get("audio_path", ""))) |
|
dl_minutes_disabled = not (session_data[session_id].get("minutes") and "Error:" not in session_data[session_id].get("minutes")) |
|
dl_diarized_disabled = not (diarized_text and "Error:" not in diarized_text) |
|
delete_disabled = False |
|
upload_status_msg = f"Status: Processed & Transcribed: {session_data[session_id].get('original_filename', 'File')}" |
|
else: |
|
status_msg = "Status: Cannot diarize - No valid transcript available." |
|
output_text = existing_preview |
|
diarize_disabled = True |
|
elif triggered_id == "delete-btn" and delete_clicks: |
|
logging.info(f"Delete button clicked for session {session_id}") |
|
cleanup_session(session_id) |
|
status_msg = "Status: All session data deleted." |
|
output_text = "Session data cleared. Upload a new file." |
|
minutes_disabled = True |
|
diarize_disabled = True |
|
dl_transcript_disabled = True |
|
dl_minutes_disabled = True |
|
dl_diarized_disabled = True |
|
dl_audio_disabled = True |
|
delete_disabled = True |
|
upload_status_msg = "Status: Ready to Upload" |
|
else: |
|
loaded_audio_path = session_data.get(session_id, {}).get("audio_path") |
|
loaded_transcript = session_data.get(session_id, {}).get("transcript") |
|
loaded_minutes = session_data.get(session_id, {}).get("minutes") |
|
loaded_diarized = session_data.get(session_id, {}).get("diarized") |
|
temp_dir_exists = bool(session_data.get(session_id, {}).get("temp_dir")) |
|
loaded_original_filename = session_data.get(session_id, {}).get("original_filename") |
|
dl_audio_disabled = not (loaded_audio_path and os.path.exists(loaded_audio_path)) |
|
minutes_disabled = not (loaded_transcript and "Error:" not in loaded_transcript) |
|
diarize_disabled = not (loaded_transcript and "Error:" not in loaded_transcript) |
|
dl_transcript_disabled = not (loaded_transcript and "Error:" not in loaded_transcript) |
|
dl_minutes_disabled = not (loaded_minutes and "Error:" not in loaded_minutes) |
|
dl_diarized_disabled = not (loaded_diarized and "Error:" not in loaded_diarized) |
|
delete_disabled = not (loaded_audio_path or loaded_transcript or loaded_minutes or loaded_diarized or temp_dir_exists or loaded_original_filename) |
|
|
|
if loaded_diarized and "Error:" not in loaded_diarized: |
|
output_text = loaded_diarized |
|
elif loaded_minutes and "Error:" not in loaded_minutes: |
|
output_text = loaded_minutes |
|
elif loaded_transcript and "Error:" not in loaded_transcript: |
|
output_text = loaded_transcript |
|
else: |
|
output_text = "Upload an audio or video file to begin." |
|
if loaded_original_filename and dl_audio_disabled and not loaded_transcript: |
|
upload_status_msg = f"Status: Error processing {loaded_original_filename}?" |
|
elif loaded_audio_path and os.path.exists(loaded_audio_path): |
|
upload_status_msg = f"Status: Processed audio loaded ({loaded_original_filename or 'previous file'})." |
|
else: |
|
upload_status_msg = "Status: Ready to Upload" |
|
return ( |
|
status_msg, |
|
output_text, |
|
minutes_disabled, |
|
diarize_disabled, |
|
dl_transcript_disabled, |
|
dl_minutes_disabled, |
|
dl_audio_disabled, |
|
dl_diarized_disabled, |
|
delete_disabled, |
|
loading_output, |
|
upload_status_msg |
|
) |
|
|
|
@app.callback( |
|
Output("download-transcript", "data"), |
|
Input("download-transcript-btn", "n_clicks"), |
|
State("session-id", "data"), |
|
prevent_initial_call=True, |
|
) |
|
def download_transcript_file(n_clicks, session_id): |
|
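    """Serve the current transcript as a .docx download (plain-text fallback if the document cannot be created)."""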
if not session_id or not session_data.get(session_id, {}).get("transcript"): |
|
logging.warning(f"Download transcript requested but no data found for session {session_id}.") |
|
return None |
|
transcript = session_data[session_id]["transcript"] |
|
if "Error:" in transcript: |
|
logging.warning(f"Attempted to download transcript containing an error for session {session_id}.") |
|
return None |
|
session_dir = get_session_dir(session_id) |
|
transcript_filename = os.path.join(session_dir, f"transcript_{uuid.uuid4()}.docx") |
|
saved_doc_path = save_to_word(transcript, transcript_filename) |
|
if saved_doc_path: |
|
logging.info(f"Sending transcript file: {saved_doc_path}") |
|
original_filename_base = os.path.splitext(session_data[session_id].get("original_filename", "meeting"))[0] |
|
download_filename = f"{original_filename_base}_transcript.docx" |
|
return dcc.send_file(saved_doc_path, filename=download_filename) |
|
else: |
|
logging.error(f"Failed to create Word document for transcript download for session {session_id}") |
|
return dcc.send_data_frame(lambda: transcript, "meeting_transcript.txt") |
|
|
|
@app.callback( |
|
Output("download-minutes", "data"), |
|
Input("download-minutes-btn", "n_clicks"), |
|
State("session-id", "data"), |
|
prevent_initial_call=True, |
|
) |
|
def download_minutes_file(n_clicks, session_id): |
|
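    """Serve the generated minutes as a .docx download (plain-text fallback)."""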
if not session_id or not session_data.get(session_id, {}).get("minutes"): |
|
logging.warning(f"Download minutes requested but no data found for session {session_id}.") |
|
return None |
|
minutes = session_data[session_id]["minutes"] |
|
if "Error:" in minutes: |
|
logging.warning(f"Attempted to download minutes containing an error for session {session_id}.") |
|
return None |
|
session_dir = get_session_dir(session_id) |
|
minutes_filename = os.path.join(session_dir, f"meeting_minutes_{uuid.uuid4()}.docx") |
|
saved_doc_path = save_to_word(minutes, minutes_filename) |
|
if saved_doc_path: |
|
logging.info(f"Sending minutes file: {saved_doc_path}") |
|
original_filename_base = os.path.splitext(session_data[session_id].get("original_filename", "meeting"))[0] |
|
download_filename = f"{original_filename_base}_minutes.docx" |
|
return dcc.send_file(saved_doc_path, filename=download_filename) |
|
else: |
|
logging.error(f"Failed to create Word document for minutes download for session {session_id}") |
|
return dcc.send_data_frame(lambda: minutes, "meeting_minutes.txt") |
|
|
|
@app.callback( |
|
Output("download-audio", "data"), |
|
Input("download-audio-btn", "n_clicks"), |
|
State("session-id", "data"), |
|
prevent_initial_call=True, |
|
) |
|
def download_audio_file(n_clicks, session_id): |
|
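    """Serve the processed audio file stored for this session."""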
if not session_id or not session_data.get(session_id, {}).get("audio_path"): |
|
logging.warning(f"Download audio requested but no processed audio path found for session {session_id}.") |
|
return None |
|
audio_path = session_data[session_id]["audio_path"] |
|
original_filename = session_data[session_id].get("original_filename", "meeting_audio") |
|
if os.path.exists(audio_path): |
|
logging.info(f"Sending processed audio file: {audio_path}") |
|
original_filename_base = os.path.splitext(original_filename)[0] |
|
_, current_ext = os.path.splitext(audio_path) |
|
download_filename = f"{original_filename_base}_processed_audio{current_ext}" |
|
return dcc.send_file(audio_path, filename=download_filename) |
|
else: |
|
logging.error(f"Processed audio file not found at path {audio_path} for session {session_id}") |
|
return None |
|
|
|
@app.callback( |
|
Output("download-diarized", "data"), |
|
Input("download-diarized-btn", "n_clicks"), |
|
State("session-id", "data"), |
|
prevent_initial_call=True, |
|
) |
|
def download_diarized_file(n_clicks, session_id): |
|
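    """Serve the diarized transcript as a .docx download (plain-text fallback)."""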
if not session_id or not session_data.get(session_id, {}).get("diarized"): |
|
logging.warning(f"Download diarized transcript requested but no data found for session {session_id}.") |
|
return None |
|
diarized = session_data[session_id]["diarized"] |
|
if "Error:" in diarized: |
|
logging.warning(f"Attempted to download diarized transcript containing an error for session {session_id}.") |
|
return None |
|
session_dir = get_session_dir(session_id) |
|
diarized_filename = os.path.join(session_dir, f"diarized_{uuid.uuid4()}.docx") |
|
saved_doc_path = save_to_word(diarized, diarized_filename) |
|
if saved_doc_path: |
|
logging.info(f"Sending diarized transcript file: {saved_doc_path}") |
|
original_filename_base = os.path.splitext(session_data[session_id].get("original_filename", "meeting"))[0] |
|
download_filename = f"{original_filename_base}_diarized.docx" |
|
return dcc.send_file(saved_doc_path, filename=download_filename) |
|
else: |
|
logging.error(f"Failed to create Word document for diarized transcript download for session {session_id}") |
|
return dcc.send_data_frame(lambda: diarized, "meeting_diarized.txt") |
|
|
|
if __name__ == '__main__': |
|
print("Starting the Dash application...") |
|
app.run(debug=False, host='0.0.0.0', port=7860) |
|
print("Dash application has finished running.") |