# app.py

import os
import time

import gradio as gr

from config import hf_token, openai_api_key
from llm_loader import load_model
from processing import process_input
from transcription_diarization import process_video
from visualization import create_charts

# Load the model
llm = load_model(openai_api_key)
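# load_model is assumed to return a chat-capable LLM client (e.g. an OpenAI
# wrapper) that process_input can prompt directly.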

# Mapping of display names to language codes
LANGUAGE_MAP = {
    "English": "en",
    "Hebrew": "he",
    "Italian": "it",
    "French": "fr",
    "German": "de",
    "Chinese": "zh",
    "Arabic": "ar"
}
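# The two-letter values are assumed to be the ISO 639-1 codes that
# process_video forwards to the underlying transcription model.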

def analyze_video(video_path, language_display_name, max_speakers, progress=gr.Progress()):
    start_time = time.time()

    if not video_path:
        # 49 visible output slots in total: put the message in the first
        # Markdown, hide the remaining 48, and reset the status box.
        return [gr.update(value="Please upload a video file.", visible=True)] + [gr.update(visible=False)] * 48 + ["Analysis not started."]

    # Convert the display name to the language code (fall back to English
    # if an unexpected label ever comes through)
    language = LANGUAGE_MAP.get(language_display_name, "en")

    # Start the progress bar
    progress(0, desc="Starting analysis...")

    # Progress for diarization
    progress(0.2, desc="Starting diarization...")
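    # process_video is assumed to transcribe and diarize the audio, writing a
    # speaker-labelled SRT file and returning its path.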
    srt_path = process_video(video_path, hf_token, language, max_speakers)
    progress(0.4, desc="Diarization complete.")

    # Load the transcript produced by the diarization step
    with open(srt_path, 'r', encoding='utf-8') as file:
        transcription = file.read()
    progress(0.6, desc="Transcript loaded.")

    # Progress for processing the transcription
    progress(0.7, desc="Processing transcription...")
    results = process_input(transcription, llm)
    progress(0.8, desc="Transcription processing complete.")

    # Progress for creating charts
    progress(0.9, desc="Generating charts...")
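    # create_charts is expected to return two dicts keyed by speaker ID: one
    # mapping chart names ("attachment", "dimensions", "bigfive", "personality")
    # to figures, the other mapping the same names to explanation text
    # (see the unpacking loop below).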
    charts, explanations = create_charts(results)
    progress(1.0, desc="Charts generation complete.")

    # Clean up the temporary SRT file
    os.remove(srt_path)

    end_time = time.time()
    execution_time = end_time - start_time

    # Prepare outputs for each speaker
    output_components = []
    for speaker_id, speaker_charts in charts.items():
        speaker_explanations = explanations[speaker_id]

        output_components.extend([
            gr.update(value=f"### {speaker_id}", visible=True),
            gr.update(value=speaker_charts.get("attachment", None), visible=True),
            gr.update(value=speaker_explanations.get("attachment", ""), visible=True),
            gr.update(value=speaker_charts.get("dimensions", None), visible=True),
            gr.update(value=speaker_charts.get("bigfive", None), visible=True),
            gr.update(value=speaker_explanations.get("bigfive", ""), visible=True),
            gr.update(value=speaker_charts.get("personality", None), visible=True),
            gr.update(value=speaker_explanations.get("personality", ""), visible=True),
        ])
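        # NOTE: the update order above must mirror the component order of each
        # speaker block created in the Blocks layout below.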

    # Pad the speaker outputs with invisible updates *before* appending the
    # transcript, so the transcript always lands in the dedicated Transcript
    # textbox (the 49th output) rather than in an unused speaker slot
    while len(output_components) < 48:
        output_components.append(gr.update(visible=False))

    # Add the transcript at the end
    output_components.append(gr.update(value=transcription, visible=True))

    # Add the execution time
    output_components.append(f"Completed in {int(execution_time)} seconds.")

    return output_components

# Define the Gradio interface
with gr.Blocks() as iface:
    gr.Markdown("# AI Personality Detection")
    gr.Markdown("Upload a video")

    video_input = gr.Video(label="Upload Video")
    language_input = gr.Dropdown(choices=list(LANGUAGE_MAP.keys()), value="English", label="Select Language")
    max_speakers = gr.Slider(minimum=1, maximum=4, step=1, value=2, label="Maximum Number of Speakers")

    analyze_button = gr.Button("Analyze")

    # Create placeholders for output components
    output_components = []
    for _ in range(6):  # Placeholders for up to 6 speakers (headroom beyond the slider's max of 4)
        output_components.extend([
            gr.Markdown(visible=False),
            gr.Plot(visible=False),
            gr.Textbox(label="Attachment Styles Explanation", visible=False),
            gr.Plot(visible=False),
            gr.Plot(visible=False),
            gr.Textbox(label="Big Five Traits Explanation", visible=False),
            gr.Plot(visible=False),
            gr.Textbox(label="Personality Disorders Explanation", visible=False),
        ])
    output_components.append(gr.Textbox(label="Transcript", lines=10, visible=False))
    execution_info_box = gr.Textbox(label="Execution Information", value="Waiting for analysis...", lines=2)

    analyze_button.click(
        fn=analyze_video,
        inputs=[video_input, language_input, max_speakers],
        outputs=output_components + [execution_info_box],
        show_progress=True
    )
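    # Gradio maps returned values to outputs positionally, so analyze_video
    # must return exactly len(output_components) + 1 == 50 values on every call.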

# Launch the app
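# Note: older Gradio 3.x releases only render gr.Progress when the app is
# queued (iface.queue() before launch); Gradio 4+ enables the queue by default.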
if __name__ == "__main__":
    iface.launch()