Upload 8 files
Changed files:
- app.py  +116 -89
- config.py  +8 -0
- llm_loader.py  +12 -0
- output_parser.py  +4 -66
- processing.py  +57 -223
- requirements.txt  +6 -5
- transcription_diarization.py  +52 -42
- visualization.py  +67 -96
app.py
CHANGED
@@ -1,92 +1,119 @@
-(The previous 92-line app.py is almost entirely unreadable in this export; only its `import gradio as gr` and `from processing import process_input` lines and the closing `if __name__ == "__main__": demo.launch()` block survive. The new version follows.)
 import gradio as gr
+from llm_loader import load_model
 from processing import process_input
+from transcription_diarization import process_video
+from visualization import create_charts
+import os
+import time
+from config import hf_token, openai_api_key
+
+# Load the model
+llm = load_model(openai_api_key)
+
+# Mapping of display names to language codes
+LANGUAGE_MAP = {
+    "English": "en",
+    "Hebrew": "he",
+    "Italian": "it",
+    "French": "fr",
+    "German": "de",
+    "Chinese": "zh",
+    "Arabic": "ar"
+}
+
+
+def analyze_video(video_path, language_display_name, max_speakers, progress=gr.Progress()):
+    start_time = time.time()
+
+    if not video_path:
+        return "Please upload a video file.", gr.Textbox.update(value="Analysis not started.")
+
+    # Convert the display name to the language code
+    language = LANGUAGE_MAP[language_display_name]
+
+    # Start the progress bar
+    progress(0, desc="Starting analysis...")
+
+    # Progress for diarization
+    progress(0.2, desc="Starting diarization...")
+    srt_path = process_video(video_path, hf_token, language, max_speakers)
+    progress(0.4, desc="Diarization complete.")
+
+    # Progress for transcription
+    with open(srt_path, 'r', encoding='utf-8') as file:
+        transcription = file.read()
+    progress(0.6, desc="Transcription complete.")
+
+    # Progress for processing the transcription
+    progress(0.7, desc="Processing transcription...")
+    results = process_input(transcription, llm)
+    progress(0.8, desc="Transcription processing complete.")
+
+    # Progress for creating charts
+    progress(0.9, desc="Generating charts...")
+    charts, explanations = create_charts(results)
+    progress(1.0, desc="Charts generation complete.")
+
+    # Clean up the temporary SRT file
+    os.remove(srt_path)
+
+    end_time = time.time()
+    execution_time = end_time - start_time
+
+    # Prepare outputs for each speaker
+    output_components = []
+    for speaker_id, speaker_charts in charts.items():
+        speaker_explanations = explanations[speaker_id]
+
+        output_components.extend([
+            gr.Markdown(f"### Speaker {speaker_id}"),
+            gr.Plot(speaker_charts["attachment"]),
+            gr.Textbox(value=speaker_explanations["attachment"],
+                       label=f"Attachment Styles Explanation - Speaker {speaker_id}", lines=2),
+            gr.Plot(speaker_charts["dimensions"]),
+            gr.Plot(speaker_charts["bigfive"]),
+            gr.Textbox(value=speaker_explanations["bigfive"],
+                       label=f"Big Five Traits Explanation - Speaker {speaker_id}", lines=2),
+            gr.Plot(speaker_charts["personality"]),
+            gr.Textbox(value=speaker_explanations["personality"],
+                       label=f"Personality Disorders Explanation - Speaker {speaker_id}", lines=2)
+        ])
+
+    # Add the transcript and execution info at the end
+    output_components.extend([
+        gr.Textbox(value=transcription, label="Transcript", lines=10),
+        gr.Textbox.update(value=f"Completed in {int(execution_time)} seconds.", label="Execution Information",
+                          visible=True)
+    ])
+
+    return output_components, gr.Textbox.update(value=f"Completed in {int(execution_time)} seconds.")
+
+
+# Define the Gradio interface
+with gr.Blocks() as iface:
+    gr.Markdown("# Video Analysis Tool")
+    gr.Markdown("Upload a video to analyze speech patterns and personality traits.")
+
+    video_input = gr.Video(label="Upload Video")
+    language_input = gr.Dropdown(choices=list(LANGUAGE_MAP.keys()), value="English", label="Select Language")
+    max_speakers = gr.Slider(minimum=1, maximum=4, step=1, value=2, label="Maximum Number of Speakers")
+
+    analyze_button = gr.Button("Analyze")
+
+    # Placeholder for dynamic outputs
+    output_section = gr.Column()
+
+    # Execution time box, initially displaying a waiting message
+    execution_info_box = gr.Textbox(label="Execution Information", value="Waiting for analysis...", lines=2,
+                                    visible=True)
+
+    analyze_button.click(
+        fn=analyze_video,
+        inputs=[video_input, language_input, max_speakers],
+        outputs=[output_section, execution_info_box],
+        show_progress=True  # Enables the progress bar in Gradio
+    )
+
+# Launch the app
 if __name__ == "__main__":
+    iface.launch()
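If the Space pins a Gradio 4.x release, the gr.Textbox.update(...) calls above will fail, since component-level .update() helpers were removed in Gradio 4. A minimal sketch of the generic replacement; this is an assumption about the installed Gradio version, not part of this commit:

import gradio as gr

def completion_update(execution_time: float):
    # Gradio 4-style property update for an already-declared Textbox output
    return gr.update(value=f"Completed in {int(execution_time)} seconds.", visible=True)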
config.py
ADDED
@@ -0,0 +1,8 @@
+# config.py
+import os
+from dotenv import load_dotenv
+
+load_dotenv()  # This loads the variables from .env file
+
+openai_api_key = os.getenv('OPENAI_API_KEY')
+hf_token = os.getenv('hf_token')
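For local runs this module expects the two variables in a .env file next to the code; on a Hugging Face Space the same names can be provided as Space secrets, since os.getenv reads the environment either way. A minimal sketch of such a file, with placeholder values:

# .env -- placeholder values, do not commit real keys
OPENAI_API_KEY=sk-your-openai-key
hf_token=hf_your-huggingface-token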
llm_loader.py
ADDED
@@ -0,0 +1,12 @@
+# llm_loader.py
+from langchain.chat_models import ChatOpenAI
+
+def load_model(openai_api_key):
+    return ChatOpenAI(
+        model_name="gpt-4o",
+        openai_api_key=openai_api_key,
+        temperature=0.01,
+        max_tokens=2096,
+        top_p=0.95,
+        top_k=25
+    )
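A minimal usage sketch of the loader (the key is a placeholder). One caveat, stated as an assumption: langchain's ChatOpenAI does not define top_p or top_k as constructor fields, and the OpenAI chat API has no top_k parameter at all, so those two arguments may be shifted into model_kwargs or rejected at request time.

from langchain.schema import HumanMessage
from llm_loader import load_model

llm = load_model("sk-your-openai-key")  # placeholder key
reply = llm([HumanMessage(content="Reply with the single word: ready")])
print(reply.content)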
output_parser.py
CHANGED
@@ -1,7 +1,5 @@
-from langchain.output_parsers import StructuredOutputParser, ResponseSchema
-from langchain.prompts import PromptTemplate
 from pydantic import BaseModel
-from
+from langchain.output_parsers import PydanticOutputParser
 
 class AttachmentStyle(BaseModel):
     speaker: str
@@ -37,66 +35,6 @@ class PersonalityDisorder(BaseModel):
     obsessional: int
     explanation: str
 
-    (three removed lines unreadable in this export: the opening of the attachment response-schema list)
-    ResponseSchema(name="anxious_preoccupied", description="Probability of anxious-preoccupied attachment style (0-1)"),
-    ResponseSchema(name="dismissive_avoidant", description="Probability of dismissive-avoidant attachment style (0-1)"),
-    ResponseSchema(name="fearful_avoidant", description="Probability of fearful-avoidant attachment style (0-1)"),
-    ResponseSchema(name="self_rating", description="Self rating (0-10)"),
-    ResponseSchema(name="others_rating", description="Others rating (0-10)"),
-    ResponseSchema(name="anxiety", description="Anxiety rating (0-10)"),
-    ResponseSchema(name="avoidance", description="Avoidance rating (0-10)"),
-    ResponseSchema(name="explanation", description="Brief explanation of the attachment style")
-]
-
-bigfive_response_schemas = [
-    ResponseSchema(name="speaker", description="The name or number of the speaker"),
-    ResponseSchema(name="extraversion", description="Extraversion rating (-10 to 10)"),
-    ResponseSchema(name="agreeableness", description="Agreeableness rating (-10 to 10)"),
-    ResponseSchema(name="conscientiousness", description="Conscientiousness rating (-10 to 10)"),
-    ResponseSchema(name="neuroticism", description="Neuroticism rating (-10 to 10)"),
-    ResponseSchema(name="openness", description="Openness rating (-10 to 10)"),
-    ResponseSchema(name="explanation", description="Brief explanation of the Big Five traits")
-]
-
-personality_response_schemas = [
-    ResponseSchema(name="speaker", description="The name or number of the speaker"),
-    ResponseSchema(name="depressed", description="Depressed rating (0-4)"),
-    ResponseSchema(name="paranoid", description="Paranoid rating (0-4)"),
-    ResponseSchema(name="schizoid_schizotypal", description="Schizoid-Schizotypal rating (0-4)"),
-    ResponseSchema(name="antisocial_psychopathic", description="Antisocial-Psychopathic rating (0-4)"),
-    ResponseSchema(name="borderline_dysregulated", description="Borderline-Dysregulated rating (0-4)"),
-    ResponseSchema(name="narcissistic", description="Narcissistic rating (0-4)"),
-    ResponseSchema(name="anxious_avoidant", description="Anxious-Avoidant rating (0-4)"),
-    ResponseSchema(name="dependent_victimized", description="Dependent-Victimized rating (0-4)"),
-    ResponseSchema(name="obsessional", description="Obsessional rating (0-4)"),
-    ResponseSchema(name="explanation", description="Brief explanation of the personality disorders")
-]
-
-attachment_parser = StructuredOutputParser.from_response_schemas(attachment_response_schemas)
-bigfive_parser = StructuredOutputParser.from_response_schemas(bigfive_response_schemas)
-personality_parser = StructuredOutputParser.from_response_schemas(personality_response_schemas)
-
-def get_prompt_template(task: str, parser: StructuredOutputParser) -> PromptTemplate:
-    return PromptTemplate(
-        template="Analyze the following text according to the given task:\n\n{task}\n\n{format_instructions}\n\nText: {text}\n\nAnalysis:",
-        input_variables=["text"],
-        partial_variables={
-            "task": task,
-            "format_instructions": parser.get_format_instructions()
-        }
-    )
-
-def parse_analysis_output(output: str, analysis_type: str) -> Dict[str, BaseModel]:
-    if analysis_type == "attachments":
-        parsed = attachment_parser.parse(output)
-        return {parsed['speaker']: AttachmentStyle(**parsed)}
-    elif analysis_type == "bigfive":
-        parsed = bigfive_parser.parse(output)
-        return {parsed['speaker']: BigFiveTraits(**parsed)}
-    elif analysis_type == "personalities":
-        parsed = personality_parser.parse(output)
-        return {parsed['speaker']: PersonalityDisorder(**parsed)}
-    else:
-        raise ValueError(f"Unknown analysis type: {analysis_type}")
+attachment_parser = PydanticOutputParser(pydantic_object=AttachmentStyle)
+bigfive_parser = PydanticOutputParser(pydantic_object=BigFiveTraits)
+personality_parser = PydanticOutputParser(pydantic_object=PersonalityDisorder)
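These parsers embed JSON format instructions into the prompt and turn the model's reply back into the pydantic models defined above. A minimal round-trip sketch; the JSON values are invented, and the field list is inferred from how visualization.py reads AttachmentStyle, so the real model may require more fields:

from output_parser import attachment_parser

# attachment_parser.get_format_instructions() is what processing.py appends to the prompt
fake_reply = '''{"speaker": "1", "secured": 0.6, "anxious_preoccupied": 0.2,
  "dismissive_avoidant": 0.1, "fearful_avoidant": 0.1, "self_rating": 7,
  "others_rating": 6, "anxiety": 3, "avoidance": 2,
  "explanation": "Mostly secure, low-avoidance language."}'''

parsed = attachment_parser.parse(fake_reply)  # -> AttachmentStyle instance
print(parsed.secured, parsed.explanation)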
processing.py
CHANGED
@@ -1,231 +1,65 @@
-    (old lines 1-7 are unreadable in this export apart from two truncated "import ..." statements)
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from langdetect import detect
-from langchain.chains import RetrievalQA
-from langchain_community.llms import HuggingFacePipeline
-from langchain.prompts import PromptTemplate
-from langchain_community.document_loaders import TextLoader, PyPDFLoader
-from langchain.text_splitter import CharacterTextSplitter
-from langchain_community.vectorstores import FAISS
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from transcription_diarization import process_video
-from output_parser import get_prompt_template, attachment_parser, bigfive_parser, personality_parser, parse_analysis_output
-
-hf_token = os.environ.get('hf_secret')
-if not hf_token:
-    raise ValueError("HF_TOKEN not found in environment variables. Please set it in the Space secrets.")
-
-login(token=hf_token)
-
-def load_instructions(file_path):
-    with open(file_path, 'r') as file:
+# processing.py
+from langchain.schema import HumanMessage
+from output_parser import attachment_parser, bigfive_parser, personality_parser
+
+
+def load_text(file_path: str) -> str:
+    with open(file_path, 'r', encoding='utf-8') as file:
         return file.read().strip()
 
-def load_knowledge(file_path):
-    loader = TextLoader(file_path)
-    documents = loader.load()
-    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-    texts = text_splitter.split_documents(documents)
-    return texts
-
-    (several unreadable removed lines)
-
-def detect_language(text):
+
+def truncate_text(text: str, max_tokens: int = 10000) -> str:
+    words = text.split()
+    if len(words) > max_tokens:
+        truncated_text = ' '.join(words[:max_tokens])
+        print(f"Text truncated from {len(words)} to {max_tokens} words")
+        return truncated_text
+    print(f"Text not truncated, contains {len(words)} words")
+    return text
+
+
+def process_task(llm, input_text: str, general_task: str, specific_task: str, knowledge: str, output_parser):
+    truncated_input = truncate_text(input_text)
+
+    prompt = f"""{general_task}
+
+{specific_task}
+
+Knowledge: {knowledge}
+
+Input: {truncated_input}
 
+{output_parser.get_format_instructions()}
+
+Analysis:"""
+
+    messages = [HumanMessage(content=prompt)]
+    response = llm(messages)
+    print(response)
 
     try:
-        (unreadable removed lines: evidently the rest of detect_language and the start of the SequentialAnalyzer class and its __init__)
-        self.pipe = self.create_pipeline(self.model)
-
-    def set_seed(self, seed):
-        random.seed(seed)
-        np.random.seed(seed)
-        torch.manual_seed(seed)
-        if torch.cuda.is_available():
-            torch.cuda.manual_seed_all(seed)
-
-    def load_model(self):
-        model = AutoModelForCausalLM.from_pretrained(
-            self.model_name,
-            torch_dtype=torch.bfloat16,
-            device_map="auto",
-            use_auth_token=self.hf_token,
-            use_cache=False,
-            load_in_4bit=False
-        )
-        return model
-
-    def create_pipeline(self, model):
-        from transformers import pipeline
-        tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_auth_token=self.hf_token)
-        return pipeline(
-            "text-generation",
-            model=model,
-            top_k=50,
-            top_p=0.8,
-            tokenizer=tokenizer,
-            max_new_tokens=512,
-            temperature=0.3,
-            repetition_penalty=1.2,
-            do_sample=False,
-            truncation=True,
-            bad_words_ids=[[tokenizer.encode(char, add_special_tokens=False)[0]] for char in "*"]
-        )
-
-    def post_process_output(self, output):
-        return re.sub(r'[*]', '', output).strip()
-
-    def analyze_task(self, content, task, knowledge_db, analysis_type):
-        tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_auth_token=self.hf_token)
-
-        input_tokens = len(tokenizer.encode(content))
-
-        max_input_length = 800
-        encoded_input = tokenizer.encode(content, truncation=True, max_length=max_input_length)
-        truncated_content = tokenizer.decode(encoded_input)
-
-        if len(encoded_input) == max_input_length:
-            print(f"Warning: Input was truncated from {input_tokens} to {max_input_length} tokens.")
-
-        llm = HuggingFacePipeline(pipeline=self.pipe)
-
-        if analysis_type == "attachments":
-            parser = attachment_parser
-        elif analysis_type == "bigfive":
-            parser = bigfive_parser
-        elif analysis_type == "personalities":
-            parser = personality_parser
-        else:
-            raise ValueError(f"Unknown analysis type: {analysis_type}")
-
-        prompt_template = PromptTemplate(
-            template=task + "\n\n{context}\n\n{query}\n\nSpeaker: {speaker}\n\n" + parser.get_format_instructions() + "\n\nAnalysis:",
-            input_variables=["context", "query", "speaker"]
-        )
-
-        if knowledge_db is None:
-            chain = prompt_template | llm
-            result = chain.invoke({"query": truncated_content, "speaker": "Unknown"})
-            output = result
-        else:
-            chain = RetrievalQA.from_chain_type(
-                llm=llm,
-                chain_type="stuff",
-                retriever=knowledge_db.as_retriever(),
-                chain_type_kwargs={"prompt": prompt_template}
-            )
-            result = chain({"query": truncated_content, "speaker": "Unknown"})
-            output = result['result']  # RetrievalQA returns a dict with 'result' key
-
-        print(f"Raw model output: {output}")
-
-        try:
-            cleaned_output = self.post_process_output(output)
-            parsed_output = parser.parse(cleaned_output)
-        except Exception as e:
-            raise ValueError(f"Error parsing output: {e}")
-
-        # Check if all required keys are present
-        required_keys = {schema.name for schema in parser.response_schemas}
-        missing_keys = required_keys - parsed_output.keys()
-
-        if missing_keys:
-            raise ValueError(f"Missing some input keys: {missing_keys}")
-
-        return cleaned_output, input_tokens
-
-def process_input(input_file, max_speakers, progress=None):
-    start_time = time.time()
-
-    def safe_progress(value, desc=""):
-        if progress is not None:
-            try:
-                progress(value, desc=desc)
-            except Exception as e:
-                print(f"Progress update failed: {e}")
-
-    safe_progress(0, desc="Processing file")
-
-    if isinstance(input_file, str):
-        file_path = input_file
-    else:
-        file_path = input_file.name
-
-    file_extension = os.path.splitext(file_path)[1].lower()
-
-    if file_extension in ['.txt', '.srt']:
-        with open(file_path, 'r', encoding='utf-8') as file:
-            content = file.read()
-        transcription = content
-    elif file_extension == '.pdf':
-        loader = PyPDFLoader(file_path)
-        pages = loader.load_and_split()
-        content = '\n'.join([page.page_content for page in pages])
-        transcription = content
-    elif file_extension in ['.mp4', '.avi', '.mov']:
-        safe_progress(0.2, desc="Processing video...")
-        srt_path = process_video(file_path, hf_token, "en", max_speakers)
-        with open(srt_path, 'r', encoding='utf-8') as file:
-            content = file.read()
-        transcription = content
-        os.remove(srt_path)
-    else:
-        return "Unsupported file format. Please upload a TXT, SRT, PDF, or video file.", None, None, None, None, None, None
-
-    detected_language = detect_language(content)
-
-    safe_progress(0.2, desc="Initializing analyzer")
-    analyzer = SequentialAnalyzer(hf_token)
-
+        parsed_output = output_parser.parse(response.content)
+        return parsed_output
+    except Exception as e:
+        print(f"Error parsing output: {e}")
+        return None
+
+
+def process_input(input_text: str, llm):
+    general_task = load_text("tasks/general_task.txt")
+
     tasks = [
-        (two removed task tuples, unreadable in this export)
+        ("attachments", "tasks/Attachments_task.txt", "knowledge/bartholomew_attachments_definitions.txt",
+         attachment_parser),
+        ("bigfive", "tasks/BigFive_task.txt", "knowledge/bigfive_definitions.txt", bigfive_parser),
+        ("personalities", "tasks/Personalities_task.txt", "knowledge/personalities_definitions.txt", personality_parser)
     ]
-
-    results =
-        (unreadable removed lines: evidently the loop that filled results by running the analyzer over each task)
-
-    end_time = time.time()
-    execution_time = end_time - start_time
-
-    safe_progress(1.0, desc="Analysis complete!")
-
-    parsed_results = [parse_analysis_output(result, analysis_type) for result, analysis_type in results]
-
-    return (
-        "Analysis complete!",
-        f"{execution_time:.2f} seconds",
-        detected_language,
-        parsed_results[0],  # attachments
-        parsed_results[1],  # bigfive
-        parsed_results[2],  # personalities,
-        transcription
-    )
+
+    results = {}
+
+    for task_name, task_file, knowledge_file, parser in tasks:
+        specific_task = load_text(task_file)
+        knowledge = load_text(knowledge_file)
+        results[task_name] = process_task(llm, input_text, general_task, specific_task, knowledge, parser)
+
+    return results
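A minimal usage sketch of the new flow, assuming the tasks/ and knowledge/ text files referenced above exist next to the code (the transcript string and key are placeholders):

from llm_loader import load_model
from processing import process_input

llm = load_model("sk-your-openai-key")  # placeholder key
transcript = "1\n00:00:01 --> 00:00:04\nSpeaker 1: I feel calmer once we talk things through.\n"
results = process_input(transcript, llm)

# results is keyed by task name; each value is a parsed pydantic object, or None if parsing failed
print(results["attachments"])
print(results["bigfive"])
print(results["personalities"])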
requirements.txt
CHANGED
@@ -6,14 +6,15 @@ langchain
 langchain-community
 faiss-gpu
 bitsandbytes
-seaborn
 plotly
 sentence-transformers
-huggingface_hub
 moviepy
 pyannote.audio
 librosa
-(one removed line unreadable in this export)
-opencv-python
+soundfile
 numpy
-accelerate
+accelerate
+
+
+
+
transcription_diarization.py
CHANGED
@@ -6,54 +6,54 @@ from moviepy.editor import VideoFileClip
 from pyannote.audio import Pipeline
 from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 import librosa
+import soundfile as sf
 import datetime
 from collections import defaultdict
 import numpy as np
-import spaces
 
 class LazyDiarizationPipeline:
     def __init__(self):
         self.pipeline = None
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-
-    def get_pipeline(self, diarization_access_token):
+    def get_pipeline(self, hf_token):
         if self.pipeline is None:
-            self.pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1",
-                (one removed line unreadable in this export)
+            self.pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1",
+                                                     use_auth_token=hf_token)
+            self.pipeline = self.pipeline.to(self.device)
             torch.cuda.empty_cache()
             gc.collect()
         return self.pipeline
 
+
 class LazyTranscriptionPipeline:
     def __init__(self):
         self.model = None
         self.processor = None
         self.pipe = None
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-
-    def get_pipeline(self, language):
+    def get_pipeline(self):
         if self.pipe is None:
             model_id = "openai/whisper-large-v3"
+            torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
             self.model = AutoModelForSpeechSeq2Seq.from_pretrained(
-                model_id, torch_dtype=
+                model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
             )
-            self.model.to(
+            self.model.to(self.device)
             self.processor = AutoProcessor.from_pretrained(model_id)
             self.pipe = pipeline(
                 "automatic-speech-recognition",
                 model=self.model,
                 tokenizer=self.processor.tokenizer,
                 feature_extractor=self.processor.feature_extractor,
-                max_new_tokens=128,
                 chunk_length_s=30,
-                batch_size=8,
                 return_timestamps=True,
-
-                device=torch.device("cuda"),
-                generate_kwargs={"language": language}
+                device=self.device
             )
         return self.pipe
 
+
 lazy_diarization_pipeline = LazyDiarizationPipeline()
 lazy_transcription_pipeline = LazyTranscriptionPipeline()
 
@@ -62,12 +62,13 @@ def extract_audio(video_path, audio_path):
     audio = video.audio
     audio.write_audiofile(audio_path, codec='pcm_s16le', fps=16000)
 
+
 def format_timestamp(seconds):
     return str(datetime.timedelta(seconds=seconds)).split('.')[0]
 
-
-def transcribe_audio(audio_path, language, progress=None):
-    pipe = lazy_transcription_pipeline.get_pipeline(
+
+def transcribe_audio(audio_path, language):
+    pipe = lazy_transcription_pipeline.get_pipeline()
 
     audio, sr = librosa.load(audio_path, sr=16000)
     duration = len(audio) / sr
@@ -81,22 +82,44 @@ def transcribe_audio(audio_path, language, progress=None):
         audio_chunk = audio[start:end]
         audio_chunk = (audio_chunk * 32767).astype(np.float32)
 
-        result = pipe(audio_chunk)
+        result = pipe(audio_chunk, generate_kwargs={"language": language, "task": "transcribe"})
+
         transcription_txt += result["text"]
         for chunk in result["chunks"]:
             start_time, end_time = chunk["timestamp"]
+            if start_time is None:
+                start_time = 0
+            if end_time is None:
+                end_time = 0
             transcription_chunks.append({
                 "start": start_time + i * 30,
                 "end": end_time + i * 30,
                 "text": chunk["text"]
            })
 
-        if progress:
-            progress(0.6 + 0.2 * (i + 1) / n_chunks, desc=f"Transcription Progress: {int(((i + 1) / n_chunks) * 100)}%")
-
     return transcription_txt, transcription_chunks
 
-(one removed line unreadable in this export)
+
+def diarize_audio(audio_path, pipeline, max_speakers):
+    # Load the entire audio file
+    audio, sr = librosa.load(audio_path, sr=16000)
+
+    # Write the audio to a temporary file if needed for the pipeline
+    temp_audio_path = f"{audio_path}_temp.wav"
+    sf.write(temp_audio_path, audio, sr)
+
+    # Perform speaker diarization on the entire audio file
+    diarization = pipeline(temp_audio_path, num_speakers=max_speakers)
+
+    # Clean up the temporary file
+    os.remove(temp_audio_path)
+    torch.cuda.empty_cache()
+    gc.collect()
+
+    return diarization
+
+
+def create_combined_srt(transcription_chunks, diarization, output_path, max_speakers):
     speaker_segments = []
     speaker_durations = defaultdict(float)
 
@@ -105,7 +128,7 @@ def create_combined_srt(transcription_chunks, diarization, output_path, max_speakers):
         speaker_segments.append((segment.start, segment.end, speaker))
 
     sorted_speakers = sorted(speaker_durations.items(), key=lambda x: x[1], reverse=True)[:max_speakers]
-
+
     speaker_map = {}
     for i, (speaker, _) in enumerate(sorted_speakers, start=1):
         speaker_map[speaker] = f"Speaker {i}"
@@ -132,28 +155,20 @@ def create_combined_srt(transcription_chunks, diarization, output_path, max_speakers):
         duration_str = format_timestamp(duration).split('.')[0].lstrip('0')
         srt_file.write(f"Speaker {i} (originally {speaker}): total duration {duration_str}\n")
 
-
-def process_video(video_path, diarization_access_token, language, max_speakers=3):
+
+def process_video(video_path, hf_token, language, max_speakers=3):
     base_name = os.path.splitext(video_path)[0]
     audio_path = f"{base_name}.wav"
     extract_audio(video_path, audio_path)
 
-
-
-    pipeline = lazy_diarization_pipeline.get_pipeline(diarization_access_token)
-    diarization = pipeline(audio_path)
-    if progress:
-        progress(0.5, desc="Diarization complete.")
+    pipeline = lazy_diarization_pipeline.get_pipeline(hf_token)
+    diarization = diarize_audio(audio_path, pipeline, max_speakers)
 
     # Clear GPU memory after diarization
     torch.cuda.empty_cache()
     gc.collect()
 
-
-    progress(0.6, desc="Performing transcription...")
-    transcription, chunks = transcribe_audio(audio_path, language, progress)
-    if progress:
-        progress(0.8, desc="Transcription complete.")
+    transcription, chunks = transcribe_audio(audio_path, language)
 
     # Clear GPU memory after transcription
     torch.cuda.empty_cache()
@@ -161,8 +176,6 @@ def process_video(video_path, diarization_access_token, language, max_speakers=3):
 
     combined_srt_path = f"{base_name}_combined.srt"
     create_combined_srt(chunks, diarization, combined_srt_path, max_speakers)
-    if progress:
-        progress(0.9, desc="Combined SRT file created.")
 
     os.remove(audio_path)
 
@@ -170,7 +183,4 @@ def process_video(video_path, diarization_access_token, language, max_speakers=3):
     torch.cuda.empty_cache()
     gc.collect()
 
-
-    progress(1.0, desc="Video processing complete.")
-
-    return combined_srt_path
+    return combined_srt_path
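A minimal standalone sketch of this module, assuming a local video file and a Hugging Face token that has accepted the pyannote/speaker-diarization-3.1 terms (file name and token are placeholders):

from transcription_diarization import process_video

srt_path = process_video("sample_interview.mp4",   # placeholder local file
                         hf_token="hf_your-token",  # placeholder token
                         language="en",
                         max_speakers=2)

with open(srt_path, encoding="utf-8") as f:
    print(f.read()[:500])  # speaker-labelled transcript from the combined SRT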
visualization.py
CHANGED
@@ -1,99 +1,70 @@
-import plotly.
-    (unreadable removed lines)
-def
-    (unreadable removed lines: evidently a bar-chart helper)
-        x=list(data.keys()),
-        y=list(data.values()),
-        marker_color=colors[:len(data)]
-    )])
-    fig.update_layout(title=title)
-    return fig
-
-    (unreadable removed lines: evidently a radar-chart helper)
-    fig = go.Figure(data=go.Scatterpolar(
-        r=ordered_values,
-        theta=ordered_keys,
-        fill='toself'
-    ))
-    fig.update_layout(
-        polar=dict(radialaxis=dict(visible=True, range=[0, max(ordered_values, default=1)])),
-        showlegend=False,
-        title=title
-    )
-    return fig
-
-    (many unreadable removed lines)
-    # Hide unused speaker components
-    for _ in range(3 - len(sorted_speakers)):
-        outputs.extend([gr.update(visible=False)] * 7)  # 7 components per speaker
-
-    print("Debug: Attachments Data:", attachments_data)
-    print("Debug: Big Five Data:", bigfive_data)
-    print("Debug: Personalities Data:", personalities_data)
-    print("Debug: Chart Data:", chart_data)
-    print("Debug: Sorted Speakers:", sorted_speakers)
-
-    return outputs
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
 
+def create_charts(results):
+    charts = {}
+    explanations = {}
 
+    # Loop through each detected speaker in the results
+    for speaker_id, speaker_data in results['speakers'].items():
+        speaker_charts = {}
+        speaker_explanations = {}
 
+        # Attachment Styles for each speaker
+        attachment_data = speaker_data['attachments']
+        fig_attachment = go.Figure(go.Bar(
+            x=['Secured', 'Anxious-Preoccupied', 'Dismissive-Avoidant', 'Fearful-Avoidant'],
+            y=[attachment_data.secured, attachment_data.anxious_preoccupied,
+               attachment_data.dismissive_avoidant, attachment_data.fearful_avoidant],
+            marker_color=['blue', 'orange', 'green', 'red']
+        ))
+        fig_attachment.update_layout(title_text=f"Attachment Styles - Speaker {speaker_id}", showlegend=False)
+        speaker_charts["attachment"] = fig_attachment
+        speaker_explanations["attachment"] = attachment_data.explanation
+
+        # Attachment Dimensions (Radar Chart) for each speaker
+        fig_dimensions = go.Figure(go.Scatterpolar(
+            r=[attachment_data.avoidance, attachment_data.anxiety, attachment_data.self_rating, attachment_data.others_rating],
+            theta=['Avoidance', 'Anxiety', 'Self', 'Others'],
+            fill='toself'
+        ))
+        fig_dimensions.update_layout(title_text=f"Attachment Dimensions - Speaker {speaker_id}", showlegend=False)
+        speaker_charts["dimensions"] = fig_dimensions
+
+        # Big Five Traits for each speaker
+        bigfive_data = speaker_data['bigfive']
+        fig_bigfive = go.Figure(go.Bar(
+            x=['Extraversion', 'Agreeableness', 'Conscientiousness', 'Neuroticism', 'Openness'],
+            y=[bigfive_data.extraversion, bigfive_data.agreeableness,
+               bigfive_data.conscientiousness, bigfive_data.neuroticism, bigfive_data.openness],
+            marker_color=['blue', 'green', 'red', 'purple', 'orange']
+        ))
+        fig_bigfive.update_layout(title_text=f"Big Five Traits - Speaker {speaker_id}", showlegend=False)
+        speaker_charts["bigfive"] = fig_bigfive
+        speaker_explanations["bigfive"] = bigfive_data.explanation
+
+        # Personality Disorders for each speaker
+        personality_data = speaker_data['personalities']
+        fig_personality = go.Figure(go.Bar(
+            x=['Antisocial', 'Narcissistic', 'Depressed', 'Anxious-Avoidant',
+               'Obsessive', 'Paranoid', 'Borderline', 'Dependent', 'Schizoid-Schizotypal'],
+            y=[personality_data.antisocial_psychopathic, personality_data.narcissistic,
+               personality_data.depressed, personality_data.anxious_avoidant,
+               personality_data.obsessional, personality_data.paranoid,
+               personality_data.borderline_dysregulated, personality_data.dependent_victimized,
+               personality_data.schizoid_schizotypal],
+            marker_color=['black', 'orange', 'gray', 'green', 'brown', 'purple', 'red', 'cyan', 'magenta']
+        ))
+        fig_personality.update_layout(title_text=f"Personality Disorders - Speaker {speaker_id}", showlegend=False)
+        speaker_charts["personality"] = fig_personality
+        speaker_explanations["personality"] = personality_data.explanation
+
+        # Update all charts to take full width
+        for fig in speaker_charts.values():
+            fig.update_layout(height=400, width=None, margin=dict(l=50, r=50, t=100, b=50))
+
+        # Store the charts and explanations for each speaker
+        charts[speaker_id] = speaker_charts
+        explanations[speaker_id] = speaker_explanations
+
+    return charts, explanations
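Note that create_charts reads results['speakers'][speaker_id]['attachments'] and friends, while process_input above returns a flat {"attachments": ..., "bigfive": ..., "personalities": ...} dict, so some adapter in between is evidently assumed. A sketch of one possible shape for the single-speaker case, written as an assumption rather than as the committed glue code:

def group_by_speaker(flat_results):
    # flat_results: output of process_input; values are parsed pydantic objects (or None on failure)
    speaker_id = flat_results["attachments"].speaker
    return {"speakers": {speaker_id: flat_results}}

With that adapter, create_charts(group_by_speaker(results)) yields one set of charts and explanations per speaker id.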