hatimanees committed on
Commit
6bf224b
·
verified ·
1 Parent(s): 079571a

Create run.py

Browse files
Files changed (1) hide show
  1. run.py +186 -0
run.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ from flask_cors import CORS
3
+ import os
4
+ from transformers import pipeline
5
+ import numpy as np
6
+ import torch
7
+ import re
8
+ from werkzeug.utils import secure_filename
9
+ import uuid
10
+ import platform
11
+
12
# Choose a per-platform directory for the Hugging Face Transformers model
# cache, create it if needed, and publish it via TRANSFORMERS_CACHE.
#
# NOTE(review): `transformers` is imported at the top of this file, before
# this assignment runs. The library appears to read its cache environment
# variables at import time, so this setting may only affect child processes
# or later imports -- confirm, and consider moving this block above the
# `transformers` import (or setting HF_HOME in the deployment environment).
if platform.system() == "Windows":
    print("Windows detected. Assigning cache directory to Transformers in AppData\\Local.")
    # LOCALAPPDATA can be unset (services, stripped-down environments);
    # os.path.join(None, ...) would raise TypeError, so fall back to the
    # conventional location under the user's home directory.
    local_app_data = os.getenv('LOCALAPPDATA') or os.path.join(
        os.path.expanduser('~'), 'AppData', 'Local'
    )
    transformers_cache_directory = os.path.join(local_app_data, 'transformers_cache')
else:
    print("Non-Windows system detected. Assigning cache directory to /tmp/transformers_cache.")
    transformers_cache_directory = '/tmp/transformers_cache'

# Ensure the directory exists. isdir() (rather than exists()) means a stray
# regular file at that path is reported as an error instead of "already exists".
if os.path.isdir(transformers_cache_directory):
    print(f"Directory '{transformers_cache_directory}' already exists.")
else:
    try:
        os.makedirs(transformers_cache_directory, exist_ok=True)
        print(f"Directory '{transformers_cache_directory}' created successfully.")
    except OSError as e:
        # Best effort: keep starting up even if the cache directory is unusable.
        print(f"Error creating directory '{transformers_cache_directory}': {e}")

# Set the TRANSFORMERS_CACHE environment variable
os.environ['TRANSFORMERS_CACHE'] = transformers_cache_directory
print(f"Environment variable TRANSFORMERS_CACHE set to '{transformers_cache_directory}'.")
33
+
34
+
35
class Config:
    """Flask settings consumed through ``app.config.from_object(Config)``."""

    # Directory for uploaded transcripts. The previous expression,
    # os.path.join(os.path.dirname(__file__), '/tmp/uploads'), looked
    # module-relative but was not: os.path.join discards earlier components
    # when a later one is absolute, so on POSIX it already evaluated to
    # '/tmp/uploads'. Spell the value out so the code says what it does.
    UPLOAD_FOLDER = '/tmp/uploads'
    MAX_CONTENT_LENGTH = 16 * 1024 * 1024  # 16MB max file size
    CORS_HEADERS = 'Content-Type'
39
+
40
+
41
+
42
class DialogueSentimentAnalyzer:
    """Score a speaker-labelled transcript for sentiment and engagement.

    Two Hugging Face pipelines are used per utterance:

    * ``dialogue_model`` (text-classification) -- its score is reported as
      the utterance's ``engagement_score``.
    * ``sentiment_model`` (sentiment-analysis) -- its POSITIVE/NEGATIVE
      result is mapped to a 0..1 ``sentiment_score`` and then nudged by a
      simple keyword count.
    """

    # Keywords matched as substrings of the lower-cased utterance text.
    POSITIVE_PHRASES = (
        'thank you', 'thanks', 'appreciate', 'great', 'perfect',
        'looking forward', 'flexible', 'competitive',
    )
    NEGATIVE_PHRASES = (
        'concerned', 'worry', 'issue', 'problem', 'difficult',
        'unfortunately', 'sorry',
    )
    # How far each net keyword hit moves the sentiment score.
    PHRASE_WEIGHT = 0.1

    # A line starting with "<anything without a colon>:" opens a new turn.
    _SPEAKER_RE = re.compile(r'^([^:]+):')

    def __init__(self, model_name: str = "microsoft/DialogRPT-updown"):
        """Load both pipelines.

        Args:
            model_name: Model id for the dialogue (engagement) classifier.
                Previously this argument was accepted but ignored.
        """
        # transformers pipelines take a CUDA device index, or -1 for CPU.
        self.device = 0 if torch.cuda.is_available() else -1
        self.dialogue_model = pipeline(
            'text-classification',
            model=model_name,
            device=self.device,
        )
        self.sentiment_model = pipeline(
            'sentiment-analysis',
            model="distilbert-base-uncased-finetuned-sst-2-english",
            device=self.device,
        )
        # Token budget applied (with truncation) to every model call.
        self.max_length = 512

    def parse_dialogue(self, text: str):
        """Split a transcript into ``{'speaker', 'text'}`` turns.

        A line of the form ``Speaker: words`` starts a new turn; following
        lines without a leading ``name:`` prefix are appended to the current
        turn. Blank lines and any lines before the first speaker are dropped.

        Returns:
            A list of dicts with keys ``'speaker'`` and ``'text'``, in
            transcript order. Empty when no speaker line is found.
        """
        dialogue = []
        current_speaker = None
        current_text = []

        def flush():
            if current_speaker and current_text:
                dialogue.append({'speaker': current_speaker, 'text': ' '.join(current_text)})

        for raw_line in text.strip().split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            speaker_match = self._SPEAKER_RE.match(line)
            if speaker_match:
                flush()
                # Strip so "Agent :" and "Agent:" are treated as one speaker.
                current_speaker = speaker_match.group(1).strip()
                current_text = [line[speaker_match.end():].strip()]
            elif current_speaker:
                current_text.append(line)

        flush()
        return dialogue

    def analyze_utterance(self, utterance):
        """Score one parsed turn.

        Args:
            utterance: Dict with ``'speaker'`` and ``'text'`` keys, as
                produced by :meth:`parse_dialogue`.

        Returns:
            Dict with ``speaker``, ``text``, ``sentiment_score`` (0..1),
            ``engagement_score`` and the positive/negative keyword counts.
        """
        text = utterance['text']
        # Truncate to max_length tokens so over-long turns are clipped by the
        # tokenizer instead of overflowing the model's input size.
        tokenizer_kwargs = {'truncation': True, 'max_length': self.max_length}
        dialogue_score = self.dialogue_model(text, **tokenizer_kwargs)[0]
        sentiment = self.sentiment_model(text, **tokenizer_kwargs)[0]

        text_lower = text.lower()
        positive_count = sum(1 for phrase in self.POSITIVE_PHRASES if phrase in text_lower)
        negative_count = sum(1 for phrase in self.NEGATIVE_PHRASES if phrase in text_lower)

        # The pipeline reports confidence in its chosen label; convert that to
        # "probability of positive" so 0 is negative and 1 is positive.
        sentiment_score = float(sentiment['score'])
        if sentiment['label'] == 'NEGATIVE':
            sentiment_score = 1 - sentiment_score

        final_score = sentiment_score
        net_hits = positive_count - negative_count
        if net_hits:
            # Shift by the keyword balance, clamped to the 0..1 range.
            final_score = min(1.0, max(0.0, final_score + self.PHRASE_WEIGHT * net_hits))

        return {
            'speaker': utterance['speaker'],
            'text': text,
            'sentiment_score': final_score,
            'engagement_score': float(dialogue_score['score']),
            'positive_phrases': positive_count,
            'negative_phrases': negative_count,
        }

    def analyze_dialogue(self, text: str):
        """Analyze a whole transcript.

        Returns:
            A list of ``{'label', 'score'}`` dicts: ``Overall Sentiment``,
            ``Confidence`` (1 minus the standard deviation of per-turn
            sentiment, floored at 0), ``Engagement``, followed by one
            ``'<speaker> Sentiment'`` entry per speaker.

        Raises:
            ValueError: If the transcript contains no ``Speaker: text``
                turns. Averaging an empty list would otherwise produce NaN
                scores, which are not valid JSON.
        """
        dialogue = self.parse_dialogue(text)
        if not dialogue:
            raise ValueError("No 'Speaker: text' utterances found in transcript")

        utterance_results = [self.analyze_utterance(utterance) for utterance in dialogue]
        sentiment_scores = [r['sentiment_score'] for r in utterance_results]

        overall_sentiment = np.mean(sentiment_scores)
        overall_engagement = np.mean([r['engagement_score'] for r in utterance_results])
        sentiment_spread = np.std(sentiment_scores)
        confidence = max(0.0, 1.0 - sentiment_spread)

        # Group per-turn sentiment by speaker, keeping first-seen order.
        speaker_sentiments = {}
        for result in utterance_results:
            speaker_sentiments.setdefault(result['speaker'], []).append(result['sentiment_score'])

        summary = [
            {'label': 'Overall Sentiment', 'score': float(overall_sentiment)},
            {'label': 'Confidence', 'score': float(confidence)},
            {'label': 'Engagement', 'score': float(overall_engagement)},
        ]
        summary.extend(
            {'label': f'{speaker} Sentiment', 'score': float(np.mean(scores))}
            for speaker, scores in speaker_sentiments.items()
        )
        return summary
128
+
129
+
130
def save_uploaded_file(content, upload_folder):
    """Write ``content`` to a new uniquely named UTF-8 ``.txt`` file.

    Args:
        content: Text to store.
        upload_folder: Directory to write into; created if it does not exist.

    Returns:
        The path of the file that was written.
    """
    # Create the target directory on demand so the call does not fail with
    # FileNotFoundError when the folder has not been prepared by the caller.
    os.makedirs(upload_folder, exist_ok=True)
    # The name is a generated hex UUID plus ".txt" -- no user input is
    # involved, so no filename sanitising is required.
    filename = f"{uuid.uuid4().hex}.txt"
    file_path = os.path.join(upload_folder, filename)
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(content)
    return file_path
136
+
137
+
138
# Lazily created, process-wide analyzer. Constructing it loads two
# Transformers pipelines, so it is built once and reused across calls.
_ANALYZER = None


def _get_analyzer():
    """Return the shared DialogueSentimentAnalyzer, creating it on first use."""
    global _ANALYZER
    if _ANALYZER is None:
        _ANALYZER = DialogueSentimentAnalyzer()
    return _ANALYZER


def analyze_sentiment(file_path: str):
    """Run dialogue sentiment analysis on a UTF-8 transcript file.

    Args:
        file_path: Path to the transcript text file.

    Returns:
        The list of ``{'label', 'score'}`` dicts produced by
        ``DialogueSentimentAnalyzer.analyze_dialogue``. On any failure the
        error is printed and ``[{'label': 'Error', 'score': 0.5}]`` is
        returned instead of raising.
    """
    try:
        # Read first so an unreadable file fails fast, before any model is loaded.
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()
        return _get_analyzer().analyze_dialogue(text)
    except Exception as e:
        # Deliberate catch-all: callers rely on always getting a result list.
        print(f"Error in sentiment analysis: {str(e)}")
        return [{'label': 'Error', 'score': 0.5}]
147
+
148
+
149
+
150
def create_app():
    """Build the Flask application and register the ``/upload`` endpoint.

    Returns:
        A Flask app configured from ``Config``, with the upload directory
        created if it was missing.
    """
    app = Flask(__name__)
    app.config.from_object(Config)
    # NOTE(review): flask_cors.CORS is imported at the top of this file and
    # Config defines CORS_HEADERS, but CORS is never applied to the app, so
    # no CORS headers are sent. Confirm whether cross-origin access is
    # intended before enabling it.

    # Ensure the uploads directory exists
    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

    @app.route('/upload', methods=['POST'])
    def upload_transcript():
        """Analyze the ``transcript`` form field and return the scores as JSON.

        Responds 400 when the field is missing or empty, 500 with the error
        text on unexpected failures, and 200 with ``{'sentiment': [...]}``
        otherwise.
        """
        file_path = None
        try:
            transcript = request.form.get('transcript')
            if not transcript:
                return jsonify({'error': 'No transcript received'}), 400

            # Write each request to its own uniquely named UTF-8 file in the
            # upload folder. A single shared "transcript.txt" in the working
            # directory let concurrent requests overwrite or delete each
            # other's data, and was written with the platform default
            # encoding while being read back as UTF-8.
            file_path = save_uploaded_file(transcript, app.config['UPLOAD_FOLDER'])

            # Analyze sentiment
            sentiment_result = analyze_sentiment(file_path)

            return jsonify({'sentiment': sentiment_result}), 200
        except Exception as e:
            return jsonify({'error': str(e)}), 500
        finally:
            # Always remove the temporary file, including on failure.
            if file_path is not None:
                try:
                    os.remove(file_path)
                except OSError as cleanup_error:
                    print(f"Could not remove temporary file '{file_path}': {cleanup_error}")

    return app
180
+
181
+
182
+
183
+
184
if __name__ == '__main__':
    # Script entry point: build the application, then serve it on every
    # network interface at port 5000.
    app = create_app()
    listen_options = {"host": "0.0.0.0", "port": 5000}
    app.run(**listen_options)