DevBhojani committed
Commit 9cd15ff · verified · 1 Parent(s): 1d9caea

Delete app.py

Files changed (1)
  1. app.py +0 -182
app.py DELETED
@@ -1,182 +0,0 @@
- # Dependencies (install from a shell; "!pip" is notebook syntax, and `re` is
- # part of the standard library, so it is not pip-installable):
- # pip install transformers joblib contractions gradio huggingface_hub scikit-learn
-
- import gradio as gr
- from transformers import pipeline, AutoTokenizer
- import re
- import contractions
- import joblib
- from sklearn.feature_extraction.text import TfidfVectorizer
- from huggingface_hub import hf_hub_download
-
- repo_id = "DevBhojani/Classification-SamsumDataset"
- model_filename = "random_forest_classifier_model.joblib"
-
- # Fetch the pickled random-forest intent classifier from the Hub
- model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
- loaded_classifier_model = joblib.load(model_path)
-
- # Fetch the TF-IDF vectorizer that was fitted alongside the classifier
- vectorizer_filename = "tfidf_vectorizer.joblib"
-
- vectorizer_path = hf_hub_download(repo_id=repo_id, filename=vectorizer_filename)
- loaded_tfidf_vectorizer = joblib.load(vectorizer_path)
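-
- # Illustrative sanity check (not in the original file): hf_hub_download caches
- # both artifacts locally and returns their paths, so the loads above should
- # yield objects with the usual sklearn interfaces.
- # assert hasattr(loaded_tfidf_vectorizer, "transform")
- # assert hasattr(loaded_classifier_model, "predict")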
-
- # Note: despite its name, this helper strips all non-alphanumeric characters
- # rather than parsing HTML; it is defined but never called by clean_text.
- def remove_html_tags(text):
-     pattern = r'[^a-zA-Z0-9\s]'
-     text = re.sub(pattern, '', str(text))
-     return text
-
- def remove_url(text):
-     pattern = re.compile(r'https?://\S+|www\.\S+')
-     return pattern.sub(r'', str(text))
-
- def remove_emojis(text):
-     emoji_pattern = re.compile(
-         "["
-         u"\U0001F600-\U0001F64F"  # emoticons
-         u"\U0001F300-\U0001F5FF"  # symbols & pictographs
-         u"\U0001F680-\U0001F6FF"  # transport & map symbols
-         u"\U0001F1E0-\U0001F1FF"  # flags
-         u"\U00002700-\U000027BF"  # miscellaneous symbols
-         u"\U0001F900-\U0001F9FF"  # supplemental symbols
-         u"\U00002600-\U000026FF"  # weather & other symbols
-         u"\U0001FA70-\U0001FAFF"  # extended symbols
-         "]+",
-         flags=re.UNICODE
-     )
-     return emoji_pattern.sub(r'', str(text))
-
- def expand_contractions(text):
-     return contractions.fix(text)
-
- # Defined but unused below; keeps only letters and whitespace.
- def remove_special_and_numbers(text):
-     return re.sub(r'[^a-zA-Z\s]', '', str(text))
-
- def clean_text(text):
-     text = remove_url(text)
-     text = remove_emojis(text)
-     text = expand_contractions(text)
-     text = text.lower()
-     return text
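-
- # Worked example of clean_text (illustrative, not in the original file):
- #   clean_text("Can't wait! 😀 See https://example.com")
- #   -> URL removed, emoji removed, "Can't" expanded, lowercased
- #   -> roughly "cannot wait! see" (modulo leftover whitespace)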
-
- # Summarization model: BART fine-tuned on the SAMSum dialogue dataset
- summarizer = pipeline("summarization", model="luisotorres/bart-finetuned-samsum")
- # summarizer2 = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
- tokenizer = AutoTokenizer.from_pretrained("luisotorres/bart-finetuned-samsum")
-
- def split_into_chunks(conversation, n=15):
-     lines = conversation.strip().split('\n')
-     chunk_size = max(1, len(lines) // n)
-     return ['\n'.join(lines[i:i+chunk_size]) for i in range(0, len(lines), chunk_size)]
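-
- # Illustrative behaviour (not in the original file): a 30-line dialogue with
- # n=15 gives chunk_size = 2 and fifteen two-line chunks; with n=1, as used by
- # summarize_dialogue below, the entire dialogue stays in a single chunk.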
-
- # BART's encoder accepts at most 1024 tokens, so over-long chunks are truncated first.
- def truncate_chunk(text, max_tokens=1024):
-     tokens = tokenizer.encode(text, truncation=True, max_length=max_tokens)
-     return tokenizer.decode(tokens, skip_special_tokens=True)
-
- def summarize_chunks(chunks, model):
-     summaries = []
-     for chunk in chunks:
-         chunk = chunk.strip()
-         if not chunk:
-             continue
-         try:
-             truncated_chunk = truncate_chunk(chunk)
-             summary = model(truncated_chunk, max_length=1024, min_length=20, do_sample=False)[0]['summary_text']
-             summaries.append(summary)
-         except Exception as e:
-             print(f"Error summarizing chunk: {e}")
-     return summaries
-
- def combine_summaries(summaries):
-     return ' '.join(summaries)
-
- def summarize_dialogue(conversation, model):
-     # n=1 keeps the whole conversation in a single chunk; raise n to
-     # summarize long transcripts piecewise and then concatenate.
-     chunks = split_into_chunks(conversation, n=1)
-     summaries = summarize_chunks(chunks, model)
-     final_summary = combine_summaries(summaries)
-     return final_summary
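-
- # Illustrative usage (not in the original file; exact wording depends on the model):
- #   summarize_dialogue("Amanda: guess what!\nChris: hey ;) ur pregnant!", summarizer)
- #   -> a short third-person summary along the lines of "Amanda has news for Chris."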
-
- def analyze_meeting_transcript(user_input):
-     if not user_input.strip():
-         return "Please enter some text to summarize.", ""
-
-     cleaned_input = clean_text(user_input)
-     summary1 = summarize_dialogue(cleaned_input, summarizer)
-
-     # Use the loaded vectorizer to transform the input
-     cleaned_input_vectorized = loaded_tfidf_vectorizer.transform([cleaned_input])
-
-     intent_classification = loaded_classifier_model.predict(cleaned_input_vectorized)[0]
-
-     # Strip fastText-style "__label__" prefixes and underscores for display
-     formatted_intent = intent_classification.replace("__label__", "").replace("_", " ")
-
-     return summary1, formatted_intent
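-
- # Illustrative round trip (not in the original file):
- #   summary, intent = analyze_meeting_transcript("A: Lunch at noon?\nB: Sure!")
- # `summary` holds the BART summary and `intent` a classifier label such as
- # "Transactional Inquiry & Information Exchange".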
-
- interface = gr.Interface(
-     fn=analyze_meeting_transcript,
-     inputs=gr.Textbox(label="Enter dialogue here", lines=12, placeholder="Paste your meeting transcript..."),
-     outputs=[
-         gr.Textbox(label="Summary (Luis Torres BART)"),
-         # gr.Textbox(label="Summary 2 (KN Karthick MEETING_SUMMARY)"),
-         gr.Textbox(label="Intent Classification")
-     ],
-     title="Meeting Transcript Analyzer",
-     description="Summarizes meeting dialogues and classifies the intent.",
-     allow_flagging="never",
-     examples=[
-         [
-             '''
- Amanda: guess what!
- Chris: hey ;) ur pregnant!
- Amanda: noo ;) but close enough! I'm so proud of myself! Remember I go to these dancing classes with Michael?
- Chris: Yeah?
- Amanda: So we went yesterday and the instructor needed a partner to show the steps we had so far
- Chris: so there's only one guy teaching you? without a female partner?
- Amanda: Well, this time he was alone, BUT THAT'S NOT THE POINT! Listen!
- Chris: yeah, sorry :D tell me!
- Amanda: So he needed a partner and noone really knew the steps like perfectly
- Amanda: and obviously noone wanted to be mocked
- Amanda: so I thought, aaaah :D
- Chris: u volunteered? really? you??
- Amanda: yeah!
- Chris: whooa! that's so great! #therapy #worthit :D
- Amanda: yeah i know :D maybe one day i'll actually stop being so shy
- Chris: that's definitely the first step! :D congrats!
- Amanda: tx ^_^
- Chris: what dance was it?
- Amanda: English waltz
- Chris: isn't it, like, SO difficult?
- Amanda: yeah it is! but everyone said I looked like a pro :D
- Chris: Well done!!
- '''
-         ],
-         ["I have some exciting news to share!"],
-         [
-             '''
- Beryl: Hello guys! How are you doing? We've lost contact for a few months now. Hope you are well.
- Anton: A happy hello to you Beryl! Great to hear from you. We are fine, thanks. And yourself?
- Beryl: I'm very well indeed. Thank you. Any changes in your setup?
- Anton: Not really. SOS. Same Old Soup ;) But we are happy for that.
- Beryl: Are you still running your lovely airbnb?
- Anton: Oh yes, we are. We had a few months off during summer, our summer, but now bookings start flowing in. Well... Are you planning to visit us? You two are always welcome!
- Beryl: You caught me here. I'm vaguely considering going down to Onrus again, most likely in January. What does it look like with vacancies then?
- Anton: Perfect! Just give me your dates and I'll keep it booked for you.
- Beryl: Would you prefer me to do it via airbnb website or just like this directly with you?
- Anton: I think it'll be more advantageous for both of us to do it directly. Do you know exactly when you'll be coming?
- Beryl: Not so much. Can I get back to you in 2, 3 days' time?
- Anton: ASAP really. As I say we've been receiving bookings daily now.
- Beryl: Well, no big deal. I'll be staying in Cape Town for a longer time and am quite flexible in my dates.
- Anton: Will you be coming with Tino, if I may ask?
- Beryl: No. I am single again. Hurray! So pls make it single occupancy any week in January, Anton.
- Anton: Great! 4th till 12th?
- Beryl: Very good. I'll call you beforehand from Cape Town. Greetings to you both!
- Anton: Take care!'''
-         ],
-     ]
- )
-
- if __name__ == "__main__":
-     # share=True opens a public tunnel for local runs; on Spaces it is unnecessary.
-     interface.launch(debug=True, share=True)