from huggingface_hub import hf_hub_download
import joblib
# scikit-learn must be installed so joblib can unpickle the classifier and vectorizer.

repo_id = "DevBhojani/Classification-SamsumDataset"
model_filename = "random_forest_classifier_model.joblib"
vectorizer_filename = "tfidf_vectorizer.joblib"

# Download the trained classifier and its TF-IDF vectorizer from the Hub, then
# load both from the local cache.
model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
vectorizer_path = hf_hub_download(repo_id=repo_id, filename=vectorizer_filename)

loaded_classifier_model = joblib.load(model_path)
loaded_tfidf_vectorizer = joblib.load(vectorizer_path)
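
# Quick sanity check (illustrative sketch; the sample text is arbitrary and the
# predicted label depends entirely on how the classifier was trained):
# sample_vec = loaded_tfidf_vectorizer.transform(["can we move the call to friday?"])
# print(loaded_classifier_model.predict(sample_vec)[0])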

import gradio as gr
from transformers import pipeline, AutoTokenizer
import re
import contractions
# loaded_classifier_model and loaded_tfidf_vectorizer are loaded above.

def remove_html_tags(text):
    # Strip HTML tags such as <br> or <p> (defined here but not used in clean_text).
    pattern = r'<.*?>'
    return re.sub(pattern, '', str(text))

def remove_url(text):
    pattern = re.compile(r'https?://\S+|www\.\S+')
    return pattern.sub(r'', str(text))

def remove_emojis(text):
    emoji_pattern = re.compile(
        "["
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags
        u"\U00002700-\U000027BF"  # miscellaneous symbols
        u"\U0001F900-\U0001F9FF"  # supplemental symbols
        u"\U00002600-\U000026FF"  # weather & other symbols
        u"\U0001FA70-\U0001FAFF"  # extended symbols
        "]+",
        flags=re.UNICODE
    )
    return emoji_pattern.sub(r'', str(text))

def expand_contractions(text):
    return contractions.fix(text)

def remove_special_and_numbers(text):
    # Keep letters and whitespace only (defined here but not used in clean_text).
    return re.sub(r'[^a-zA-Z\s]', '', str(text))

def clean_text(text):
    text = remove_url(text)
    text = remove_emojis(text)
    text = expand_contractions(text)
    text = text.lower()
    return text
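
# Illustrative example of the cleaning pipeline (expected output is approximate):
# clean_text("Check https://example.com, can't wait! 🎉")
# -> roughly "check  cannot wait!" (URL and emoji stripped, contraction expanded, lowercased)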

# Summarization pipeline fine-tuned on SAMSum; the matching tokenizer is loaded
# so inputs can be truncated to the model's limit before summarizing.
summarizer = pipeline("summarization", model="luisotorres/bart-finetuned-samsum")
# summarizer2 = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")  # alternative model
tokenizer = AutoTokenizer.from_pretrained("luisotorres/bart-finetuned-samsum")

def split_into_chunks(conversation, n=15):
    # Split the dialogue into roughly n chunks of whole lines so each piece fits
    # the summarizer's input window.
    lines = conversation.strip().split('\n')
    chunk_size = max(1, len(lines) // n)
    return ['\n'.join(lines[i:i+chunk_size]) for i in range(0, len(lines), chunk_size)]

def truncate_chunk(text, max_tokens=1024):
    # BART-based models accept at most 1024 input tokens, so truncate anything longer.
    tokens = tokenizer.encode(text, truncation=True, max_length=max_tokens)
    return tokenizer.decode(tokens, skip_special_tokens=True)

def summarize_chunks(chunks, model):
    summaries = []
    for chunk in chunks:
        chunk = chunk.strip()
        if not chunk:
            continue
        try:
            truncated_chunk = truncate_chunk(chunk)
            summary = model(truncated_chunk, max_length=1024, min_length=20, do_sample=False)[0]['summary_text']
            summaries.append(summary)
        except Exception as e:
            # Log and skip chunks that fail instead of aborting the whole summary.
            print(f"Error summarizing chunk: {e}")
    return summaries

def combine_summaries(summaries):
    return ' '.join(summaries)

def summarize_dialogue(conversation, model):
    # n=1 keeps the whole dialogue as a single chunk; increase n for very long transcripts.
    chunks = split_into_chunks(conversation, n=1)
    summaries = summarize_chunks(chunks, model)
    final_summary = combine_summaries(summaries)
    return final_summary
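
# Example usage (illustrative; invokes the BART pipeline, so it requires the
# model weights to be downloaded):
# print(summarize_dialogue("Amanda: guess what!\nChris: tell me!", summarizer))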

def analyze_meeting_transcript(user_input):
    if not user_input.strip():
        return "Please enter some text to summarize.", ""

    cleaned_input = clean_text(user_input)
    summary1 = summarize_dialogue(cleaned_input, summarizer)

    # Use the loaded vectorizer to transform the input
    cleaned_input_vectorized = loaded_tfidf_vectorizer.transform([cleaned_input])

    intent_classification = loaded_classifier_model.predict(cleaned_input_vectorized)[0]

    # Format the intent classification output
    formatted_intent = intent_classification.replace("__label__", "").replace("_", " ")


    return summary1, formatted_intent
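
# Example (illustrative): the function returns two strings that map onto the two
# output Textboxes in the interface below.
# summary, intent = analyze_meeting_transcript("Amanda: guess what!\nChris: tell me!")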

interface = gr.Interface(
    fn=analyze_meeting_transcript,
    inputs=gr.Textbox(label="Enter dialogue here", lines=12, placeholder="Paste your meeting transcript..."),
    outputs=[
        gr.Textbox(label="Summary (Luis Torres BART)"),
        # gr.Textbox(label="Summary 2 (KN Karthick MEETING_SUMMARY)"),
        gr.Textbox(label="Intent Classification") # Removed "Placeholder"
    ],
    title="Meeting Transcript Analyzer",
    description="Summarizes meeting dialogues and classifies the intent.",
    allow_flagging="never",
    examples=[
        [
            '''
Amanda: guess what!
Chris: hey ;) ur pregnant!
Amanda: I'm so proud of myself! Remember I go to these dancing classes with Michael?
Chris: Yeah?
Amanda: So we went yesterday and the instructor needed a partner to show the steps we had so far
Chris: so there's only one guy teaching you? without a female partner?
Amanda: Well, this time he was alone, BUT THAT'S NOT THE POINT! Listen!
Chris: yeah, sorry :D tell me!
Amanda: So he needed a partner and noone really knew the steps like perfectly
Amanda: and obviously noone wanted to be mocked
Amanda: so I thought, aaaah :D
Chris: u volunteered? really? you??
Amanda: yeah!
Chris: whooa! that's so great! #therapy #worthit :D
Amanda: yeah i know :D maybe one day i'll actually stop being so shy
Chris: that's definitely the first step! :D congrats!
Amanda: tx ^_^
Chris: what dance was it?
Amanda: English waltz
Chris: isn't it, like, SO difficult?
Amanda: yeah it is! but everyone said I looked like a pro :D
Chris: Well done!!
'''
        ],
        ["I have some exciting news to share!"],
        [
            '''
Beryl: Hello guys! How are you doing? We've lost contact for a few months now. Hope you are well.
Anton: A happy hello to you Beryl! Great to hear from you. We are fine, thanks. And yourself?
Beryl: I'm very well indeed. Thank you. Any changes in your setup?
Anton: Not really. SOS. Same Old Soup ;) But we are happy for that.
Beryl: Are you still running your lovely airbnb?
Anton: Oh yes, we are. We had a few months off during summer, our summer, but now bookings start flowing in. Well... Are you planning to visit us? You two are always welcome!
Beryl: You caught me here. I'm vaguely considering going down to Onrus again, most likely in January. What does it look like with vacancies then?
Anton: Perfect! Just give me your dates and I'll keep it booked for you.
Beryl: Would you prefer me to do it via airbnb website or just like this directly with you?
Anton: I think it'll be more advantageous for both of us to do it directly. Do you know exactly when you'll be coming?
Beryl: Not so much. Can I get back to you in 2, 3 days' time?
Anton: ASAP really. As I say we've been receiving bookings daily now.
Beryl: Well, no big deal. I'll be staying in Cape Town for a longer time and am quite flexible in my dates.
Anton: Will you be coming with Tino, if I may ask?
Beryl: No. I am single again. Hurray! So pls make it single occupancy any week in January, Anton.
Anton: Great! 4th till 12th?
Beryl: Very good. I'll call you beforehand from Cape Town. Greetings to you both!
Anton: Take care!'''
        ],
    ]
)

if __name__ == "__main__":
    # debug=True surfaces errors in the console; share=True creates a temporary public link.
    interface.launch(debug=True, share=True)