# app.py
# Version: 1.07 (08.24.24), ALPHA
#---------------------------------------------------------------------------------------------------------------------------------------------
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#---------------------------------------------------------------------------------------------------------------------------------------------
import gradio as gr
from PIL import Image
from pydub import AudioSegment
import os
import re
import time
import warnings
#import datetime
import subprocess
from pathlib import Path
from fpdf import FPDF

import psutil
from gpuinfo import GPUInfo
#import pandas as pd
#import csv
import numpy as np
import torch
#import torchaudio
#import torchaudio.transforms as transforms

from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

import spacy
import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
#---------------------------------------------------------------------------------------------------------------------------------------------
warnings.filterwarnings("ignore")


HEADER_INFO = """
    # WEB APP ✨| Norwegian WHISPER Model
Switch Work [Transcription of audio files into written Norwegian]
""".strip()
LOGO = "https://huggingface.co/spaces/camparchimedes/transcription_app/resolve/main/pic09w9678yhit.png"
SIDEBAR_INFO = f"""
<div align="center">
    <img src="{LOGO}" style="width: 100%; height: auto;"/>
</div>
"""

def convert_to_wav(filepath):
    # Re-encode the input to 16 kHz mono PCM WAV, the format the ASR pipeline expects.
    _, file_ending = os.path.splitext(filepath)
    audio_file = filepath.replace(file_ending, ".wav")
    subprocess.run(['ffmpeg', '-i', filepath, '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le', audio_file], check=True)
    return audio_file

device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    task="automatic-speech-recognition",
    model="NbAiLab/nb-whisper-large",
    chunk_length_s=30,
    device=device,
)

def transcribe_audio(audio_file, batch_size=10):
    #if audio_file.endswith(".m4a"):
        #audio_file = convert_to_wav(audio_file)

    start_time = time.time()

    outputs = pipe(audio_file, batch_size=batch_size, return_timestamps=False, generate_kwargs={'task': 'transcribe', 'language': 'no'}) # skip_special_tokens=True
    text = outputs["text"]

    end_time = time.time()
    
    output_time = end_time - start_time
    word_count = len(text.split())
    memory = psutil.virtual_memory()
    gpu_utilization, gpu_memory = GPUInfo.gpu_usage()
    gpu_utilization = gpu_utilization[0] if len(gpu_utilization) > 0 else 0
    gpu_memory = gpu_memory[0] if len(gpu_memory) > 0 else 0
    system_info = f"""
    *Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB.*
    *Processing time: {output_time:.2f} seconds.*
    *Number of words: {word_count}*
    *GPU Utilization: {gpu_utilization}%, GPU Memory: {gpu_memory}*"""

    return text.strip(), system_info
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# Clean/preprocess text
def clean_text(text):
    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

nlp = spacy.blank("nb")  # 'nb' = Norwegian Bokmål
nlp.add_pipe('sentencizer')
spacy_stop_words = spacy.lang.nb.stop_words.STOP_WORDS

def preprocess_text(text):
    # Process the text with SpaCy
    doc = nlp(text)
    # Use spaCy's stop-word list directly
    stop_words = spacy_stop_words
    # Filter out stop words
    words = [token.text for token in doc if token.text.lower() not in stop_words]
    return ' '.join(words)
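
# NOTE (assumption): summarization_tokenizer / summarization_model are used by
# summarize_text() below but were never defined in this file. "t5-base" is a
# placeholder checkpoint only — swap in the intended (e.g. Norwegian) T5 model.
summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base").to(device)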

# Summarize w/T5 model
def summarize_text(text):
    preprocessed_text = preprocess_text(text)
    inputs = summarization_tokenizer(preprocessed_text, max_length=1024, return_tensors="pt", truncation=True)
    inputs = inputs.to(device)
    summary_ids = summarization_model.generate(inputs.input_ids, num_beams=5, max_length=150, early_stopping=True)
    return summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Build a sentence-similarity graph, edges weighted by shared-word counts
def build_similarity_matrix(sentences):
    similarity_matrix = nx.Graph()
    for i, tokens_a in enumerate(sentences):
        for j, tokens_b in enumerate(sentences):
            if i != j:
                common_words = set(tokens_a) & set(tokens_b)
                similarity_matrix.add_edge(i, j, weight=len(common_words))
    return similarity_matrix

# "Graph-based summarization" =====>
def graph_based_summary(text, num_paragraphs=3):
    doc = nlp(text)
    sentences = [sent.text for sent in doc.sents]
    if len(sentences) < num_paragraphs:
        return ' '.join(sentences)

    sentence_tokens = [nlp(sent) for sent in sentences]
    stop_words = spacy_stop_words
    filtered_tokens = [[token.text for token in tokens if token.text.lower() not in stop_words] for tokens in sentence_tokens]
    similarity_matrix = build_similarity_matrix(filtered_tokens)

    scores = nx.pagerank(similarity_matrix)
    ranked_sentences = sorted(((scores[i], sent) for i, sent in enumerate(sentences)), reverse=True)
    return ' '.join([sent for _, sent in ranked_sentences[:num_paragraphs]])

# LexRank
def lex_rank_summary(text, num_paragraphs=3, threshold=0.1):
    doc = nlp(text)
    sentences = [sent.text for sent in doc.sents]
    if len(sentences) < num_paragraphs:
        return ' '.join(sentences)

    stop_words = spacy_stop_words
    vectorizer = TfidfVectorizer(stop_words=list(stop_words))
    X = vectorizer.fit_transform(sentences)
    similarity_matrix = cosine_similarity(X, X)

    # Zero out similarities below the threshold
    similarity_matrix[similarity_matrix < threshold] = 0
    nx_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(nx_graph)
    ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
    return ' '.join([ranked_sentences[i][1] for i in range(num_paragraphs)])

# TextRank
def text_rank_summary(text, num_paragraphs=3):
    doc = nlp(text)
    sentences = [sent.text for sent in doc.sents]
    if len(sentences) < num_paragraphs:
        return ' '.join(sentences)

    stop_words = spacy_stop_words
    vectorizer = TfidfVectorizer(stop_words=list(stop_words))
    X = vectorizer.fit_transform(sentences)
    similarity_matrix = cosine_similarity(X, X)

    nx_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(nx_graph)
    ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
    return ' '.join([ranked_sentences[i][1] for i in range(num_paragraphs)])


# Save text and/or summary to a PDF
def save_to_pdf(text, summary):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    if text:
        pdf.multi_cell(0, 10, "Text:\n" + text)

    pdf.ln(10)  # Paragraph space

    if summary:
        pdf.multi_cell(0, 10, "Summary:\n" + summary)

    pdf_output_path = "transcription.pdf"
    pdf.output(pdf_output_path)
    return pdf_output_path

iface = gr.Blocks()

with iface:
    
    gr.HTML(SIDEBAR_INFO)
    gr.Markdown(HEADER_INFO)

    with gr.Tabs():
        with gr.TabItem("Transcription"):
            audio_input = gr.Audio(type="filepath")
            text_output = gr.Textbox(label="Text")
            result_output = gr.Textbox(label="Transcription Details")
            transcribe_button = gr.Button("Transcribe")

            transcribe_button.click(fn=transcribe_audio, inputs=[audio_input], outputs=[text_output, result_output])

        with gr.TabItem("Summary | Graph-based"):
            summary_output = gr.Textbox(label="Summary | Graph-based")
            summarize_button = gr.Button("Summarize")

            summarize_button.click(fn=lambda text: graph_based_summary(text), inputs=[text_output], outputs=[summary_output])

        with gr.TabItem("Summary | LexRank"):
            summary_output = gr.Textbox(label="Summary | LexRank")
            summarize_button = gr.Button("Summarize")

            summarize_button.click(fn=lambda text: lex_rank_summary(text), inputs=[text_output], outputs=[summary_output])

        with gr.TabItem("Summary | TextRank"):
            summary_output = gr.Textbox(label="Summary | TextRank")
            summarize_button = gr.Button("Summarize")

            summarize_button.click(fn=lambda text: text_rank_summary(text), inputs=[text_output], outputs=[summary_output])

        with gr.TabItem("Download PDF"):
            pdf_text_only = gr.Button("Download PDF with Text Only")
            pdf_summary_only = gr.Button("Download PDF with Summary Only")
            pdf_both = gr.Button("Download PDF with Both")

            pdf_output = gr.File(label="Download PDF")

            pdf_text_only.click(fn=lambda text: save_to_pdf(text, ""), inputs=[text_output], outputs=[pdf_output])
            pdf_summary_only.click(fn=lambda summary: save_to_pdf("", summary), inputs=[summary_output], outputs=[pdf_output])
            pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output], outputs=[pdf_output])

iface.launch(share=True, debug=True)