File size: 7,209 Bytes
abc89d1
c0228d9
abc89d1
 
 
351252d
 
 
 
 
 
 
 
 
 
 
abc89d1
 
e79814a
dbca570
 
abc89d1
c5571fa
f0c35fe
d4b107b
cf8326e
85002a1
ad6d7c2
 
abc89d1
 
85002a1
 
c5571fa
dbca570
2f03bd6
ad6d7c2
dbca570
abc89d1
04f2c63
8c6ad91
04f2c63
 
cf8326e
04f2c63
 
 
 
 
 
cf8326e
ad6d7c2
dbca570
3a0e2ab
85002a1
4a470bd
85002a1
73a1be0
 
 
 
 
 
 
1813060
e79814a
638acc9
73a1be0
 
e79814a
 
9d34978
e79814a
 
 
9d34978
e79814a
73a1be0
 
 
32f88c0
361f8d0
5c44de8
85002a1
9d34978
 
 
 
 
 
 
 
 
 
 
5c44de8
361f8d0
9d34978
4a5b260
85002a1
fa68d0f
 
361f8d0
102fb89
 
18c392d
9d34978
9e722fb
abc89d1
 
 
 
c0228d9
abc89d1
18c392d
8ec53db
abc89d1
 
 
 
 
 
 
 
2fb8a5f
102fb89
 
1667a9d
fa68d0f
 
102fb89
 
 
1667a9d
fa68d0f
dee4184
102fb89
 
dee4184
102fb89
abc89d1
6a67784
abc89d1
ad6d7c2
85002a1
5ca37ae
abc89d1
49113b6
f185ce3
 
 
 
 
 
49113b6
 
aebda00
abc89d1
18c392d
1813060
102fb89
 
1813060
abc89d1
 
102fb89
18c392d
73a1be0
56823a6
 
9d34978
 
 
 
 
 
 
 
 
 
 
 
 
73a1be0
 
 
 
613108f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
### -----------------------------------------------------------------------
### (BASE, Revised) version_1.07 ALPHA, app.py
### -----------------------------------------------------------------------

# -------------------------------------------------------------------------
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -------------------------------------------------------------------------

import spaces
import gradio as gr
from PIL import Image
#from pydub import AudioSegment
#from scipy.io import wavfile

import os
import re
import time
import warnings
#import datetime
#import pandas as pd
#import csv
import subprocess
from pathlib import Path
import tempfile
from fpdf import FPDF

import psutil
from gpuinfo import GPUInfo

#import numpy as np
import torch
#import torchaudio
#import torchaudio.transforms as transforms

from transformers import pipeline #AutoModel

#import spacy
#import networkx as nx
#from sklearn.feature_extraction.text import TfidfVectorizer
#from sklearn.metrics.pairwise import cosine_similarity

warnings.filterwarnings("ignore")

# ------------header section------------
# Markdown heading shown through gr.Markdown in the UI section of this file.
HEADER_INFO = "# SWITCHVOX ✨|🇳🇴 *Transkribering av lydfiler til norsk skrift*"

# Location of the banner image embedded in SIDEBAR_INFO.
# NOTE(review): the query string carries `Expires=1725145079` together with a
# policy and signature, so this is presumably a time-limited link -- confirm it
# still resolves and consider pointing at a stable URL instead.
LOGO = "https://cdn-lfs-us-1.huggingface.co/repos/fe/3b/fe3bd7c8beece8b087fddcc2278295e7f56c794c8dcf728189f4af8bddc585e1/24ad06a03a5bc66f3eba361b94e45ad17e46f98b76632f2d17faf8a0b4f9ab6b?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27banner_trans.png%3B+filename%3D%22banner_trans.png%22%3B&response-content-type=image%2Fpng&Expires=1725145079&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNTE0NTA3OX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2ZlLzNiL2ZlM2JkN2M4YmVlY2U4YjA4N2ZkZGNjMjI3ODI5NWU3ZjU2Yzc5NGM4ZGNmNzI4MTg5ZjRhZjhiZGRjNTg1ZTEvMjRhZDA2YTAzYTViYzY2ZjNlYmEzNjFiOTRlNDVhZDE3ZTQ2Zjk4Yjc2NjMyZjJkMTdmYWY4YTBiNGY5YWI2Yj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=PCB1BZeLzsZXnn4lRi1Fj50%7E0E4G39u6-GKLNLLgxhDyhXlqb3BJkR7IOgdyjuNyBh8Iz2d7QqhzNSsOlQmqR30cJLl6aDM5eJO01OlWXoD3Z0KcphoVBFyyrkoxe2oS8i2mdlbFRYn7oc%7EhyOcW46zR6HtqAB91iEydhEa5WTyz3C9nWasgMZevb0vRJtzwhplM9e-%7EbRrZTm2fMzkL14IGWpTpUOGBe93BDSAYbPhrZK1jvuY8p0Tmy1iEKVP3Zdzix5U5lrbxit5luitEhK8x6q2t63Gdv7F0CZvjQtTh7MYkB5GNiru8bTGKAgCdHGiZbG7VCGfhlX3UKvUTPg__&Key-Pair-Id=K24J24Z295AEI9"

# HTML snippet that centers the banner and stretches it to the full width of
# its container; assembled line by line around LOGO.
SIDEBAR_INFO = "".join(
    [
        "\n",
        '<div align="center">\n',
        '    <img src="' + LOGO + '" style="width: 100%; height: auto;"/>\n',
        "</div>\n",
    ]
)

# ASR pipelines that have already been built, keyed by device string, so the
# model is not reconstructed for every request served by the same process.
_ASR_PIPELINES = {}


def _get_asr_pipeline(device):
    """Return the NB-Whisper ASR pipeline for *device*, building it on first use."""
    if device not in _ASR_PIPELINES:
        _ASR_PIPELINES[device] = pipeline(
            "automatic-speech-recognition",
            model="NbAiLab/nb-whisper-large",
            device=device,
        )
    return _ASR_PIPELINES[device]


@spaces.GPU(duration=120)
def transcribe(microphone, file_upload):
    """Transcribe an audio file and report how long it took.

    Args:
        microphone: Path of a microphone recording, or None. Takes precedence
            over ``file_upload`` when both are given.
        file_upload: Path of an uploaded audio file, or None.

    Returns:
        A ``(text, system_info)`` tuple: the transcribed text and a short
        multi-line string with the processing time and the word count.

    Raises:
        gr.Error: If neither a recording nor an upload was provided.
    """
    file = microphone if microphone is not None else file_upload
    if file is None:
        # Fail with a readable message instead of letting the pipeline choke
        # on a None input.
        raise gr.Error(
            "Ingen lydfil valgt. Last opp en lydfil eller ta opp med mikrofonen."
        )

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()

    device = "cuda" if torch.cuda.is_available() else "cpu"
    text = _get_asr_pipeline(device)(file)["text"]

    output_time = time.perf_counter() - start_time
    word_count = len(text.split())

    # Only the values that are actually displayed are computed here. Sampling
    # host/GPU metrics is intentionally left out: none of it was shown, and a
    # one-second CPU sample would delay every response.
    system_info = f"""
    Processing time: {output_time:.2f} seconds.
    Number of words: {word_count}
    """
    return text, system_info

def _latin1_safe(value):
    """Return *value* with every character outside Latin-1 replaced by '?'."""
    return value.encode("latin-1", errors="replace").decode("latin-1")


def save_to_pdf(text, summary):
    """Write the transcript and an optional summary to a PDF file.

    Args:
        text: Transcribed text; the section is skipped when empty or None.
        summary: Summary text; the section is skipped when empty or None.

    Returns:
        Path of the generated PDF as a string. Every call produces its own
        file in the system temp directory.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    # The built-in "Arial" font is not a Unicode font; characters that do not
    # fit Latin-1 (emoji, typographic quotes, ...) are replaced up front so
    # that PDF generation does not abort with an encoding error.
    if text:
        pdf.multi_cell(0, 10, "Transkribert Tekst:\n" + _latin1_safe(text))

    pdf.ln(10)  # vertical gap between the transcript and the summary

    if summary:
        pdf.multi_cell(0, 10, "Summary:\n" + _latin1_safe(summary))

    # A unique file per call keeps concurrent users from overwriting each
    # other's download. The file is left on disk (delete=False) because the
    # caller hands the path on after this function returns.
    with tempfile.NamedTemporaryFile(
        prefix="transcription_", suffix=".pdf", delete=False
    ) as handle:
        pdf_output_path = handle.name

    pdf.output(pdf_output_path)
    return pdf_output_path

# Custom stylesheet handed to gr.Blocks below. The two selectors match the
# elem_id values given to the transcript and info textboxes in the layout.
css = """
#transcription_output textarea {
    background-color: #000000;  /* black */
    color: #00FF00 !important;  /* text color */
    font-size: 16px;  /* font size */
}

#system_info_box textarea {
    background-color: #ffe0b3;  /* orange */
    color: black !important;  /* text color */
    font-size: 14px;  /* font size */
}
"""

# Top-level Gradio container; everything created inside the `with` block
# becomes part of this app, in the order the statements appear.
iface = gr.Blocks(css=css)

with iface:

    # Banner image followed by the page heading.
    gr.HTML(SIDEBAR_INFO)
    gr.Markdown(HEADER_INFO)

    # Input row: usage notes, the two audio sources and the trigger button.
    with gr.Row():
        gr.Markdown('''
        ##### 🔊 Last opp lydfila 
        ##### ☕️ Trykk på "Transkriber" knappen og vent på svar
        ##### ⚡️ Går rimelig bra kjapt med Norwegian NB-Whisper Large..
        ##### 😅 Planlegger tilleggs-funksjoner senere
        ##### 🎤 Bruk av mikrofon mulig (*ikke testet*) 
        ''')
        # Both audio components use type="filepath", so transcribe() receives
        # file paths (or None for an unused source).
        microphone = gr.Audio(sources="microphone", type="filepath")
        upload = gr.Audio(sources="upload", type="filepath")
        transcribe_btn = gr.Button("Transkriber")

    # Output row: the transcript in a wider column, the timing/word-count box
    # next to it.
    with gr.Row():   
        with gr.Column(scale=3):
            text_output = gr.Textbox(label="Transkribert Tekst", elem_id="transcription_output")
        system_info = gr.Textbox(label="Antall sekunder, ord:", elem_id="system_info_box")
    
    with gr.Tabs():
        with gr.TabItem("Download PDF"):
            pdf_text_only = gr.Button("Last ned pdf med resultat")
            # NOTE(review): the label "/.docx?" looks like a leftover
            # placeholder -- confirm the intended wording.
            pdf_output = gr.File(label="/.docx?")

            # Export only the transcript: the summary argument is always "".
            pdf_text_only.click(fn=lambda text: save_to_pdf(text, ""), inputs=[text_output], outputs=[pdf_output])

    # Footer badges linking to open-source and license information.
    with gr.Row():
        gr.Markdown('''
        <div align="center">
            <a href="https://opensource.com/resources/what-open-source">
                <img src="https://badgen.net/badge/Open%20Source%20%3F/Yes%21/blue?icon=github" alt="Open Source? Yes!">
        </a>
        <span style="display:inline-block; width: 20px;"></span> 
        <a href="https://opensource.org/licenses/Apache-2.0">
            <img src="https://img.shields.io/badge/License-Apache_2.0-blue.svg" alt="License: Apache 2.0">
        </a>
        </div>
        ''')
        
    # Run transcribe() on the two audio inputs and route its (text, info)
    # result to the two textboxes.
    transcribe_btn.click(fn=transcribe, inputs=[microphone, upload], outputs=[text_output, system_info])



# Starts the app as soon as this module is executed (there is no
# `__main__` guard), with the share and debug options enabled.
iface.launch(share=True, debug=True)