Update app.py
app.py
CHANGED
@@ -1,10 +1,44 @@
 import gradio as gr
+import cv2
+import easyocr
+import numpy as np
+import requests
+import os
 import whisper
 from transformers import pipeline
 
+API_KEY = os.getenv("API_KEY")
+
+API_URL = "https://api-inference.huggingface.co/models/dima806/facial_emotions_image_detection"
+headers = {"Authorization": f"Bearer {API_KEY}"}
+
+reader = easyocr.Reader(['en'], gpu=False)
+
 model = whisper.load_model("base")
 sentiment_analysis = pipeline("sentiment-analysis", framework="pt", model="SamLowe/roberta-base-go_emotions")
 
+def query(image):
+    image_data = np.array(image, dtype=np.uint8)
+    _, buffer = cv2.imencode('.jpg', image_data)  # encode as JPEG bytes for the API
+    binary_data = buffer.tobytes()
+
+    response = requests.post(API_URL, headers=headers, data=binary_data)
+    return response.json()
+
+def text_extraction(image):
+    global text_content
+    text_content = ''
+    facial_data = query(image)
+    text_ = reader.readtext(image)
+    threshold = 0.25
+    for t_, t in enumerate(text_):
+        bbox, text, score = t
+        text_content = text_content + ' ' + text
+        if score > threshold:
+            cv2.rectangle(image, tuple(map(int, bbox[0])), tuple(map(int, bbox[2])), (0, 255, 0), 5)
+
+    return image, text_content, facial_data
+
 def analyze_sentiment(text):
     results = sentiment_analysis(text)
     sentiment_results = {result['label']: result['score'] for result in results}
@@ -54,7 +88,9 @@ def display_sentiment_results(sentiment_results, option):
         sentiment_text += f"{sentiment} {emoji}: {score}\n"
     return sentiment_text
 
-def inference(audio, sentiment_option):
+def inference(image, text, audio, sentiment_option):
+    extracted_image, extracted_text, extracted_facial_data = text_extraction(image)
+
     audio = whisper.load_audio(audio)
     audio = whisper.pad_or_trim(audio)
 
@@ -66,15 +102,20 @@ def inference(audio, sentiment_option):
     options = whisper.DecodingOptions(fp16=False)
     result = whisper.decode(model, mel, options)
 
-    sentiment_results = analyze_sentiment(result.text)
-    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)
+    audio_sentiment_results = analyze_sentiment(result.text)     # Ta - text from audio
+    image_sentiment_results = analyze_sentiment(extracted_text)  # Ti - text from image
+    text_sentiment_results = analyze_sentiment(text)             # T - user-defined text
+
+    audio_sentiment_output = display_sentiment_results(audio_sentiment_results, sentiment_option)
+    image_sentiment_output = display_sentiment_results(image_sentiment_results, sentiment_option)
+    text_sentiment_output = display_sentiment_results(text_sentiment_results, sentiment_option)
 
-    return lang.upper(), result.text, sentiment_output
+    return extracted_image, extracted_facial_data, extracted_text, image_sentiment_output, text_sentiment_output, lang.upper(), result.text, audio_sentiment_output
 
-title = """<h1 align="center"
+title = """<h1 align="center">Cross Model Machine Learning (Sentiment Analysis)</h1>"""
 image_path = "thmbnail.jpg"
 description = """
-💻 This demo showcases a
+💻 This demo showcases a Cross Model Machine Learning tool for Sentiment Analysis.<br><br>
 <br>
 ⚙️ Components of the tool:<br>
 <br>
@@ -121,34 +162,32 @@ with block:
     with gr.Column():
         gr.HTML(description)
 
-        with gr.
+        with gr.Group():
             with gr.Row():
-                …
-                </div>
-                ''')
+                with gr.Column():
+                    image = gr.Image()
+
+                    image_output = gr.Image()
+                    text_output = gr.Textbox(label="Text Content")
+                    text_sentiment = gr.Textbox(label="Text Sentiment")
+                    facial_output = gr.JSON(label="Facial Data")
+
+                with gr.Column():
+                    gr.Textbox(label="Text Content")
+
+                    output_text_sentiment = gr.Textbox(label="Text Sentiment")
+
+                with gr.Column():
+                    audio = gr.Audio(label="Input Audio", show_label=False, type="filepath")
+                    sentiment_option = gr.Radio(choices=["Sentiment Only", "Sentiment + Score"], label="Select an option")
+
+                    lang_str = gr.Textbox(label="Language")
+                    text = gr.Textbox(label="Transcription")
+                    sentiment_output = gr.Textbox(label="Sentiment Analysis Results")
+
+                    btn = gr.Button("Transcribe")
+
+                    btn.click(inference, inputs=[image, text, audio, sentiment_option], outputs=[image_output, facial_output, text_output, text_sentiment, output_text_sentiment, lang_str, text, sentiment_output])
 
 block.launch()
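A note on the new query() helper: the Inference API endpoint behind dima806/facial_emotions_image_detection is an image-classification model, so a successful response should be a list of label/score dicts, while a cold-starting model answers with a dict carrying an "error" key instead. A minimal consumer sketch under those assumptions (the labels and scores are invented for illustration):

def top_emotion(facial_data):
    # Cold start or failure: the API returns a dict, e.g. {"error": "..."}
    if isinstance(facial_data, dict):
        return None
    # Success: a list such as [{"label": "happy", "score": 0.93}, ...]
    best = max(facial_data, key=lambda item: item["score"])
    return best["label"], best["score"]

The app itself passes the raw JSON straight to a gr.JSON output, so a guard like this only matters if the facial data is consumed programmatically.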
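The unpacking inside text_extraction() leans on EasyOCR's result shape: reader.readtext() yields one (bbox, text, confidence) triple per detected region, where bbox holds four [x, y] corner points starting at the top-left and running clockwise, which is why bbox[0] and bbox[2] serve as opposite corners of the drawn rectangle. A standalone sketch (the image path is a placeholder):

import easyocr

reader = easyocr.Reader(['en'], gpu=False)
for bbox, text, score in reader.readtext('sample.jpg'):  # placeholder path
    top_left = tuple(map(int, bbox[0]))      # e.g. (10, 12)
    bottom_right = tuple(map(int, bbox[2]))  # e.g. (118, 41)
    print(f"{text!r} ({score:.2f}) at {top_left}-{bottom_right}")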
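The reworked inference() funnels three text sources through the same go_emotions classifier, the Whisper transcript (Ta), the OCR text from the image (Ti), and the user-supplied text (T), so all three sentiment outputs share one format. By default a sentiment-analysis pipeline returns only the top label per input, which makes each analyze_sentiment() result a single-entry dict; the values here are invented:

# One pass through analyze_sentiment(), with made-up numbers:
results = sentiment_analysis("I love this demo")
# -> [{'label': 'love', 'score': 0.91}]   (top label only, by default)
sentiment_results = {result['label']: result['score'] for result in results}
# -> {'love': 0.91}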
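Finally, btn.click pairs the eight values returned by inference() with the outputs list purely by position:

# return value of inference()   -> output component
# extracted_image               -> image_output
# extracted_facial_data         -> facial_output
# extracted_text                -> text_output
# image_sentiment_output        -> text_sentiment
# text_sentiment_output         -> output_text_sentiment
# lang.upper()                  -> lang_str
# result.text                   -> text (also an input, so the typed text
#                                  is overwritten by the transcription)
# audio_sentiment_output        -> sentiment_output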