JaweriaGenAI committed on
Commit 19ae010 · verified · 1 Parent(s): bb4138c

Update app.py

Files changed (1)
  1. app.py +46 -73
app.py CHANGED
@@ -1,102 +1,75 @@
  import os
  import gradio as gr
- import pdfplumber
- import docx
- import pandas as pd
- from PIL import Image
- from io import BytesIO
- import base64
- import whisper
  from openai import OpenAI

- # Load Whisper model
- whisper_model = whisper.load_model("base")
-
- # Load Groq API key
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

- # Initialize OpenAI client with Groq base URL
  client = OpenAI(
      api_key=GROQ_API_KEY,
      base_url="https://api.groq.com/openai/v1"
  )

- def extract_text_from_file(file):
-     if file.name.endswith(".pdf"):
-         with pdfplumber.open(file.name) as pdf:
-             text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
-     elif file.name.endswith(".docx"):
-         doc = docx.Document(file.name)
-         text = "\n".join(p.text for p in doc.paragraphs)
-     elif file.name.endswith(".xlsx"):
-         df = pd.read_excel(file.name)
-         text = df.to_string()
-     elif file.name.endswith((".png", ".jpg", ".jpeg")):
-         img = Image.open(file.name)
-         buffer = BytesIO()
-         img.save(buffer, format="PNG")
-         encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
-         text = f"[Image uploaded: data:image/png;base64,{encoded[:100]}... (truncated)]"
-     else:
-         with open(file.name, "r", encoding="utf-8", errors="ignore") as f:
-             text = f.read()
-     return text
-
- def transcribe_audio(audio_path):
-     result = whisper_model.transcribe(audio_path)
-     return result["text"]
-
- def generate_reply(history):
-     messages = [{"role": "system", "content": "You are a helpful assistant."}]
-     for user_msg, bot_msg in history:
-         messages.append({"role": "user", "content": user_msg})
-         messages.append({"role": "assistant", "content": bot_msg})

      response = client.chat.completions.create(
          model="llama3-8b-8192",
-         messages=messages,
-         temperature=0.7
      )
      reply = response.choices[0].message.content
-     return reply

- def respond(message, history):
-     reply = generate_reply(history + [[message, ""]])
-     history.append([message, reply])
-     return history, ""

- def handle_file_upload(file, message):
-     if file is None:
-         return message
-     file_content = extract_text_from_file(file)
-     return f"{message}\n\n--- File Content Start ---\n{file_content}\n--- File Content End ---"

- def handle_audio_upload(audio, message):
-     if audio is None:
-         return message
-     transcription = transcribe_audio(audio)
-     return f"{message}\n\n--- Transcription ---\n{transcription}"

- with gr.Blocks(css="body { background-color: white; color: black }") as demo:
-     gr.Markdown("<h1 style='text-align: center;'>Neobot</h1>")
-     chatbot = gr.Chatbot(label="Chat", elem_id="chatbox", height=450, type="messages")

      with gr.Row():
-         txt = gr.Textbox(placeholder="Type a message or edit transcribed/file content here...", scale=5, show_label=False)
-         send_btn = gr.Button("Send", scale=1)

      with gr.Row():
-         upload_btn = gr.File(label="📎 Upload File", file_types=[".pdf", ".docx", ".txt", ".xlsx", ".png", ".jpg", ".jpeg"])
-         audio_in = gr.Audio(label="🎙️ Upload Audio", type="filepath")

-     history = gr.State([])

-     send_btn.click(respond, [txt, history], [chatbot, txt])
-     upload_btn.change(handle_file_upload, [upload_btn, txt], txt)
-     audio_in.change(handle_audio_upload, [audio_in, txt], txt)

  demo.launch()
 
  import os
  import gradio as gr
  from openai import OpenAI
+ import tempfile
+ import torch
+ import whisper

+ # Load API key from environment
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

+ # Initialize client for Groq-compatible OpenAI API
  client = OpenAI(
      api_key=GROQ_API_KEY,
      base_url="https://api.groq.com/openai/v1"
  )

+ # Load Whisper model
+ whisper_model = whisper.load_model("base")
+
+ # Chat history storage
+ chat_history = []

+ def chat_with_bot(message, history):
+     global chat_history
+     chat_history = history or []
+
+     # Append user message to chat history
+     chat_history.append({"role": "user", "content": message})
+
+     # Call Groq LLM
      response = client.chat.completions.create(
          model="llama3-8b-8192",
+         messages=chat_history
      )
+
      reply = response.choices[0].message.content

+     # Append assistant reply to chat history
+     chat_history.append({"role": "assistant", "content": reply})
+
+     # Prepare output format for Gradio (list of tuples)
+     formatted_history = [(m['content'], chat_history[i+1]['content']) for i, m in enumerate(chat_history[:-1]) if m['role'] == 'user']
+     return formatted_history, chat_history

+ def transcribe_audio(audio_file):
+     if audio_file is None:
+         return ""
+     audio = whisper.load_audio(audio_file)
+     audio = whisper.pad_or_trim(audio)
+     mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
+     options = whisper.DecodingOptions(fp16=torch.cuda.is_available())
+     result = whisper.decode(whisper_model, mel, options)
+     return result.text

+ with gr.Blocks() as demo:
+     gr.Markdown("# 🤖 Neobot - Chat with Voice, File & Text")

+     chatbot = gr.Chatbot()
+     state = gr.State([])

      with gr.Row():
+         txt = gr.Textbox(placeholder="Type a message or upload audio/file...", show_label=False)
+         send_btn = gr.Button("Send")

      with gr.Row():
+         audio_upload = gr.Audio(source="upload", type="filepath", label="Upload Audio")
+         transcribe_btn = gr.Button("Transcribe Audio")

+     # Chat event
+     send_btn.click(chat_with_bot, inputs=[txt, state], outputs=[chatbot, state])

+     # Audio transcription event
+     transcribe_btn.click(transcribe_audio, inputs=audio_upload, outputs=txt)

  demo.launch()
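
For reference, a minimal standalone sketch of the Groq chat call that the new chat_with_bot makes. The base URL, model name, environment variable, and message format are taken from the committed code; the example prompt is only illustrative and is not part of this commit.

import os
from openai import OpenAI

# Groq exposes an OpenAI-compatible endpoint, so the stock OpenAI client is reused
client = OpenAI(
    api_key=os.environ["GROQ_API_KEY"],
    base_url="https://api.groq.com/openai/v1",
)

# History is a flat list of {"role": ..., "content": ...} dicts,
# the same shape chat_with_bot accumulates in chat_history
history = [{"role": "user", "content": "Hello, Neobot!"}]  # hypothetical prompt

response = client.chat.completions.create(
    model="llama3-8b-8192",
    messages=history,
)
print(response.choices[0].message.content)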