awacke1 committed
Commit d881b0d · verified
1 Parent(s): 0de03c5

Update app.py

Files changed (1)
  1. app.py +167 -69
app.py CHANGED
@@ -1,98 +1,196 @@
-# app.py
 import os
 import gradio as gr
 import openai
 
-# ─── CONFIG ────────────────────────────────────────────────────────────────────
 KEY_FILE = "openai_api_key.txt"
-MODEL = "gpt-4o-2024-05-13"
-
-# Try to load a cached key (global for all users)
 if os.path.exists(KEY_FILE):
-    with open(KEY_FILE, "r") as f:
         DEFAULT_KEY = f.read().strip()
 else:
-    DEFAULT_KEY = ""
-
-# ─── HELPERS ───────────────────────────────────────────────────────────────────
-
-def save_api_key(api_key: str) -> str:
-    """Save the key to disk so it's remembered for all future sessions."""
-    with open(KEY_FILE, "w") as f:
         f.write(api_key.strip())
-    return " API key saved and will be used for all users."
-
-def chat_with_openai(api_key: str, user_message: str, history: list) -> list:
-    """Invoke GPT-4o with the supplied API key and append to chat history."""
     openai.api_key = api_key.strip()
-    # ensure history is a list of tuples [(user, bot), ...]
-    history = history or []
-    # Build messages in OpenAI format
     messages = []
     for u, a in history:
-        messages.append({"role": "user", "content": u})
-        messages.append({"role": "assistant", "content": a})
-    messages.append({"role": "user", "content": user_message})
-    # Call the API
-    resp = openai.ChatCompletion.create(
-        model=MODEL,
-        messages=messages,
-    )
     answer = resp.choices[0].message.content
     history.append((user_message, answer))
     return history
 
-# ─── UI LAYOUT ────────────────────────────────────────────────────────────────
-
-with gr.Blocks(title="🌐 GPT-4o Multimodal (Skeleton)") as demo:
 
-    gr.Markdown(
-        """
-        # 🤖 GPT-4o Client
-        Enter your OpenAI API key once below.
-        It will be cached on the server for all future sessions.
-        """
     )
 
-    with gr.Row():
-        api_key_input = gr.Textbox(
-            label="🔑 OpenAI API Key",
-            placeholder="sk-…",
-            value=DEFAULT_KEY,
-            type="password",
-        )
-        save_button = gr.Button("💾 Save API Key")
-        save_status = gr.Textbox(
-            label="Status",
-            interactive=False,
-            placeholder="–"
-        )
-
-    # Wire the save button
-    save_button.click(
-        fn=save_api_key,
-        inputs=api_key_input,
-        outputs=save_status,
-    )
 
-    gr.Markdown("---\n## 💬 Chat with GPT-4o")
-    chatbot = gr.Chatbot(label="Chat History")
-    msg = gr.Textbox(
-        label="Your message",
-        placeholder="Type something and press Enter…",
-    )
 
-    # When you hit Enter in the textbox, call the chat fn
-    msg.submit(
-        fn=chat_with_openai,
-        inputs=[api_key_input, msg, chatbot],
-        outputs=chatbot,
     )
 
-# ─── RUN ───────────────────────────────────────────────────────────────────────
 
 if __name__ == "__main__":
-    # Use 0.0.0.0 if you want external access; port can be adjusted as needed
-    demo.launch(server_name="0.0.0.0", server_port=7860)
 import os
+import base64
+import re
+import requests
+import pytz
+import json
+from io import BytesIO
+from datetime import datetime
 import gradio as gr
 import openai
+import fitz  # pymupdf
+from bs4 import BeautifulSoup
+from moviepy.video.io.VideoFileClip import VideoFileClip
 
+# 🔐 CONFIG
 KEY_FILE = "openai_api_key.txt"
+MODEL = "gpt-4o-2024-05-13"
 
+# 🍿 Default key load
 if os.path.exists(KEY_FILE):
+    with open(KEY_FILE, 'r') as f:
         DEFAULT_KEY = f.read().strip()
 else:
+    DEFAULT_KEY = ''
 
+# 🔧 HELPERS
+def save_api_key(api_key):
+    with open(KEY_FILE, 'w') as f:
         f.write(api_key.strip())
+    return "🔑 Key saved!"
+
+# 🗒️ Chat
+def chat_with_openai(api_key, user_message, history):
     openai.api_key = api_key.strip()
     messages = []
     for u, a in history:
+        messages.append({"role": "user", "content": u})
+        messages.append({"role": "assistant", "content": a})
+    messages.append({"role": "user", "content": user_message})
+    resp = openai.ChatCompletion.create(model=MODEL, messages=messages)
     answer = resp.choices[0].message.content
     history.append((user_message, answer))
     return history
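+
+# A hypothetical equivalent on the openai>=1.0 SDK (an assumption for reference;
+# this file targets the pre-1.0 API used above):
+#   from openai import OpenAI
+#   client = OpenAI(api_key=api_key)
+#   resp = client.chat.completions.create(model=MODEL, messages=messages)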
+
+# 🖼️ Image analysis
+def image_to_base64(file):
+    return base64.b64encode(file.read()).decode()
+
+def analyze_image(api_key, file, prompt):
+    data_uri = f"data:image/png;base64,{image_to_base64(file)}"
+    openai.api_key = api_key.strip()
+    resp = openai.ChatCompletion.create(
+        model=MODEL,
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
+            {"role": "user", "content": [
+                {"type": "text", "text": prompt},
+                {"type": "image_url", "image_url": {"url": data_uri}}
+            ]}
+        ]
     )
+    return resp.choices[0].message.content
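+
+# Sketch (assumption, not in the original): derive the MIME type instead of
+# hardcoding image/png for every upload:
+#   import mimetypes
+#   mime = mimetypes.guess_type(file.name)[0] or "image/png"
+#   data_uri = f"data:{mime};base64,{image_to_base64(file)}"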
+
+# 🎤 Audio transcription + chat
+def transcribe_audio_file(api_key, file):
+    openai.api_key = api_key.strip()
+    # pre-1.0 openai-python spells this openai.Audio.transcribe(model, file)
+    resp = openai.Audio.transcribe("whisper-1", file)
+    return resp.text
+
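+# Note: the transcription call expects an open binary file object; if the Gradio
+# version in use hands gr.File values over as path wrappers, pass
+# open(file.name, "rb") instead (an assumption about the runtime, not verified here).
+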
+# 🎥 Video summarize
+def summarize_video(api_key, file, prompt, seconds=2):
+    # save the upload to a temp file so moviepy can open it
+    with open("tmp_vid.mp4", 'wb') as f:
+        f.write(file.read())
+    clip = VideoFileClip("tmp_vid.mp4")
+    frames = []
+    from PIL import Image  # local import, as in the original
+    # sample one frame every `seconds` seconds, base64-encoded as JPEG
+    for t in range(0, int(clip.duration), seconds):
+        frame = clip.get_frame(t)
+        buf = BytesIO()
+        Image.fromarray(frame).save(buf, format='JPEG')
+        frames.append(base64.b64encode(buf.getvalue()).decode())
+    transcript = transcribe_audio_file(api_key, open("tmp_vid.mp4", 'rb'))
+    openai.api_key = api_key.strip()
+    messages = [{"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": prompt}]
+    for f64 in frames:
+        # multimodal content must be a list of typed parts, not a bare dict
+        messages.append({"role": "user", "content": [
+            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{f64}"}}
+        ]})
+    messages.append({"role": "user", "content": f"Transcript: {transcript}"})
+    resp = openai.ChatCompletion.create(model=MODEL, messages=messages)
+    return resp.choices[0].message.content
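+
+# Cost sketch (editorial assumption): one frame is sent per `seconds` seconds, so a
+# 60 s clip at the default seconds=2 contributes 30 image parts; raise `seconds`
+# for long videos to stay within the model's input limits.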
 
+# 📄 PDF->Markdown
+def pdf_to_markdown(path):
+    doc = fitz.open(path)
+    md = ''
+    for page in doc:
+        # fitz get_text() has no 'markdown' mode; extract plain text per page
+        md += page.get_text('text') + '\n'
+    return md
+
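+# For genuine Markdown output, the separate pymupdf4llm package could be swapped in
+# (an assumption; it is not a dependency of this commit):
+#   import pymupdf4llm
+#   md = pymupdf4llm.to_markdown(path)
+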
+# 🔍 ArXiv RAG
+from gradio_client import Client
+def arxiv_search(query):
+    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
+    refs = client.predict(query, 10, "Semantic Search - up to 10 Mar 2024", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md")
+    ans = client.predict(query, "mistralai/Mixtral-8x7B-Instruct-v0.1", True, api_name="/ask_llm")
+    return refs + "\n" + ans
+
+# 🔈 TTS
+from gtts import gTTS
+def tts_bytes(text):
+    buf = BytesIO()
+    gTTS(text=text, lang='en').write_to_fp(buf)
+    buf.seek(0)
+    return buf.read()
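+
+# Usage sketch: gTTS emits MP3 bytes, so the result can be saved directly, e.g.
+#   open("speech.mp3", "wb").write(tts_bytes("Hello from ScienceBrain"))
+# (tts_bytes is defined but not yet wired to a tab below.)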
 
+# UI CONFIG
+ui_config = {
+    "chat": {"label": "💬 ChatGPT-4o", "placeholder": "Say something..."},
+    "image_prompt": {"label": "🖼️ Image Prompt", "default": "Describe this image..."},
+    "audio_prompt": {"label": "🎤 Audio Prompt", "default": "Transcribe and summarize..."},
+    "video_prompt": {"label": "🎥 Video Prompt", "default": "Summarize this video..."},
+    "pdf_prompt": {"label": "📄 PDF Prompt", "default": "Convert PDF to markdown..."},
+    "arxiv_prompt": {"label": "🔍 Arxiv Query", "default": "Search papers..."}
+}
+
+with gr.Blocks(title="🔬🧠 ScienceBrain.Gradio") as demo:
+    gr.Markdown("# 🔬🧠 ScienceBrain Gradio\nEnter API key below.")
+    with gr.Row():
+        api_key = gr.Textbox(label="🔑 OpenAI Key", value=DEFAULT_KEY, type="password")
+        save_btn = gr.Button("💾 Save Key")
+        status = gr.Textbox(label="Status", interactive=False)
+    save_btn.click(save_api_key, inputs=api_key, outputs=status)
+
+    # Tabs for each modality
+    with gr.Tab("💬 Chat"):
+        chatbot = gr.Chatbot(label=ui_config['chat']['label'], value=[])
+        msg = gr.Textbox(label=ui_config['chat']['label'], placeholder=ui_config['chat']['placeholder'])
+        msg.submit(chat_with_openai, inputs=[api_key, msg, chatbot], outputs=chatbot)
+
+    with gr.Tab("🖼️ Image"):
+        img_in = gr.File(file_types=['.png', '.jpg', '.jpeg'])
+        img_prompt = gr.Textbox(label=ui_config['image_prompt']['label'], value=ui_config['image_prompt']['default'])
+        img_btn = gr.Button("🔍 Analyze Image")
+        img_out = gr.Markdown()
+        img_btn.click(analyze_image, inputs=[api_key, img_in, img_prompt], outputs=img_out)
+
+    with gr.Tab("🎤 Audio"):
+        aud_in = gr.File(file_types=['.wav', '.mp3'])
+        aud_btn = gr.Button("🔊 Transcribe + Chat")
+        aud_out = gr.Markdown()
+        def audio_pipeline(key, f):
+            text = transcribe_audio_file(key, f)
+            reply = chat_with_openai(key, text, [])[-1][1]
+            return f"**Transcript:** {text}\n\n**Reply:** {reply}"
+        aud_btn.click(audio_pipeline, inputs=[api_key, aud_in], outputs=aud_out)
+
+    with gr.Tab("🎥 Video"):
+        vid_in = gr.File(file_types=['.mp4'])
+        vid_prompt = gr.Textbox(label=ui_config['video_prompt']['label'], value=ui_config['video_prompt']['default'])
+        vid_btn = gr.Button("🎞️ Summarize Video")
+        vid_out = gr.Markdown()
+        vid_btn.click(summarize_video, inputs=[api_key, vid_in, vid_prompt], outputs=vid_out)
+
+    with gr.Tab("📄 PDF"):
+        pdf_in = gr.File(file_types=['.pdf'])
+        pdf_btn = gr.Button("📝 Convert PDF")
+        pdf_out = gr.Markdown()
+        pdf_btn.click(lambda f: pdf_to_markdown(f.name), inputs=[pdf_in], outputs=pdf_out)
+
+    with gr.Tab("🔍 ArXiv"):
+        arxiv_in = gr.Textbox(label=ui_config['arxiv_prompt']['label'], value=ui_config['arxiv_prompt']['default'])
+        arxiv_btn = gr.Button("🔎 Search ArXiv")
+        arxiv_out = gr.Markdown()
+        arxiv_btn.click(arxiv_search, inputs=[arxiv_in], outputs=arxiv_out)
+
+    with gr.Tab("⚙️ Quick Tests"):
+        tests = [
+            ("📝 Text", "What is 2+2?"),
+            ("🖼️ Image", "Analyze image https://via.placeholder.com/150.png"),
+            ("🎤 Audio", "Transcribe https://www2.cs.uic.edu/~i101/SoundFiles/gettysburg10.wav"),
+            ("🎥 Video", "Summarize video https://sample-videos.com/video123/mp4/240/big_buck_bunny_240p_1mb.mp4"),
+            ("🖼️+📝 Img+Txt", "Given image https://via.placeholder.com/150.png list 3 facts."),
+            ("🎤+📝 Aud+Txt", "Given audio https://www2.cs.uic.edu/~i101/SoundFiles/gettysburg10.wav summarize."),
+            ("🎥+📝 Vid+Txt", "Given video https://sample-videos.com/video123/mp4/240/big_buck_bunny_240p_1mb.mp4 transcript+summary.")
+        ]
+        for idx, (e, p) in enumerate(tests, 1):
+            btn = gr.Button(f"{idx}. {e} Test")
+            btn.click(chat_with_openai, inputs=[api_key, gr.State(p), gr.State([])], outputs=chatbot)
 
 if __name__ == "__main__":
+    demo.launch()
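+    # The previous version exposed the server externally; pass the same arguments
+    # here if that is still wanted: demo.launch(server_name="0.0.0.0", server_port=7860)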