NotASI's picture
Update app.py
c0d0de3
raw
history blame
11.2 kB
# """
# References:
# - https://medium.com/@turna.fardousi/building-a-multimodal-chatbot-with-gemini-api-8015bfbee538
# """
# import os
# import time
# from typing import List, Tuple, Optional
# import google.generativeai as genai
# import gradio as gr
# from PIL import Image
# from dotenv import load_dotenv
# load_dotenv()
# GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# TITLE = """<h1 align="center">๐ŸŽฎChat with Gemini 1.5๐Ÿ”ฅ -- Beta Preview</h1>"""
# NOTICE = """
# Notices ๐Ÿ“œ:
# - This app is still in development
# - Some features may not work as expected
# """
# ABOUT = """
# Updates (2024-8-12): Created the App
# Info:
# - Model: Gemini 1.5 Flash
# """
# ERRORS = """
# Known errors โš ๏ธ:
# """
# FUTURE_IMPLEMENTATIONS = """
# To be implemented ๐Ÿš€:
# - Select other Gemini / Gemma models
# - Upload files
# - More tools other than web search
# """
# IMAGE_WIDTH = 512
# def preprocess_stop_sequences(stop_sequences: str) -> Optional[List[str]]:
# return [seq.strip() for seq in stop_sequences.split(",")] if stop_sequences else None
# def preprocess_image(image: Image.Image) -> Image.Image:
# image_height = int(image.height * IMAGE_WIDTH / image.width)
# return image.resize((IMAGE_WIDTH, image_height))
# def user(text_prompt: str, chatbot: List[Tuple[str, str]]):
# return "", chatbot + [[text_prompt, None]]
# def bot(
# google_key: str,
# image_prompt: Optional[Image.Image],
# temperature: float,
# max_output_tokens: int,
# stop_sequences: str,
# top_k: int,
# top_p: float,
# chatbot: List[Tuple[str, str]]
# ):
# google_key = google_key or GEMINI_API_KEY
# if not google_key:
# raise ValueError("GOOGLE_API_KEY is not set. Please set it up.")
# text_prompt = chatbot[-1][0]
# genai.configure(api_key=google_key)
# generation_config = genai.types.GenerationConfig(
# temperature=temperature,
# max_output_tokens=max_output_tokens,
# stop_sequences=preprocess_stop_sequences(stop_sequences),
# top_k=top_k,
# top_p=top_p,
# )
# model_name = "gemini-1.5-flash" # if image_prompt is None else "gemini-pro-vision"
# model = genai.GenerativeModel(model_name)
# inputs = [text_prompt] if image_prompt is None else [text_prompt, preprocess_image(image_prompt)]
# response = model.generate_content(inputs, stream=True, generation_config=generation_config)
# response.resolve()
# chatbot[-1][1] = ""
# for chunk in response:
# for i in range(0, len(chunk.text), 10):
# chatbot[-1][1] += chunk.text[i:i + 10]
# time.sleep(0.01)
# yield chatbot
# google_key_component = gr.Textbox(
# label = "GOOGLE API KEY",
# type = "password",
# placeholder = "...",
# visible = GEMINI_API_KEY is None
# )
# image_prompt_component = gr.Image(
# type = "pil",
# label = "Image"
# )
# chatbot_component = gr.Chatbot(
# # label = 'Gemini',
# bubble_full_width = False
# )
# text_prompt_component = gr.Textbox(
# placeholder = "Chat with Gemini",
# label = "Ask me anything and press Enter"
# )
# run_button_component = gr.Button(
# "Run"
# )
# temperature_component = gr.Slider(
# minimum = 0,
# maximum = 1.0,
# value = 0.5,
# step = 0.05,
# label = "Temperature"
# )
# max_output_tokens_component = gr.Slider(
# minimum = 1,
# maximum = 8192,
# value = 4096,
# step = 1,
# label = "Max Output Tokens"
# )
# stop_sequences_component = gr.Textbox(
# label = "Add stop sequence",
# placeholder = "STOP, END"
# )
# top_k_component = gr.Slider(
# minimum = 1,
# maximum = 40,
# value = 32,
# step = 1,
# label = "Top-K"
# )
# top_p_component = gr.Slider(
# minimum = 0,
# maximum = 1,
# value = 1,
# step = 0.01,
# label = "Top-P"
# )
# user_inputs = [
# text_prompt_component,
# chatbot_component
# ]
# bot_inputs = [
# google_key_component,
# image_prompt_component,
# temperature_component,
# max_output_tokens_component,
# stop_sequences_component,
# top_k_component,
# top_p_component,
# chatbot_component
# ]
# with gr.Blocks(theme = gr.themes.Soft()) as demo:
# with gr.Tab("Chat with Gemini 1.5 Flash"):
# gr.HTML(TITLE)
# with gr.Row():
# gr.Markdown(NOTICE)
# gr.Markdown(ABOUT)
# gr.Markdown(ERRORS)
# gr.Markdown(FUTURE_IMPLEMENTATIONS)
# with gr.Column():
# google_key_component.render()
# with gr.Row():
# image_prompt_component.render()
# chatbot_component.render()
# text_prompt_component.render()
# run_button_component.render()
# with gr.Accordion("Parameters", open=False):
# temperature_component.render()
# max_output_tokens_component.render()
# stop_sequences_component.render()
# with gr.Accordion("Advanced", open=False):
# top_k_component.render()
# top_p_component.render()
# run_button_component.click(
# fn = user,
# inputs = user_inputs,
# outputs = [
# text_prompt_component,
# chatbot_component
# ],
# queue = False
# ).then(
# fn = bot,
# inputs = bot_inputs,
# outputs = [
# chatbot_component
# ]
# )
# text_prompt_component.submit(
# fn = user,
# inputs = user_inputs,
# outputs = [
# text_prompt_component,
# chatbot_component
# ],
# queue = False
# ).then(
# fn = bot,
# inputs = bot_inputs,
# outputs = [
# chatbot_component
# ]
# )
# with gr.Tab("Chat with Gemma 2"):
# gr.HTML(
# """
# <h1 align="center">Still in development</h1>
# """
# )
# demo.queue().launch(debug = True, show_error = True)
"""
References:
- https://medium.com/@turna.fardousi/building-a-multimodal-chatbot-with-gemini-api-8015bfbee538
"""
import os
import time
from typing import List, Tuple, Optional
import google.generativeai as genai
import gradio as gr
from PIL import Image
from dotenv import load_dotenv
# Run python-dotenv's loader before the lookup below so that a key provided
# through a .env file is visible in the process environment.
load_dotenv()

# Fallback API key used when the user does not type one into the UI;
# None when the environment variable is absent.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
# --- Static text shown in the UI -------------------------------------------
# The emoji below are written as real Unicode characters; they were previously
# stored as mis-decoded UTF-8 byte sequences and rendered as garbage.

# Page heading, rendered through gr.HTML.
TITLE = """<h1 align="center">🎮Chat with Gemini 1.5🔥 -- Beta Preview</h1>"""

# Markdown panels shown side by side under the heading.
NOTICE = """
Notices 📜:
- This app is still in development
- Some features may not work as expected
"""
ABOUT = """
Updates (2024-8-12): Created the App
Info:
- Model: Gemini 1.5 Flash
"""
ERRORS = """
Known errors ⚠️:
"""
FUTURE_IMPLEMENTATIONS = """
To be implemented 🚀:
- Select other Gemini / Gemma models
- Upload files
- More tools other than web search
"""

# Target width, in pixels, that uploaded images are scaled to before being
# sent to the model.
IMAGE_WIDTH = 512
def preprocess_stop_sequences(stop_sequences: str) -> Optional[List[str]]:
    """Parse a comma-separated string into a list of stop sequences.

    Args:
        stop_sequences: Raw textbox content such as ``"STOP, END"``. May be
            empty or ``None``.

    Returns:
        The individual sequences with surrounding whitespace removed, or
        ``None`` when the input contains no usable sequence.
    """
    if not stop_sequences:
        return None
    # Drop blank entries produced by doubled or trailing commas so that an
    # empty string is never forwarded as a stop sequence.
    cleaned = [seq.strip() for seq in stop_sequences.split(",")]
    cleaned = [seq for seq in cleaned if seq]
    return cleaned or None
def preprocess_image(image: Image.Image) -> Image.Image:
    """Scale ``image`` to ``IMAGE_WIDTH`` pixels wide, keeping its aspect ratio.

    Args:
        image: The PIL image to scale.

    Returns:
        The result of ``image.resize`` at ``IMAGE_WIDTH`` by the
        proportionally scaled height.
    """
    # Truncating the scaled height gives 0 for extremely wide images, which is
    # not a valid resize target, so clamp it to at least one pixel.
    image_height = max(1, int(image.height * IMAGE_WIDTH / image.width))
    return image.resize((IMAGE_WIDTH, image_height))
def user(text_prompt: str, history: List[Tuple[str, str]]):
    """Record the submitted prompt as a new, not-yet-answered chat turn.

    Args:
        text_prompt: The text the user just submitted.
        history: The conversation so far as ``[user_text, bot_text]`` pairs.
            May be empty or ``None``.

    Returns:
        A 2-tuple ``(textbox_value, history)``: an empty string to clear the
        prompt box, and a new history list whose last entry is
        ``[text_prompt, None]``. ``bot`` reads the pending prompt from that
        last entry.
    """
    # Build a new list instead of appending in place so the caller's history
    # object is never mutated.
    return "", list(history or []) + [[text_prompt, None]]
def bot(
    google_key: str,
    image_prompt: Optional[Image.Image],
    temperature: float,
    max_output_tokens: int,
    stop_sequences: str,
    top_k: int,
    top_p: float,
    history: List[Tuple[str, str]]
):
    """Stream a Gemini reply to the most recent user turn in ``history``.

    Args:
        google_key: API key typed into the UI; falls back to
            ``GEMINI_API_KEY`` when empty.
        image_prompt: Optional image sent to the model with the text prompt.
        temperature: Sampling temperature for generation.
        max_output_tokens: Upper bound on generated tokens.
        stop_sequences: Comma-separated stop sequences (may be empty).
        top_k: Top-k sampling parameter.
        top_p: Top-p sampling parameter.
        history: Conversation as ``(user_text, bot_text)`` pairs. The last
            pair must hold the prompt to answer in its first slot.

    Yields:
        The history with its last pair replaced by
        ``(prompt, reply_so_far)``, once per ten-character slice of the reply.

    Raises:
        ValueError: If no API key is available or ``history`` is empty.
    """
    google_key = google_key or GEMINI_API_KEY
    if not google_key:
        raise ValueError("GOOGLE_API_KEY is not set. Please set it up.")
    if not history:
        raise ValueError("Chat history is empty; there is no prompt to answer.")

    # The pending prompt lives in the last turn. That turn is replaced rather
    # than appended to, so the prompt appears only once in the transcript.
    text_prompt = history[-1][0]
    earlier_turns = list(history[:-1])

    genai.configure(api_key=google_key)
    generation_config = genai.types.GenerationConfig(
        temperature=temperature,
        max_output_tokens=max_output_tokens,
        stop_sequences=preprocess_stop_sequences(stop_sequences),
        top_k=top_k,
        top_p=top_p,
    )
    model_name = "gemini-1.5-flash"
    model = genai.GenerativeModel(model_name)

    inputs = [text_prompt]
    if image_prompt is not None:
        inputs.append(preprocess_image(image_prompt))

    # Iterate the streaming response directly. Calling response.resolve()
    # first would wait for the whole reply before the first chunk is shown.
    response = model.generate_content(inputs, stream=True, generation_config=generation_config)

    output_text = ""
    for chunk in response:
        chunk_text = chunk.text
        # Emit each chunk in ten-character slices with a short pause between
        # them, producing a typewriter effect in the UI.
        for start in range(0, len(chunk_text), 10):
            output_text += chunk_text[start:start + 10]
            time.sleep(0.01)
            yield earlier_turns + [(text_prompt, output_text)]
# --- Input components --------------------------------------------------------
# Each component is created here and placed into the page later through its
# .render() method.

# API-key field; only shown when no key was found in the environment.
google_key_component = gr.Textbox(
    label="GOOGLE API KEY",
    type="password",
    placeholder="...",
    visible=GEMINI_API_KEY is None,
)

# Optional image to accompany the prompt; delivered to handlers as a PIL image.
image_prompt_component = gr.Image(type="pil", label="Image")

# Free-text prompt box.
text_prompt_component = gr.Textbox(
    placeholder="Chat with Gemini",
    label="Ask me anything and press Enter",
)

# Generation parameters.
temperature_component = gr.Slider(
    minimum=0,
    maximum=1.0,
    value=0.5,
    step=0.05,
    label="Temperature",
)
max_output_tokens_component = gr.Slider(
    minimum=1,
    maximum=8192,
    value=4096,
    step=1,
    label="Max Output Tokens",
)
# Comma-separated list, e.g. "STOP, END".
stop_sequences_component = gr.Textbox(
    label="Add stop sequence",
    placeholder="STOP, END",
)

# Advanced sampling parameters.
top_k_component = gr.Slider(
    minimum=1,
    maximum=40,
    value=32,
    step=1,
    label="Top-K",
)
top_p_component = gr.Slider(
    minimum=0,
    maximum=1,
    value=1,
    step=0.01,
    label="Top-P",
)
# Chat transcript display. It also carries the conversation between handlers:
# both `user` and `bot` receive its current value as their history argument
# and write the updated history back to it.
chatbot_component = gr.Chatbot(label="Gemini")

# Components passed to `user`, in positional-argument order.
user_inputs = [
    text_prompt_component,
    chatbot_component,
]

# Components passed to `bot`, in positional-argument order.
bot_inputs = [
    google_key_component,
    image_prompt_component,
    temperature_component,
    max_output_tokens_component,
    stop_sequences_component,
    top_k_component,
    top_p_component,
    chatbot_component,
]

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    with gr.Tab("Chat with Gemini 1.5 Flash"):
        gr.HTML(TITLE)
        with gr.Row():
            gr.Markdown(NOTICE)
            gr.Markdown(ABOUT)
            gr.Markdown(ERRORS)
            gr.Markdown(FUTURE_IMPLEMENTATIONS)
        with gr.Column():
            google_key_component.render()
            with gr.Row():
                image_prompt_component.render()
                chatbot_component.render()
            text_prompt_component.render()
            with gr.Accordion("Parameters", open=False):
                temperature_component.render()
                max_output_tokens_component.render()
                stop_sequences_component.render()
                with gr.Accordion("Advanced", open=False):
                    top_k_component.render()
                    top_p_component.render()

        # Pressing Enter in the prompt box runs two steps in sequence:
        #   1. `user` runs outside the queue and writes to the prompt box and
        #      the chatbot.
        #   2. `bot` then runs as a queued generator and streams its yielded
        #      histories into the chatbot.
        text_prompt_component.submit(
            fn=user,
            inputs=user_inputs,
            outputs=[
                text_prompt_component,
                chatbot_component,
            ],
            queue=False,
        ).then(
            fn=bot,
            inputs=bot_inputs,
            outputs=[
                chatbot_component,
            ],
        )

    with gr.Tab("Chat with Gemma 2"):
        gr.HTML(
            """
<h1 align="center">Still in development</h1>
"""
        )

# The queue is enabled before launch because `bot` is a generator handler.
demo.queue().launch(debug=True, show_error=True)