import gradio as gr
import base64
import os
import re
from io import BytesIO

from PIL import Image
from huggingface_hub import InferenceClient
from mistralai import Mistral

# Initialize the Hugging Face inference client (expects HF_TOKEN in the environment).
client = InferenceClient(api_key=os.getenv("HF_TOKEN"))
client.headers["x-use-cache"] = "0"

# Initialize the Mistral client (expects MISTRAL_API_KEY in the environment).
api_key = os.getenv("MISTRAL_API_KEY")
Mistralclient = Mistral(api_key=api_key)
def encode_image(image_path):
    """
    Encode the image at the given path to a base64 JPEG string.
    Resizes the image to a height of 512 pixels while maintaining aspect ratio.
    """
    try:
        image = Image.open(image_path).convert("RGB")
        base_height = 512
        h_percent = base_height / float(image.size[1])
        w_size = int(float(image.size[0]) * h_percent)
        image = image.resize((w_size, base_height), Image.LANCZOS)
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")
    except FileNotFoundError:
        print(f"Error: The file {image_path} was not found.")
    except Exception as e:
        print(f"Error: {e}")
    return None
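
# Illustrative usage (not executed here; "sample.jpg" is a hypothetical path):
#
#     b64 = encode_image("sample.jpg")
#     if b64:
#         data_url = f"data:image/jpeg;base64,{b64}"
#
# Both vision branches below embed the encoded image in a data URL exactly like this.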
def feifeiprompt(feifei_select=True, message_text="", history=""):
    """
    Constructs a prompt for the chatbot based on the message text and history.
    Enhancements for the SmartDocAnalyzer context can be added here.
    """
    input_prompt = []
    # Special handling for drawing requests ("画" is Chinese for "draw")
    if message_text.startswith("画") or message_text.startswith("draw"):
        feifei_photo = (
            "You are FeiFei. Background: FeiFei was born in Tokyo and is a natural-born photographer, "
            "hailing from a family with a long history in photography... [truncated for brevity]"
        )
        message_text = message_text.replace("画", "").replace("draw", "")
        message_text = (
            f"The prompt is '{message_text}'. Based on it, reply with a single English sentence "
            "describing a high-quality photo to generate."
        )
        system_prompt = {"role": "system", "content": feifei_photo}
        user_input_part = {"role": "user", "content": str(message_text)}
        return [system_prompt, user_input_part]

    # Default prompt construction for the FeiFei character
    if feifei_select:
        feifei = (
            "[Character Name]: Aifeifei (AI Feifei) [Gender]: Female [Age]: 19 years old ... "
            "[Identity]: User's virtual girlfriend"
        )
        system_prompt = {"role": "system", "content": feifei}
        user_input_part = {"role": "user", "content": str(message_text)}
        # Drop any history entries that mention "gradio" (e.g. UI boilerplate echoed into the chat)
        pattern = re.compile(r"gradio")
        if history:
            history = [item for item in history if not pattern.search(str(item["content"]))]
            input_prompt = [system_prompt] + history + [user_input_part]
        else:
            input_prompt = [system_prompt, user_input_part]
    else:
        input_prompt = [{"role": "user", "content": str(message_text)}]
    return input_prompt
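
# Illustrative output shape (hypothetical message text, empty history assumed):
#
#     feifeiprompt(True, "Hello!", [])
#     # -> [{"role": "system", "content": "[Character Name]: Aifeifei ..."},
#     #     {"role": "user", "content": "Hello!"}]
#
# The returned list is already in the OpenAI-style messages format that both
# streaming backends below expect.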
def feifeiimgprompt(message_files, message_text, image_mod):
    """
    Handles image-based prompts in either 'Vision' or 'pixtral' mode.
    Note: only the first uploaded file is processed.
    """
    message_file = message_files[0]
    base64_image = encode_image(message_file)
    if base64_image is None:
        return

    # Vision mode: stream from a meta-llama vision model via the HF Inference API
    if image_mod == "Vision":
        messages = [{
            "role": "user",
            "content": [
                {"type": "text", "text": message_text},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
            ]
        }]
        stream = client.chat.completions.create(
            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
            messages=messages,
            max_tokens=500,
            stream=True
        )
        temp = ""
        for chunk in stream:
            if chunk.choices[0].delta.content is not None:
                temp += chunk.choices[0].delta.content
                yield temp
    # Pixtral mode: stream from Mistral's pixtral model
    else:
        model = "pixtral-large-2411"
        messages = [{
            "role": "user",
            "content": [
                {"type": "text", "text": message_text},
                {"type": "image_url", "image_url": f"data:image/jpeg;base64,{base64_image}"}
            ]
        }]
        partial_message = ""
        for chunk in Mistralclient.chat.stream(model=model, messages=messages):
            if chunk.data.choices[0].delta.content is not None:
                partial_message += chunk.data.choices[0].delta.content
                yield partial_message
def feifeichatmod(additional_dropdown, input_prompt):
    """
    Chooses the appropriate chat backend based on the dropdown selection.
    """
    # The Mistral-Nemo dropdown choice is routed to Mistral's own API
    # (as mistral-large-2411); every other model goes through the HF Inference API.
    if additional_dropdown == "mistralai/Mistral-Nemo-Instruct-2411":
        model = "mistral-large-2411"
        stream_response = Mistralclient.chat.stream(model=model, messages=input_prompt)
        partial_message = ""
        for chunk in stream_response:
            if chunk.data.choices[0].delta.content is not None:
                partial_message += chunk.data.choices[0].delta.content
                yield partial_message
    else:
        stream = client.chat.completions.create(
            model=additional_dropdown,
            messages=input_prompt,
            temperature=0.5,
            max_tokens=1024,
            top_p=0.7,
            stream=True
        )
        temp = ""
        for chunk in stream:
            if chunk.choices[0].delta.content is not None:
                temp += chunk.choices[0].delta.content
                yield temp
def feifeichat(message, history, feifei_select, additional_dropdown, image_mod):
    """
    Main chat function that decides between image-based and text-based handling.
    This function can be further enhanced with SmartDocAnalyzer-specific logic.
    """
    message_text = message.get("text", "")
    message_files = message.get("files", [])
    if message_files:
        # Process image input
        yield from feifeiimgprompt(message_files, message_text, image_mod)
    else:
        # Process text input
        input_prompt = feifeiprompt(feifei_select, message_text, history)
        yield from feifeichatmod(additional_dropdown, input_prompt)
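
# Note: with a multimodal gr.ChatInterface, `message` arrives as a dict of the
# form {"text": "<user text>", "files": ["/path/to/upload.png", ...]}; the
# .get() calls above rely on that shape (the path shown is illustrative).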
# Enhancement Note:
# For the SmartDocAnalyzer space, consider integrating document parsing,
# OCR functionality, semantic analysis of documents, and more advanced
# error handling as needed. This template serves as a starting point;
# a minimal OCR sketch follows below.
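
# A minimal OCR sketch (an assumption, not part of the original app): it uses
# pytesseract, which requires the Tesseract binary plus the pytesseract package,
# and extract_text() is a hypothetical helper name.
def extract_text(image_path):
    """Return the text recognized in the image at image_path."""
    import pytesseract  # imported lazily so the app still runs without OCR support
    image = Image.open(image_path).convert("RGB")
    return pytesseract.image_to_string(image)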
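
# Gradio interface setup for SmartDocAnalyzer. Constructed here, after the
# handler functions it references, so the local feifeichat defined above is bound.
SmartDocAnalyzer = gr.ChatInterface(
    feifeichat,
    type="messages",
    multimodal=True,
    additional_inputs=[
        gr.Checkbox(label="Enable Analyzer Mode", value=True),
        gr.Dropdown(
            [
                "meta-llama/Llama-3.3-70B-Instruct",
                "CohereForAI/c4ai-command-r-plus-08-2024",
                "Qwen/Qwen2.5-72B-Instruct",
                "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
                "NousResearch/Hermes-3-Llama-3.1-8B",
                "mistralai/Mistral-Nemo-Instruct-2411",
                "microsoft/phi-4"
            ],
            value="mistralai/Mistral-Nemo-Instruct-2411",
            show_label=False,
            container=False
        ),
        gr.Radio(
            ["pixtral", "Vision"],
            value="pixtral",
            show_label=False,
            container=False
        )
    ],
    title="SmartDocAnalyzer",
    description="An advanced document analysis tool powered by AI."
)

if __name__ == "__main__":
    SmartDocAnalyzer.launch()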