Spaces:

bensheng
/

ocr

Runtime error

ocr

File size: 2,101 Bytes

00ab1fc
 
5bf3ded
00ab1fc
5bf3ded
 
00ab1fc
5bf3ded
 
 
00ab1fc
 
 
5bf3ded
00ab1fc
 
 
 
 
 
 
 
 
 
 
 
5bf3ded
 
 
 
 
 
00ab1fc
 
5bf3ded
00ab1fc
5bf3ded
 
 
00ab1fc
 
 
 
5bf3ded
00ab1fc
 
 
5bf3ded
00ab1fc
5bf3ded
 
 
 
 
00ab1fc
 
5bf3ded
00ab1fc
5bf3ded
 
 
00ab1fc

import gradio as gr
from huggingface_hub import InferenceClient
import base64

# Updated to use the MiniCPM-Llama3-V-2_5 model.
client = InferenceClient("openbmb/MiniCPM-Llama3-V-2_5")

def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode and read in full; the base64-encoded
    bytes are then decoded as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def respond(
    message,
    image,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the model's reply to a text message with an optional image.

    Args:
        message: The user's text for this turn.
        image: Optional uploaded image. Either a filesystem path string (what
            ``gr.Image(type="filepath")`` supplies) or an object exposing the
            path as ``.name``. A falsy value means no image was provided.
        history: Earlier ``(user, assistant)`` turns; empty entries are
            skipped. ``None`` is treated as no history.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to the client as ``max_new_tokens``.
        temperature: Forwarded to the client unchanged.
        top_p: Forwarded to the client unchanged.

    Yields:
        The response text accumulated so far, once per streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]
    for turn in history or []:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    # Prepend the image, base64-encoded inside <image> tags, to the user text.
    if image:
        # With type="filepath" the component passes a plain path string, which
        # has no ``.name``; still accept file-like objects that carry one.
        image_path = image if isinstance(image, str) else image.name
        base64_image = encode_image(image_path)
        image_message = f"<image>{base64_image}</image>"
        message = image_message + "\n" + (message or "")

    messages.append({"role": "user", "content": message})

    response = ""
    # NOTE(review): the prompt is the Python repr of the message list, not a
    # chat-template rendering; kept as-is to avoid changing what is sent.
    for chunk in client.text_generation(
        prompt=f"{messages}",
        max_new_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Streaming without details yields plain strings; detailed streaming
        # yields objects whose text lives at ``.token.text``. Handle both.
        token = chunk if isinstance(chunk, str) else chunk.token.text
        response += token
        yield response

# Gradio UI wiring: the input widgets are listed in the same order as the
# parameters of ``respond``; the reply is shown in a single text box.
demo = gr.Interface(
    fn=respond,
    title="MiniCPM-Llama3-V-2_5 Image and Text Chat",
    description="Upload an image and ask questions about it, or just chat without an image.",
    inputs=[
        gr.Textbox(label="Message"),
        gr.Image(label="Upload Image", type="filepath"),
        gr.State([]),  # initial (empty) value for the ``history`` argument
        gr.Textbox(
            label="System message",
            value="You are a friendly AI assistant capable of understanding images and text.",
        ),
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
    ],
    outputs=gr.Textbox(label="Response"),
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()