import gradio as gr
import PyPDF2
import pytesseract
from PIL import Image

# Ensure Tesseract is installed and accessible.
# On Windows, you may need to specify the Tesseract path:
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'


def process_file(file):
    if file is None:
        return "No file uploaded."

    content = ""
    if file.name.endswith(".txt"):
        # Read plain text files
        with open(file.name, "r", encoding="utf-8") as f:
            content = f.read()
    elif file.name.endswith(".pdf"):
        # Extract text from PDFs; extract_text() may return None for pages
        # without a text layer, so fall back to an empty string
        reader = PyPDF2.PdfReader(file.name)
        for page in reader.pages:
            content += page.extract_text() or ""
    elif file.name.endswith((".png", ".jpg", ".jpeg")):
        # Extract text from images using OCR
        image = Image.open(file.name)
        content = pytesseract.image_to_string(image)
    else:
        return f"Unsupported file type: {file.name}"

    # Simulate passing the content to the phi-4 model
    model_response = f"Processed file content:\n{content}"
    return model_response


with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# Inference Provider")
        gr.Markdown(
            "This Space showcases the microsoft/phi-4 model, served by the Nebius API. "
            "Sign in with your Hugging Face account to use this API."
        )
        button = gr.LoginButton("Sign in")
    with gr.Column():
        # Load the phi-4 model, authenticating with the signed-in user's token
        model = gr.load("models/microsoft/phi-4", accept_token=button, provider="nebius")
        # File upload component
        file_input = gr.File(label="Upload a file (TXT, PDF, or Image)")
        # Output component to display the model response
        file_output = gr.Textbox(label="Model Response", lines=10)
        # Run process_file whenever the uploaded file changes
        file_input.change(process_file, inputs=file_input, outputs=file_output)

demo.launch()
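
# Note: process_file above only formats the extracted text; it does not actually
# query phi-4. Below is a minimal, illustrative sketch of how the extracted content
# could be sent to the model through the Hugging Face Inference Providers API. It
# assumes `huggingface_hub` is installed and a token with inference access is
# available (e.g. via the HF_TOKEN environment variable); the function name
# `query_phi4` is hypothetical and not part of the app above.
#
# from huggingface_hub import InferenceClient
#
# def query_phi4(content):
#     client = InferenceClient(provider="nebius")  # route the request through Nebius
#     completion = client.chat.completions.create(
#         model="microsoft/phi-4",
#         messages=[{"role": "user", "content": content}],
#     )
#     return completion.choices[0].message.content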