import gradio as gr
import PyPDF2
import pytesseract
from PIL import Image

# Ensure Tesseract is installed and accessible.
# On Windows, you may need to specify the Tesseract path:
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'


def process_file(file):
    if file is None:
        return "No file uploaded."

    content = ""
    if file.name.endswith(".txt"):
        # Read plain text files
        with open(file.name, "r", encoding="utf-8") as f:
            content = f.read()
    elif file.name.endswith(".pdf"):
        # Extract text from PDFs; extract_text() may return None for pages
        # without a text layer, so fall back to an empty string
        reader = PyPDF2.PdfReader(file.name)
        for page in reader.pages:
            content += page.extract_text() or ""
    elif file.name.endswith((".png", ".jpg", ".jpeg")):
        # Extract text from images using OCR
        image = Image.open(file.name)
        content = pytesseract.image_to_string(image)
    else:
        return f"Unsupported file type: {file.name}"

    # Simulate passing the content to the phi-4 model
    model_response = f"Processed file content:\n{content}"
    return model_response


with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# Inference Provider")
        gr.Markdown(
            "This Space showcases the microsoft/phi-4 model, served by the Nebius API. "
            "Sign in with your Hugging Face account to use this API."
        )
        button = gr.LoginButton("Sign in")
    with gr.Column():
        # Load the phi-4 model, authenticating with the signed-in user's token
        model = gr.load("models/microsoft/phi-4", accept_token=button, provider="nebius")
        # File upload component
        file_input = gr.File(label="Upload a file (TXT, PDF, or Image)")
        # Output component to display the model response
        file_output = gr.Textbox(label="Model Response", lines=10)
        # Run process_file whenever the uploaded file changes
        file_input.change(process_file, inputs=file_input, outputs=file_output)

demo.launch()
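
# Note: process_file above only formats the extracted text; it does not actually
# query phi-4. Below is a minimal, illustrative sketch of how the extracted content
# could be sent to the model through the Hugging Face Inference Providers API. It
# assumes `huggingface_hub` is installed and a token with inference access is
# available (e.g. via the HF_TOKEN environment variable); the function name
# `query_phi4` is hypothetical and not part of the app above.
#
# from huggingface_hub import InferenceClient
#
# def query_phi4(content):
#     client = InferenceClient(provider="nebius")  # route the request through Nebius
#     completion = client.chat.completions.create(
#         model="microsoft/phi-4",
#         messages=[{"role": "user", "content": content}],
#     )
#     return completion.choices[0].message.content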