# Hugging Face Space (status: Running)
import base64
import mimetypes
import os
from datetime import datetime

import gradio as gr
from openai import OpenAI
# === Initialize OpenAI Client using Environment Variable ===
# The API key is read from the process environment; a missing or empty
# value aborts start-up immediately rather than failing on the first request.
openai_api_key = os.getenv("OPENAI_API_KEY")
if openai_api_key:
    client = OpenAI(api_key=openai_api_key)
else:
    raise ValueError("OPENAI_API_KEY environment variable is not set.")
# === Prompts ===
# Each prompt is assembled from its individual sentences, joined by a single
# space, so a sentence can be edited without re-flowing the whole literal.

# System message: fixes the assistant's role and output style.
system_prompt = " ".join(
    (
        "You are a detail-oriented assistant that specializes in transcribing "
        "and polishing handwritten notes from images.",
        "Your goal is to turn rough, casual, or handwritten content into clean, "
        "structured, and professional-looking text that sounds like it was "
        "written by a human—not an AI.",
        "You do not include icons, emojis, or suggest next steps unless "
        "explicitly instructed.",
    )
)

# User message text sent alongside every uploaded image.
user_prompt_template = " ".join(
    (
        "You will receive an image of handwritten notes.",
        "Transcribe the content accurately, correcting any spelling or grammar "
        "issues.",
        "Then, organize it clearly with headings, bullet points, and proper "
        "formatting.",
        "Maintain the original intent and voice of the author, but enhance "
        "readability and flow.",
        "Do not add embellishments or AI-style phrasing.",
    )
)
# === Image processing ===
def encode_image_to_base64(image_file):
    """Return the contents of an image as a base64-encoded string.

    Args:
        image_file: The image to encode. Accepted forms are raw
            ``bytes``/``bytearray``, a filesystem path (``str`` or
            ``os.PathLike``), or a binary file-like object exposing
            ``read()``.

    Returns:
        The base64 encoding of the image bytes as a ``str``.

    Raises:
        OSError: If a path is given and the file cannot be opened or read.
    """
    if isinstance(image_file, (bytes, bytearray)):
        image_bytes = bytes(image_file)
    elif isinstance(image_file, (str, os.PathLike)):
        # Upload components may pass a temp-file path rather than an open
        # file object, so read the bytes from disk ourselves.
        with open(image_file, "rb") as handle:
            image_bytes = handle.read()
    else:
        # Original behaviour: any object with a binary read() method.
        image_bytes = image_file.read()
    return base64.b64encode(image_bytes).decode("utf-8")
# === Transcription function ===
def _guess_image_mime_type(file):
    """Guess an image MIME type from *file*'s name, defaulting to JPEG."""
    if isinstance(file, (str, os.PathLike)):
        name = os.fspath(file)
    else:
        name = getattr(file, "name", None)
    if isinstance(name, str):
        mime_type, _ = mimetypes.guess_type(name)
        if mime_type and mime_type.startswith("image/"):
            return mime_type
    # No usable filename or an unrecognised extension: keep the previous
    # hard-coded behaviour.
    return "image/jpeg"


def transcribe_images(files):
    """Transcribe each uploaded image of handwritten notes into polished text.

    Every image is base64-encoded, embedded in a data URL, and sent to the
    chat-completions endpoint together with the module-level system and
    user prompts.

    Args:
        files: The uploaded image(s). Either a list/tuple of uploads or a
            single upload; each item is passed to ``encode_image_to_base64``.

    Returns:
        ``"No images uploaded."`` when *files* is empty or ``None``;
        otherwise the per-image transcriptions, each prefixed with the time
        it was produced, joined by a ``---`` separator line.
    """
    if not files:
        return "No images uploaded."
    # A single-file upload arrives as a bare item rather than a list.
    if not isinstance(files, (list, tuple)):
        files = [files]

    results = []
    for file in files:
        encoded_image = encode_image_to_base64(file)
        # Label the payload with its real type so PNG uploads are not
        # mislabelled as JPEG.
        mime_type = _guess_image_mime_type(file)
        image_url = f"data:{mime_type};base64,{encoded_image}"
        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": [
                    {"type": "text", "text": user_prompt_template},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ]},
            ],
            max_tokens=1500,
        )
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # The message content can be None; avoid rendering the text "None".
        transcription = response.choices[0].message.content or ""
        results.append(f"🗓️ Transcribed on: {timestamp}\n\n{transcription}")
    return "\n\n---\n\n".join(results)
# === Gradio Interface using UploadButton ===
# Layout: an upload button in its own row, with the transcription output
# shown in a large text box underneath.
with gr.Blocks() as app:
    with gr.Row():
        uploader = gr.UploadButton(
            label="Upload handwritten note images",
            file_types=[".jpg", ".jpeg", ".png"],
            # `file_count` is the documented way to allow several files.
            file_count="multiple",
        )
    output_box = gr.Textbox(label="Transcribed Output", lines=30)
    # UploadButton exposes `upload` (and `click`) events; run the
    # transcription once the selected files have finished uploading.
    uploader.upload(fn=transcribe_images, inputs=uploader, outputs=output_box)

# === Launch ===
# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    app.launch()