import base64
import mimetypes
import os
from datetime import datetime

import gradio as gr
from openai import OpenAI

# === Initialize OpenAI client using environment variable ===
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY environment variable is not set.")

client = OpenAI(api_key=openai_api_key)

# === Prompts ===
system_prompt = (
    "You are a detail-oriented assistant that specializes in transcribing and polishing "
    "handwritten notes from images. Your goal is to turn rough, casual, or handwritten "
    "content into clean, structured, and professional-looking text that sounds like it "
    "was written by a human—not an AI. You do not include icons, emojis, or suggest next "
    "steps unless explicitly instructed."
)

user_prompt_template = (
    "You will receive an image of handwritten notes. Transcribe the content accurately, "
    "correcting any spelling or grammar issues. Then, organize it clearly with headings, "
    "bullet points, and proper formatting. Maintain the original intent and voice of the "
    "author, but enhance readability and flow. Do not add embellishments or AI-style phrasing."
)

# === Image processing ===
def encode_image_to_base64(image_path):
    """Read an image file from disk and return its base64-encoded contents."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

# === Transcription function ===
def transcribe_images(files):
    if not files:
        return "No images uploaded."

    results = []
    for file in files:
        # Depending on the Gradio version, each uploaded file is either a path
        # string or a tempfile-like object exposing the path via .name; handle both.
        image_path = getattr(file, "name", file)

        encoded_image = encode_image_to_base64(image_path)
        # Build a data URL with the correct MIME type (falls back to JPEG).
        mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
        image_url = f"data:{mime_type};base64,{encoded_image}"

        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": [
                    {"type": "text", "text": user_prompt_template},
                    {"type": "image_url", "image_url": {"url": image_url}}
                ]}
            ],
            max_tokens=1500
        )

        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        result_text = f"🗓️ Transcribed on: {timestamp}\n\n{response.choices[0].message.content}"
        results.append(result_text)

    return "\n\n---\n\n".join(results)

# === Gradio interface using UploadButton ===
with gr.Blocks() as app:
    with gr.Row():
        uploader = gr.UploadButton(
            label="Upload handwritten note images",
            file_types=[".jpg", ".jpeg", ".png"],
            file_count="multiple"  # UploadButton takes file_count, not "multifile"
        )
        output_box = gr.Textbox(label="Transcribed Output", lines=30)

    # UploadButton fires an .upload() event rather than .change().
    uploader.upload(fn=transcribe_images, inputs=uploader, outputs=output_box)

# === Launch ===
if __name__ == "__main__":
    app.launch()