Spaces:
Sleeping
Sleeping
import base64 | |
import os | |
from datetime import datetime | |
from openai import OpenAI | |
import gradio as gr | |
import oci | |
import io | |
import re | |
import tempfile | |
from PIL import Image as PILImage | |
from collections import Counter | |
import matplotlib.pyplot as plt | |
from wordcloud import WordCloud | |
# === OpenAI API Setup ===
# The API key is taken from the environment. A missing or empty key aborts
# module start-up, since the client below cannot be built without it.
openai_api_key = os.getenv("OPENAI_API_KEY")
if openai_api_key is None or openai_api_key == "":
    raise ValueError("OPENAI_API_KEY environment variable is not set.")
client = OpenAI(api_key=openai_api_key)
# === OCI Object Storage Setup ===
# Credentials, namespace and bucket are all read from environment variables.
oci_config = {
    "user": os.environ.get("OCI_USER"),
    "tenancy": os.environ.get("OCI_TENANCY"),
    "fingerprint": os.environ.get("OCI_FINGERPRINT"),
    "region": os.environ.get("OCI_REGION"),
    "key_content": os.environ.get("OCI_PRIVATE_KEY"),
}
namespace = os.environ.get("OCI_NAMESPACE")
bucket_name = os.environ.get("OCI_BUCKET_NAME")

# Bind the name before attempting construction: if the client cannot be
# created, ``object_storage`` stays defined (as None) instead of being an
# unbound module-level name that raises NameError on first use.
object_storage = None
try:
    object_storage = oci.object_storage.ObjectStorageClient(oci_config)
except Exception as e:
    # Best effort: report the problem and keep the module importable.
    print("Failed to initialize OCI Object Storage client:", e)
# === Prompts ===
# Each prompt is assembled sentence by sentence; the sentences are joined
# with a single space to form the final text sent to the model.
system_prompt = " ".join([
    "You are a detail-oriented assistant that specializes in transcribing "
    "and polishing handwritten notes from images.",
    "Your goal is to turn rough, casual, or handwritten content into clean, "
    "structured, and professional-looking text that sounds like it was "
    "written by a human—not an AI.",
    "You do not include icons, emojis, or suggest next steps unless "
    "explicitly instructed.",
])

user_prompt_template = " ".join([
    "You will receive an image of handwritten notes.",
    "Transcribe the content accurately, correcting any spelling or grammar "
    "issues.",
    "Then, organize it clearly with headings, bullet points, and proper "
    "formatting.",
    "Maintain the original intent and voice of the author, but enhance "
    "readability and flow.",
    "Do not add embellishments or AI-style phrasing.",
])
# === Encode uploaded bytes ===
def encode_image_to_base64(file_bytes):
    """Return the standard Base64 encoding of ``file_bytes`` as a string."""
    encoded_bytes = base64.standard_b64encode(file_bytes)
    return str(encoded_bytes, "utf-8")
# === Upload transcription result to OCI ===
def upload_to_object_storage(user_name, text):
    """Store ``text`` as a UTF-8 ``.txt`` object and return the object name.

    The object name is ``<user_name with spaces as underscores>_<timestamp>.txt``
    where the timestamp is the local time formatted as ``YYYYmmdd_HHMMSS``.

    Args:
        user_name: Name of the uploader. May be None or empty when no
            uploader was chosen; ``"unknown"`` is used in that case.
        text: Transcription text to store.

    Returns:
        The name of the object that was written.

    Raises:
        Whatever ``object_storage.put_object`` raises; errors are not caught.
    """
    # Without this fallback a missing uploader crashes on ``.replace`` and the
    # already-produced transcription is never stored.
    owner = user_name if user_name else "unknown"
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{owner.replace(' ', '_')}_{timestamp}.txt"
    object_storage.put_object(
        namespace_name=namespace,
        bucket_name=bucket_name,
        object_name=filename,
        put_object_body=text.encode("utf-8"),
    )
    return filename
# === List object storage ===
def list_object_store():
    """Return the names of every ``.txt`` object in the configured bucket.

    The listing is paginated: each response may carry ``next_start_with``,
    which is passed back as ``start`` until no further page is announced.

    Returns:
        A list of object names ending in ``.txt``. If anything goes wrong the
        list contains a single ``"Failed to list objects: ..."`` message
        instead (callers display the list as-is).
    """
    try:
        names = []
        start = None
        while True:
            # ``start`` is only sent for follow-up pages.
            page_kwargs = {"start": start} if start else {}
            listing = object_storage.list_objects(
                namespace, bucket_name, **page_kwargs
            ).data
            names.extend(
                obj.name for obj in listing.objects if obj.name.endswith(".txt")
            )
            start = listing.next_start_with
            if not start:
                return names
    except Exception as e:
        return [f"Failed to list objects: {str(e)}"]
# === View file contents ===
def view_transcription(file_name):
    """Fetch an object from the bucket and return its content as text.

    Args:
        file_name: Name of the object to read.

    Returns:
        The object's content decoded as UTF-8 (undecodable bytes are
        replaced), or a ``"Failed to load file: ..."`` message if the object
        could not be fetched.
    """
    try:
        response = object_storage.get_object(namespace, bucket_name, file_name)
        # Transcriptions are uploaded as UTF-8 bytes, so decode them as UTF-8
        # explicitly rather than relying on the HTTP layer's encoding guess.
        return response.data.content.decode("utf-8", errors="replace")
    except Exception as e:
        return f"Failed to load file: {str(e)}"
# === Analyze content with OpenAI ===
def summarize_selected_files(file_list):
    """Summarize the combined content of the selected transcription files.

    Args:
        file_list: Iterable of object names to read via
            ``view_transcription``. None is treated like an empty selection.

    Returns:
        The model's summary text, or ``"No content found."`` when nothing was
        selected or the selected files contain only whitespace.
    """
    # Guard against a missing selection so the loop below never iterates None.
    selected = file_list or []
    combined_text = "".join(view_transcription(name) + "\n" for name in selected)
    if not combined_text.strip():
        return "No content found."
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {"role": "system", "content": "You are a summarization expert."},
            {"role": "user", "content": "Please summarize the following transcriptions in detail:\n" + combined_text},
        ],
        max_tokens=1500,
    )
    return response.choices[0].message.content
def recommend_from_selected_files(file_list):
    """Ask the model for next-step recommendations based on selected files.

    Args:
        file_list: Iterable of object names to read via
            ``view_transcription``. None is treated like an empty selection.

    Returns:
        The model's recommendation text, or ``"No content found."`` when
        nothing was selected or the selected files contain only whitespace.
    """
    # Guard against a missing selection so the loop below never iterates None.
    selected = file_list or []
    combined_text = "".join(view_transcription(name) + "\n" for name in selected)
    if not combined_text.strip():
        return "No content found."
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {"role": "system", "content": "You are an operations consultant."},
            {"role": "user", "content": "Please recommend next steps based on these transcriptions:\n" + combined_text},
        ],
        max_tokens=1500,
    )
    return response.choices[0].message.content
# === Generate word cloud from selected files ===
def generate_word_map_from_files(file_list):
    """Render a word cloud for the selected files, store it, and return it.

    The combined text of the selected files is turned into a word cloud,
    rendered to PNG, uploaded to the bucket as ``wordcloud_<timestamp>.png``
    and returned as an image for display.

    Args:
        file_list: Iterable of object names to read via
            ``view_transcription``. None is treated like an empty selection.

    Returns:
        A PIL image of the rendered word cloud, or None when there is no
        text to plot.

    Raises:
        Whatever the word-cloud generation or ``object_storage.put_object``
        raises; errors are not caught here.
    """
    # Guard against a missing selection so the loop below never iterates None.
    selected = file_list or []
    combined_text = "".join(view_transcription(name) + "\n" for name in selected)
    if not combined_text.strip():
        return None

    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(combined_text)

    buf = io.BytesIO()
    fig = plt.figure(figsize=(10, 5))
    try:
        ax = fig.add_subplot(111)
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis("off")
        fig.savefig(buf, format="png")
    finally:
        # Figures created through pyplot stay registered until closed; close
        # this one so repeated calls do not accumulate open figures.
        plt.close(fig)

    # Upload image to object storage
    filename = f"wordcloud_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
    object_storage.put_object(
        namespace_name=namespace,
        bucket_name=bucket_name,
        object_name=filename,
        put_object_body=buf.getvalue(),
    )

    # Rewind so the image reader starts at the beginning of the PNG data.
    buf.seek(0)
    return PILImage.open(buf)
# === Transcription logic ===
def _guess_image_mime(file_bytes):
    """Return ``image/png`` for data starting with the PNG signature, else ``image/jpeg``."""
    if file_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    return "image/jpeg"


def transcribe_image(file_bytes, user_name):
    """Transcribe an uploaded image of handwritten notes and store the result.

    The image is sent to the chat model as a Base64 data URL together with
    the system and user prompts. The reply is prefixed with a timestamp line,
    uploaded via ``upload_to_object_storage`` and returned.

    Args:
        file_bytes: Raw bytes of the uploaded image; None or empty means no
            upload.
        user_name: Name of the uploader, forwarded to the upload step.

    Returns:
        The timestamped transcription text, or ``"No image uploaded."`` when
        ``file_bytes`` is empty.
    """
    if not file_bytes:
        return "No image uploaded."

    encoded = encode_image_to_base64(file_bytes)
    # PNG uploads are accepted alongside JPEG, so label the data URL with the
    # type the bytes actually carry instead of always claiming JPEG.
    image_url = f"data:{_guess_image_mime(file_bytes)};base64,{encoded}"

    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": [
                {"type": "text", "text": user_prompt_template},
                {"type": "image_url", "image_url": {"url": image_url}},
            ]},
        ],
        max_tokens=1500,
    )

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    result = f"🗓️ Transcribed on: {timestamp}\n\n{response.choices[0].message.content}"
    upload_to_object_storage(user_name, result)
    return result
# === Gradio Interface ===
with gr.Blocks() as app:
    gr.Markdown("## Handwritten Note Transcriber & Analyzer")

    # NOTE(review): the original indentation was lost; the row is assumed to
    # hold the uploader dropdown and the file input only — confirm the layout.
    with gr.Row():
        user_dropdown = gr.Dropdown(
            choices=["Jim Goodwin", "Zahabiya Ali rampurawala", "Keith Gauvin"],
            label="Who is uploading this?",
        )
        input_file = gr.File(label="Upload image", type="binary", file_types=[".jpg", ".jpeg", ".png"])

    # Selecting (or clearing) a file triggers transcription right away.
    output_text = gr.Textbox(label="Transcription Output", lines=30)
    input_file.change(fn=transcribe_image, inputs=[input_file, user_dropdown], outputs=output_text)

    gr.Markdown("### List Object Store Contents")
    gr.Button("List Object Store").click(
        fn=lambda: "\n".join(list_object_store()),
        outputs=gr.Textbox(label="Object Store Contents"),
    )

    # List the bucket once while building the UI and reuse the result for
    # both dropdowns, instead of issuing the same listing request twice.
    available_files = list_object_store()

    gr.Markdown("### View Transcription")
    file_selector = gr.Dropdown(choices=list(available_files), label="Select transcription file")
    view_output = gr.Textbox(label="File Content")
    file_selector.change(fn=view_transcription, inputs=file_selector, outputs=view_output)

    gr.Markdown("### Summarize or Recommend")
    file_multiselect = gr.Dropdown(choices=list(available_files), label="Select files to analyze", multiselect=True)
    summary_output = gr.Textbox(label="Summary of Selected Transcriptions")
    rec_output = gr.Textbox(label="Recommended Next Steps")
    gr.Button("Summarize Files").click(fn=summarize_selected_files, inputs=file_multiselect, outputs=summary_output)
    gr.Button("Recommend from Files").click(fn=recommend_from_selected_files, inputs=file_multiselect, outputs=rec_output)

    gr.Markdown("### Word Cloud from Files")
    wordcloud_image = gr.Image(label="Word Cloud")
    gr.Button("Generate Word Map from Files").click(
        fn=generate_word_map_from_files,
        inputs=file_multiselect,
        outputs=wordcloud_image,
    )
# === Launch App ===
# Start the interface only when this file is executed as a script.
if __name__ == "__main__":
    launch_options = {"share": True}
    app.launch(**launch_options)