Spaces:

AkashDataScience
/

ReceiptRAG

Sleeping

AkashDataScience committed on Mar 14

Commit

eb1ac6d

1 Parent(s): dc7c4f3

Adding embeddings

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,15 +1,36 @@
 import easyocr
 import gradio as gr
 from PIL import Image
 # OCR reader built once at module level with the language list ['en'] and
 # reused by every inference() call.
 reader = easyocr.Reader(['en'])
 # Run OCR on the image at img_path and return the recognized text as a single
 # newline-separated string.
 #   img_path  -- passed to reader.readtext as its first positional argument.
 #   width_ths -- forwarded as readtext's width_ths; the remaining thresholds
 #                (slope_ths, ycenter_ths, height_ths, add_margin) are fixed here.
 def inference(img_path, width_ths):
     # The result is joined as strings below, so with detail=0 readtext is
     # expected to yield plain text items -- confirm against the EasyOCR docs.
     output = reader.readtext(img_path, detail=0, slope_ths=0.7, ycenter_ths=0.9,
                           height_ths=0.8, width_ths=width_ths, add_margin=0.2)
     # Collapse the recognized pieces into one string, one piece per line.
     output = "\n".join(output)
     return output
 # NOTE(review): where `title` is consumed is not visible in this view.
 title = "Receipt RAG"

+import os
 import easyocr
 import gradio as gr
 from PIL import Image
+from llama_index.core import Settings
+from llama_index.llms.gemini import Gemini
+from llama_index.core import Document, VectorStoreIndex
+from llama_index.embeddings.gemini import GeminiEmbedding
 # OCR reader built once at module level with the language list ['en'].
 reader = easyocr.Reader(['en'])
 # Gemini LLM and embedding clients; both read the API key from the
 # GEMINI_API_KEY environment variable.
 # NOTE(review): os.getenv returns None when the variable is unset; how the
 # Gemini clients react to api_key=None is not visible here -- confirm.
+llm = Gemini(api_key=os.getenv('GEMINI_API_KEY'), model_name="models/gemini-2.0-flash")
+gemini_embedding_model = GeminiEmbedding(api_key=os.getenv('GEMINI_API_KEY'), model_name="models/embedding-001")
+# Set Global settings
 # Assigning on Settings (rather than passing the objects explicitly) is why
 # the index construction in this file names neither the LLM nor the embedder.
+Settings.llm = llm
+Settings.embed_model = gemini_embedding_model
 # Run OCR on the image at img_path, index the recognized text, persist the
 # index under ./receiptsembeddings, and return the text as a single
 # newline-separated string.
 #   img_path  -- passed to reader.readtext as its first positional argument.
 #   width_ths -- forwarded as readtext's width_ths; the remaining thresholds
 #                (slope_ths, ycenter_ths, height_ths, add_margin) are fixed here.
 def inference(img_path, width_ths):
     # The result is joined as strings below, so with detail=0 readtext is
     # expected to yield plain text items -- confirm against the EasyOCR docs.
     output = reader.readtext(img_path, detail=0, slope_ths=0.7, ycenter_ths=0.9,
                           height_ths=0.8, width_ths=width_ths, add_margin=0.2)
     # Collapse the recognized pieces into one string, one piece per line.
     output = "\n".join(output)
     # NOTE(review): there is no guard for empty OCR output; an empty string
     # is still wrapped in a Document and sent on to indexing -- confirm that
     # the embedding backend accepts empty text.
+    # create a Document object from the extracted text
+    doc = Document(text = output)
+    # Create an index from the documents and save it to the disk.
     # NOTE(review): the index is built from this one document only, on every
     # call; earlier receipts are not loaded or merged here.
+    index = VectorStoreIndex.from_documents([doc])
+    # save the index
     # NOTE(review): the persist directory is a fixed relative path, so each
     # call writes to the same location -- presumably replacing what the
     # previous call stored; verify against the llama_index persist docs.
+    index.storage_context.persist(persist_dir = "./receiptsembeddings")
     return output
 # NOTE(review): where `title` is consumed is not visible in this view.
 title = "Receipt RAG"