AkashDataScience committed on
Commit
eb1ac6d
·
1 Parent(s): dc7c4f3

Adding embeddings

Browse files
Files changed (1) hide show
  1. app.py +21 -0
app.py CHANGED
@@ -1,15 +1,36 @@
 
1
  import easyocr
2
  import gradio as gr
3
  from PIL import Image
 
 
 
 
4
 
5
  reader = easyocr.Reader(['en'])
6
 
 
 
 
 
 
 
 
7
  def inference(img_path, width_ths):
8
  output = reader.readtext(img_path, detail=0, slope_ths=0.7, ycenter_ths=0.9,
9
  height_ths=0.8, width_ths=width_ths, add_margin=0.2)
10
 
11
  output = "\n".join(output)
12
 
 
 
 
 
 
 
 
 
 
13
  return output
14
 
15
  title = "Receipt RAG"
 
1
+ import os
2
  import easyocr
3
  import gradio as gr
4
  from PIL import Image
5
+ from llama_index.core import Settings
6
+ from llama_index.llms.gemini import Gemini
7
+ from llama_index.core import Document, VectorStoreIndex
8
+ from llama_index.embeddings.gemini import GeminiEmbedding
9
 
10
  reader = easyocr.Reader(['en'])
11
 
12
+ llm = Gemini(api_key=os.getenv('GEMINI_API_KEY'), model_name="models/gemini-2.0-flash")
13
+ gemini_embedding_model = GeminiEmbedding(api_key=os.getenv('GEMINI_API_KEY'), model_name="models/embedding-001")
14
+
15
+ # Set Global settings
16
+ Settings.llm = llm
17
+ Settings.embed_model = gemini_embedding_model
18
+
19
  def inference(img_path, width_ths):
20
  output = reader.readtext(img_path, detail=0, slope_ths=0.7, ycenter_ths=0.9,
21
  height_ths=0.8, width_ths=width_ths, add_margin=0.2)
22
 
23
  output = "\n".join(output)
24
 
25
+ # create a Document object from the extracted text
26
+ doc = Document(text = output)
27
+
28
+ # Create an index from the documents and save it to the disk.
29
+ index = VectorStoreIndex.from_documents([doc])
30
+
31
+ # save the index
32
+ index.storage_context.persist(persist_dir = "./receiptsembeddings")
33
+
34
  return output
35
 
36
  title = "Receipt RAG"