chandini2595 committed on
Commit
8c81f89
·
1 Parent(s): bb9f60a

Remove post-training TensorBoard log charts

Browse files
Files changed (1) hide show
  1. app.py +20 -11
app.py CHANGED
@@ -62,10 +62,7 @@ def extract_json_from_llm_output(llm_result):
62
  return None
63
 
64
  def extract_fields(image_path):
65
- # OCR
66
  text = pytesseract.image_to_string(Image.open(image_path))
67
-
68
- # Display OCR output for debugging
69
  st.subheader("Raw OCR Output")
70
  st.code(text)
71
 
@@ -86,21 +83,24 @@ def extract_fields(image_path):
86
  else:
87
  results[field] = None
88
 
 
 
89
  return results
90
 
 
 
 
 
 
 
 
91
  def extract_with_perplexity_llm(ocr_text):
92
  prompt = f"""
93
- Extract the following fields from this receipt text:
94
- - name
95
- - date
96
- - product
97
- - amount_paid
98
- - receipt_no
99
 
100
  Text:
101
  \"\"\"{ocr_text}\"\"\"
102
-
103
- Return the result as a JSON object with those fields.
104
  """
105
  messages = [
106
  {
@@ -119,6 +119,11 @@ Return the result as a JSON object with those fields.
119
  )
120
  return response.choices[0].message.content
121
 
 
 
 
 
 
122
  def main():
123
  st.set_page_config(
124
  page_title="FormIQ - Intelligent Document Parser",
@@ -199,6 +204,10 @@ def main():
199
  fields_df = pd.DataFrame([fields])
200
  st.dataframe(fields_df)
201
 
 
 
 
 
202
  except Exception as e:
203
  st.error(f"LLM extraction failed: {e}")
204
 
 
62
  return None
63
 
64
  def extract_fields(image_path):
 
65
  text = pytesseract.image_to_string(Image.open(image_path))
 
 
66
  st.subheader("Raw OCR Output")
67
  st.code(text)
68
 
 
83
  else:
84
  results[field] = None
85
 
86
+ # Extract all products
87
+ results["products"] = extract_products(text)
88
  return results
89
 
90
def extract_products(text):
    """Extract product line items (name and price) from receipt text.

    A line item is recognised as a run of uppercase letters, digits and
    spaces, followed by whitespace, followed by a price written with exactly
    two decimal places (for example ``COFFEE 3.50``).

    Args:
        text: Raw text to scan (e.g. OCR output). ``None`` or an empty
            string yields an empty list.

    Returns:
        A list of ``{"name": str, "price": float}`` dicts in the order the
        items appear in ``text``. Matches whose name is blank after
        stripping are dropped.
    """
    # Nothing to scan; also avoids handing None to the regex engine.
    if not text:
        return []

    # Product name (uppercase letters/digits/spaces), then a price (float).
    # NOTE: lowercase letters are not part of the name class, and ``\s+``
    # also matches line breaks, so a name and its price may sit on
    # adjacent lines.
    product_pattern = r"([A-Z0-9 ]+)\s+([0-9]+\.[0-9]{2})"

    products = []
    for raw_name, raw_price in re.findall(product_pattern, text):
        name = raw_name.strip()
        # The name class accepts spaces, so a price preceded only by
        # whitespace would otherwise be reported as a nameless product.
        if not name:
            continue
        products.append({"name": name, "price": float(raw_price)})
    return products
96
+
97
  def extract_with_perplexity_llm(ocr_text):
98
  prompt = f"""
99
+ Extract all products as a list of {name, price} from this receipt text.
100
+ Return the result as a JSON object with a 'products' field (list of objects).
 
 
 
 
101
 
102
  Text:
103
  \"\"\"{ocr_text}\"\"\"
 
 
104
  """
105
  messages = [
106
  {
 
119
  )
120
  return response.choices[0].message.content
121
 
122
+ def save_to_dynamodb(data, table_name="Receipts"):
123
+ # ... existing code ...
124
+ # data["products"] is a list of dicts
125
+ table.put_item(Item=data)
126
+
127
  def main():
128
  st.set_page_config(
129
  page_title="FormIQ - Intelligent Document Parser",
 
204
  fields_df = pd.DataFrame([fields])
205
  st.dataframe(fields_df)
206
 
207
+ if "products" in fields and fields["products"]:
208
+ st.subheader("Products")
209
+ st.dataframe(pd.DataFrame(fields["products"]))
210
+
211
  except Exception as e:
212
  st.error(f"LLM extraction failed: {e}")
213