chandini2595 committed on
Commit
210b4bb
·
1 Parent(s): 7a3eff7
Files changed (1) hide show
  1. app.py +23 -27
app.py CHANGED
@@ -97,20 +97,24 @@ def extract_products(text):
97
 
98
  def extract_with_perplexity_llm(ocr_text):
99
  prompt = f"""
100
- Extract the following fields from this receipt text and return them as a single flat JSON object (not a list or nested object):
 
 
101
  - name (customer name)
102
- - product (main product name)
103
- - price (main product price)
104
- - date
105
- - receipt_no
 
 
106
 
107
  Example output:
108
  {{
109
- "name": "ABS",
110
- "product": "airpods",
111
- "price": "579.18",
112
  "date": "12/13/2024",
113
- "receipt_no": "042085"
 
 
114
  }}
115
 
116
  Text:
@@ -229,27 +233,19 @@ def main():
229
  llm_json = extract_json_from_llm_output(llm_result)
230
  if llm_json:
231
  llm_data = json.loads(llm_json)
 
 
 
 
 
 
232
  except Exception as e:
233
  st.error(f"Failed to parse LLM output as JSON: {e}")
234
 
235
- # Merge results
236
- final_data = merge_extractions(fields, llm_data)
237
-
238
- # Display merged fields
239
- st.subheader("Final Extracted Fields (Merged)")
240
- st.json(final_data)
241
-
242
- # Save to DynamoDB
243
- try:
244
- save_to_dynamodb(final_data)
245
- st.success("Saved to DynamoDB!")
246
- except Exception as e:
247
- st.error(f"Failed to save to DynamoDB: {e}")
248
-
249
- # Display extracted products
250
- if "products" in final_data and final_data["products"]:
251
- st.subheader("Products (Final Extracted)")
252
- st.dataframe(pd.DataFrame(final_data["products"]))
253
 
254
  except Exception as e:
255
  logger.error(f"Error processing document: {str(e)}")
 
97
 
98
  def extract_with_perplexity_llm(ocr_text):
99
  prompt = f"""
100
+ You are an expert at extracting structured data from receipts.
101
+
102
+ From the following OCR text, extract these fields and return them as a flat JSON object with exactly these keys:
103
  - name (customer name)
104
+ - date (date of purchase)
105
+ - amount_paid (total amount paid, or price if only one product)
106
+ - receipt_no (receipt number)
107
+ - product (the main product name, as a string; if multiple products, pick the most expensive or the only one)
108
+
109
+ **Note:** If the receipt has only one product, set 'product' to its name and 'amount_paid' to its price. If there is a 'price' and an 'amount paid', treat them as the same if they are equal.
110
 
111
  Example output:
112
  {{
113
+ "name": "Mrs. Genevieve Lopez",
 
 
114
  "date": "12/13/2024",
115
+ "amount_paid": 579.18,
116
+ "receipt_no": "042085",
117
+ "product": "Wireless Airpods"
118
  }}
119
 
120
  Text:
 
233
  llm_json = extract_json_from_llm_output(llm_result)
234
  if llm_json:
235
  llm_data = json.loads(llm_json)
236
+ # Save to DynamoDB
237
+ try:
238
+ save_to_dynamodb(llm_data)
239
+ st.success("Saved to DynamoDB!")
240
+ except Exception as e:
241
+ st.error(f"Failed to save to DynamoDB: {e}")
242
  except Exception as e:
243
  st.error(f"Failed to parse LLM output as JSON: {e}")
244
 
245
+ # Display extracted products if present
246
+ if "products" in llm_data and llm_data["products"]:
247
+ st.subheader("Products (LLM Extracted)")
248
+ st.dataframe(pd.DataFrame(llm_data["products"]))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
 
250
  except Exception as e:
251
  logger.error(f"Error processing document: {str(e)}")