Spaces:
Sleeping
Sleeping
Commit
·
210b4bb
1
Parent(s):
7a3eff7
added
Browse files
app.py
CHANGED
@@ -97,20 +97,24 @@ def extract_products(text):
|
|
97 |
|
98 |
def extract_with_perplexity_llm(ocr_text):
|
99 |
prompt = f"""
|
100 |
-
|
|
|
|
|
101 |
- name (customer name)
|
102 |
-
-
|
103 |
-
-
|
104 |
-
-
|
105 |
-
-
|
|
|
|
|
106 |
|
107 |
Example output:
|
108 |
{{
|
109 |
-
"name": "
|
110 |
-
"product": "airpods",
|
111 |
-
"price": "579.18",
|
112 |
"date": "12/13/2024",
|
113 |
-
"
|
|
|
|
|
114 |
}}
|
115 |
|
116 |
Text:
|
@@ -229,27 +233,19 @@ def main():
|
|
229 |
llm_json = extract_json_from_llm_output(llm_result)
|
230 |
if llm_json:
|
231 |
llm_data = json.loads(llm_json)
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
except Exception as e:
|
233 |
st.error(f"Failed to parse LLM output as JSON: {e}")
|
234 |
|
235 |
-
#
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
st.subheader("Final Extracted Fields (Merged)")
|
240 |
-
st.json(final_data)
|
241 |
-
|
242 |
-
# Save to DynamoDB
|
243 |
-
try:
|
244 |
-
save_to_dynamodb(final_data)
|
245 |
-
st.success("Saved to DynamoDB!")
|
246 |
-
except Exception as e:
|
247 |
-
st.error(f"Failed to save to DynamoDB: {e}")
|
248 |
-
|
249 |
-
# Display extracted products
|
250 |
-
if "products" in final_data and final_data["products"]:
|
251 |
-
st.subheader("Products (Final Extracted)")
|
252 |
-
st.dataframe(pd.DataFrame(final_data["products"]))
|
253 |
|
254 |
except Exception as e:
|
255 |
logger.error(f"Error processing document: {str(e)}")
|
|
|
97 |
|
98 |
def extract_with_perplexity_llm(ocr_text):
|
99 |
prompt = f"""
|
100 |
+
You are an expert at extracting structured data from receipts.
|
101 |
+
|
102 |
+
From the following OCR text, extract these fields and return them as a flat JSON object with exactly these keys:
|
103 |
- name (customer name)
|
104 |
+
- date (date of purchase)
|
105 |
+
- amount_paid (total amount paid, or price if only one product)
|
106 |
+
- receipt_no (receipt number)
|
107 |
+
- product (the main product name, as a string; if multiple products, pick the most expensive or the only one)
|
108 |
+
|
109 |
+
**Note:** If the receipt has only one product, set 'product' to its name and 'amount_paid' to its price. If there is a 'price' and an 'amount paid', treat them as the same if they are equal.
|
110 |
|
111 |
Example output:
|
112 |
{{
|
113 |
+
"name": "Mrs. Genevieve Lopez",
|
|
|
|
|
114 |
"date": "12/13/2024",
|
115 |
+
"amount_paid": 579.18,
|
116 |
+
"receipt_no": "042085",
|
117 |
+
"product": "Wireless Airpods"
|
118 |
}}
|
119 |
|
120 |
Text:
|
|
|
233 |
llm_json = extract_json_from_llm_output(llm_result)
|
234 |
if llm_json:
|
235 |
llm_data = json.loads(llm_json)
|
236 |
+
# Save to DynamoDB
|
237 |
+
try:
|
238 |
+
save_to_dynamodb(llm_data)
|
239 |
+
st.success("Saved to DynamoDB!")
|
240 |
+
except Exception as e:
|
241 |
+
st.error(f"Failed to save to DynamoDB: {e}")
|
242 |
except Exception as e:
|
243 |
st.error(f"Failed to parse LLM output as JSON: {e}")
|
244 |
|
245 |
+
# Display extracted products if present
|
246 |
+
if "products" in llm_data and llm_data["products"]:
|
247 |
+
st.subheader("Products (LLM Extracted)")
|
248 |
+
st.dataframe(pd.DataFrame(llm_data["products"]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
|
250 |
except Exception as e:
|
251 |
logger.error(f"Error processing document: {str(e)}")
|