Spaces:

chandinisaisri
/

formiq

Sleeping

App Files Files Community

chandini2595 committed on 24 days ago

Commit

210b4bb

1 Parent(s): 7a3eff7

added

Browse files

Files changed (1) hide show

app.py +23 -27

app.py CHANGED Viewed

@@ -97,20 +97,24 @@ def extract_products(text):
 def extract_with_perplexity_llm(ocr_text):
     prompt = f"""
-Extract the following fields from this receipt text and return them as a single flat JSON object (not a list or nested object):
 - name (customer name)
-- product (main product name)
-- price (main product price)
-- date
-- receipt_no
 Example output:
 {{
-  "name": "ABS",
-  "product": "airpods",
-  "price": "579.18",
   "date": "12/13/2024",
-  "receipt_no": "042085"
 }}
 Text:
@@ -229,27 +233,19 @@ def main():
                         llm_json = extract_json_from_llm_output(llm_result)
                         if llm_json:
                             llm_data = json.loads(llm_json)
                     except Exception as e:
                         st.error(f"Failed to parse LLM output as JSON: {e}")
-                    # Merge results
-                    final_data = merge_extractions(fields, llm_data)
-                    # Display merged fields
-                    st.subheader("Final Extracted Fields (Merged)")
-                    st.json(final_data)
-                    # Save to DynamoDB
-                    try:
-                        save_to_dynamodb(final_data)
-                        st.success("Saved to DynamoDB!")
-                    except Exception as e:
-                        st.error(f"Failed to save to DynamoDB: {e}")
-                    # Display extracted products
-                    if "products" in final_data and final_data["products"]:
-                        st.subheader("Products (Final Extracted)")
-                        st.dataframe(pd.DataFrame(final_data["products"]))
                 except Exception as e:
                     logger.error(f"Error processing document: {str(e)}")

 def extract_with_perplexity_llm(ocr_text):
     prompt = f"""
+You are an expert at extracting structured data from receipts.
+From the following OCR text, extract these fields and return them as a flat JSON object with exactly these keys:
 - name (customer name)
+- date (date of purchase)
+- amount_paid (total amount paid, or price if only one product)
+- receipt_no (receipt number)
+- product (the main product name, as a string; if multiple products, pick the most expensive or the only one)
+**Note:** If the receipt has only one product, set 'product' to its name and 'amount_paid' to its price. If there is a 'price' and an 'amount paid', treat them as the same if they are equal.
 Example output:
 {{
+  "name": "Mrs. Genevieve Lopez",
   "date": "12/13/2024",
+  "amount_paid": 579.18,
+  "receipt_no": "042085",
+  "product": "Wireless Airpods"
 }}
 Text:
                         llm_json = extract_json_from_llm_output(llm_result)
                         if llm_json:
                             llm_data = json.loads(llm_json)
+                            # Save to DynamoDB
+                            try:
+                                save_to_dynamodb(llm_data)
+                                st.success("Saved to DynamoDB!")
+                            except Exception as e:
+                                st.error(f"Failed to save to DynamoDB: {e}")
                     except Exception as e:
                         st.error(f"Failed to parse LLM output as JSON: {e}")
+                    # Display extracted products if present
+                    if "products" in llm_data and llm_data["products"]:
+                        st.subheader("Products (LLM Extracted)")
+                        st.dataframe(pd.DataFrame(llm_data["products"]))
                 except Exception as e:
                     logger.error(f"Error processing document: {str(e)}")