Spaces:

chandinisaisri
/

formiq

Sleeping

App Files Files Community

chandini2595 committed on 24 days ago

Commit

8c81f89

1 Parent(s): bb9f60a

Remove post-training TensorBoard log charts

Browse files

Files changed (1) hide show

app.py +20 -11

app.py CHANGED Viewed

@@ -62,10 +62,7 @@ def extract_json_from_llm_output(llm_result):
     return None
 def extract_fields(image_path):
-    # OCR
     text = pytesseract.image_to_string(Image.open(image_path))
-    # Display OCR output for debugging
     st.subheader("Raw OCR Output")
     st.code(text)
@@ -86,21 +83,24 @@ def extract_fields(image_path):
         else:
             results[field] = None
     return results
 def extract_with_perplexity_llm(ocr_text):
     prompt = f"""
-Extract the following fields from this receipt text:
-- name
-- date
-- product
-- amount_paid
-- receipt_no
 Text:
 \"\"\"{ocr_text}\"\"\"
-Return the result as a JSON object with those fields.
 """
     messages = [
         {
@@ -119,6 +119,11 @@ Return the result as a JSON object with those fields.
     )
     return response.choices[0].message.content
 def main():
     st.set_page_config(
         page_title="FormIQ - Intelligent Document Parser",
@@ -199,6 +204,10 @@ def main():
                             fields_df = pd.DataFrame([fields])
                             st.dataframe(fields_df)
                         except Exception as e:
                             st.error(f"LLM extraction failed: {e}")

     return None
 def extract_fields(image_path):
     text = pytesseract.image_to_string(Image.open(image_path))
     st.subheader("Raw OCR Output")
     st.code(text)
         else:
             results[field] = None
+    # Extract all products
+    results["products"] = extract_products(text)
     return results
+def extract_products(text):
+    # Regex: product name (uppercase letters, digits and spaces only - lowercase is not matched), whitespace, then a price with exactly two decimals
+    product_pattern = r"([A-Z0-9 ]+)\s+([0-9]+\.[0-9]{2})"
+    matches = re.findall(product_pattern, text)  # two groups -> list of (name, price) string tuples
+    products = [{"name": name.strip(), "price": float(price)} for name, price in matches]
+    return products  # list of {"name": str, "price": float}; empty list when nothing matches
 def extract_with_perplexity_llm(ocr_text):
     prompt = f"""
+Extract all products as a list of {name, price} from this receipt text.
+Return the result as a JSON object with a 'products' field (list of objects).
 Text:
 \"\"\"{ocr_text}\"\"\"
 """
     messages = [
         {
     )
     return response.choices[0].message.content
+def save_to_dynamodb(data, table_name="Receipts"):
+    # ... existing code ... (NOTE(review): `table` is not bound in the visible lines and `table_name` is unused here - the elided code must create `table`; confirm)
+    # data["products"] is a list of dicts (presumably the {"name", "price"} dicts built by extract_products - verify against caller)
+    table.put_item(Item=data)  # NOTE(review): if `table` is a boto3 DynamoDB Table, float values are rejected and need Decimal - TODO confirm
 def main():
     st.set_page_config(
         page_title="FormIQ - Intelligent Document Parser",
                             fields_df = pd.DataFrame([fields])
                             st.dataframe(fields_df)
+                            if "products" in fields and fields["products"]:
+                                st.subheader("Products")
+                                st.dataframe(pd.DataFrame(fields["products"]))
                         except Exception as e:
                             st.error(f"LLM extraction failed: {e}")