Spaces:

leadingbridge
/

ammu-order

Running

App Files Files Community

leadingbridge committed 13 days ago

Commit

a0c3eb1

verified ·

1 Parent(s): 669196a

Update app.py

Browse files

Files changed (1) hide show

app.py +246 -178

app.py CHANGED Viewed

@@ -1,192 +1,260 @@
-import io
-import os
-import tempfile
 import gradio as gr
-import pandas as pd
-import requests
 from openpyxl import load_workbook
-from gradio.data_classes import FileData
-# Hugging Face dataset template (we'll always fetch the latest)
-HF_TEMPLATE_URL = "https://huggingface.co/datasets/leadingbridge/ammu/blob/main/AMMU-order-form-template.xlsx"
 TEMPLATE_FILENAME = "AMMU-order-form-template.xlsx"
-def _raw_url(url: str) -> str:
-    """Convert HF 'blob' URL to 'resolve' URL for raw bytes download."""
-    return url.replace("/blob/", "/resolve/")
-def _read_input(file_input: str | bytes) -> pd.DataFrame:
-    """
-    Read CSV/XLSX to DataFrame.
-    - With Spaces, we use type='filepath' so file_input is a path string.
-    - If bytes are ever passed, try Excel first then CSV as fallback.
-    """
-    if isinstance(file_input, str):  # filepath
-        lower = file_input.lower()
-        if lower.endswith(".csv"):
-            df = pd.read_csv(file_input)
-        else:
-            df = pd.read_excel(file_input)
-    else:  # bytes
-        b = io.BytesIO(file_input)
-        try:
-            df = pd.read_excel(b)
-        except Exception:
-            b.seek(0)
-            df = pd.read_csv(io.BytesIO(file_input))
-    df.columns = [c.strip() for c in df.columns]
-    return df
-def _aggregate(df: pd.DataFrame) -> pd.DataFrame:
     """
-    Validate required columns and aggregate Quantity by SKU + Product Option Value.
-    - Sums quantities for duplicate SKU/option rows (your requirement).
     """
-    required = ["SKU", "Product Option Value", "Quantity"]
-    missing = [c for c in required if c not in df.columns]
-    if missing:
-        raise ValueError(f"Input file is missing required column(s): {', '.join(missing)}")
-    df["Quantity"] = pd.to_numeric(df["Quantity"], errors="coerce").fillna(0).astype(int)
-    grouped = (
-        df.groupby(["SKU", "Product Option Value"], dropna=False)["Quantity"]
-          .sum()
-          .reset_index()
-    )
-    return grouped
-def _find_cell(ws, text: str):
-    """Return (row, col) coordinates of the first cell whose value == text (exact match)."""
-    for r in ws.iter_rows(values_only=False):
-        for c in r:
-            if (c.value or "") == text:
-                return c.row, c.column
-    return None, None
-def _match_row(ws, opt_val: str | None):
-    """
-    Match the output row using:
-      1) Exact match: Column A == Product Option Value
-      2) Composite match: Column A == 'Product Option Value - <value in Column B of that row>'
-    Returns the row index or None if not found.
-    """
-    opt_val = "" if opt_val is None else str(opt_val).strip()
-    # 1) Exact match on Column A
-    for r in range(1, ws.max_row + 1):
-        colA = ws.cell(row=r, column=1).value
-        if colA is not None and str(colA).strip() == opt_val:
-            return r
-    # 2) Composite match
-    for r in range(1, ws.max_row + 1):
-        colA = ws.cell(row=r, column=1).value
-        colB = ws.cell(row=r, column=2).value
-        colA = "" if colA is None else str(colA).strip()
-        colB = "" if colB is None else str(colB).strip()
-        if colA == f"{opt_val} - {colB}":
-            return r
-    return None
-def _choose_quantity_col(ws) -> int:
     """
-    Choose the 'mapping column' where we should write quantities.
-    Heuristic:
-      - If a header cell equals 'Qty' or 'Quantity' in row 2 or 3, use that column.
-      - Otherwise, default to column B (2).
     """
-    headers = {"qty", "quantity"}
-    for rr in (2, 3):
-        for cc in range(1, ws.max_column + 1):
-            v = ws.cell(row=rr, column=cc).value
-            if isinstance(v, str) and v.strip().lower() in headers:
-                return cc
-    return 2  # default to column B
-def fill_template(input_file):
     """
-    Pipeline:
-      1) Download the live AMMU template from HF dataset (preserves formatting).
-      2) Read and aggregate input (SKU + Product Option Value → sum of Quantity).
-      3) Write chosen SKU to 'My SKU' (same row as label, next column).
-      4) For each Product Option Value, find matching row and write the aggregated quantity
-         into the chosen mapping column.
-      5) Return the filled template for download (same filename).
     """
-    if input_file is None:
-        return None, "Please upload an input file."
-    # Fetch the latest template
-    resp = requests.get(_raw_url(HF_TEMPLATE_URL))
-    resp.raise_for_status()
-    tmpl_bytes = io.BytesIO(resp.content)
-    # Load template; openpyxl may warn about data validation extensions—this is expected.
-    wb = load_workbook(tmpl_bytes)
-    ws = wb.active  # assume first sheet is the order sheet
-    # Read + aggregate input
-    df = _read_input(input_file)
-    grouped = _aggregate(df)
-    # Choose SKU (if multiple SKUs, use the first and inform the user)
-    skus = grouped["SKU"].dropna().astype(str).unique().tolist()
-    if not skus:
-        return None, "No SKU found in the input file."
-    chosen_sku = skus[0]
-    note = f"Multiple SKUs found {skus}. Using the first: {chosen_sku}." if len(skus) > 1 else ""
-    # Map SKU → "My SKU" (fill the cell to the right of the label)
-    r, c = _find_cell(ws, "My SKU")
-    if r and c:
-        ws.cell(row=r, column=c + 1, value=str(chosen_sku))
-    # Determine which column to use for quantities
-    qty_col = _choose_quantity_col(ws)
-    # Aggregate for the chosen SKU by Product Option Value (ensures dedup)
-    block = (
-        grouped[grouped["SKU"].astype(str) == str(chosen_sku)]
-        .groupby("Product Option Value", dropna=False)["Quantity"]
-        .sum()
-        .reset_index()
-    )
-    # Fill quantities into the template
-    filled = 0
-    for _, rec in block.iterrows():
-        opt_val = rec["Product Option Value"]
-        qty = int(rec["Quantity"])
-        target_row = _match_row(ws, opt_val)
-        if target_row:
-            ws.cell(row=target_row, column=qty_col, value=qty)
-            filled += 1
-    # Save to a temp file and return as FileData(path=...) so Gradio can serve it
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
-        tmp_path = tmp.name
-    wb.save(tmp_path)
-    filedata = FileData(
-        path=tmp_path,
-        orig_name=TEMPLATE_FILENAME,  # keep same filename as requested
-        mime_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-    )
-    status = f"Filled {filled} row(s). {note}"
-    return filedata, status
-with gr.Blocks(title="AMMU Order Filler (Template Preserved)") as demo:
-    gr.Markdown("## AMMU Order Filler\nUpload your input file; we’ll fill your live AMMU template and return it unchanged in format.")
-    # Use filepath to avoid pandas FutureWarning and for better performance on Spaces
-    file_in = gr.File(label="Upload Input File (CSV or XLSX)", file_count="single", type="filepath")
-    run_btn = gr.Button("Fill Template")
-    download = gr.File(label="Download Filled Template")
-    msg = gr.Markdown()
-    run_btn.click(fn=fill_template, inputs=file_in, outputs=[download, msg])
 if __name__ == "__main__":
-    # SSR is okay by default; set share=True if you want a public link from the Space logs
     demo.launch()

 import gradio as gr
 from openpyxl import load_workbook
+from openpyxl.worksheet.worksheet import Worksheet
+from huggingface_hub import hf_hub_download
+import tempfile
+import os, re
+from collections import defaultdict
# Hugging Face repo id passed to hf_hub_download (with repo_type="dataset") to fetch the template.
HF_DATASET_REPO = "leadingbridge/ammu"
# File name of the template workbook, used both for the hub download and for the local fallback path.
TEMPLATE_FILENAME = "AMMU-order-form-template.xlsx"
# If you commit the template file into the Space repo, this local fallback will be used.
# The path is resolved next to this script, so it does not depend on the working directory.
LOCAL_TEMPLATE_FALLBACK = os.path.join(os.path.dirname(__file__), TEMPLATE_FILENAME)
+def _normalize_power(val):
     """
+    Normalize input "Product Option Value" to match template's row-2 labels, e.g. '0.00', '-1.25'
+    Accepts numbers or strings such as 'PLANO', '0', '-1', '-1.0', '-1.00'.
     """
+    if val is None:
+        return None
+    s = str(val).strip()
+    if s == "":
+        return None
+    # Common synonyms for zero power
+    if s.lower() in {"plano", "piano", "0", "0.0", "0.00", "000"}:
+        return "0.00"
+    # Extract a signed/decimal number if present
+    m = re.search(r"(-?\d+(?:\.\d+)?)", s.replace(",", ""))
+    if not m:
+        return None
+    try:
+        num = float(m.group(1))
+    except ValueError:
+        return None
+    return f"{num:.2f}"
+def _power_to_triplet_digits(power_str: str) -> str:
+    """'-1.25' -> '125', '0.00' -> '000', '-4.00' -> '400'"""
+    if power_str is None:
+        return None
+    s = power_str.strip().lstrip("+")
+    s = s.replace("-", "")
+    if "." in s:
+        whole, frac = s.split(".", 1)
+        frac = (frac + "00")[:2]
+    else:
+        whole, frac = s, "00"
+    digits = f"{whole}{frac}"
+    return digits.zfill(3)
+def _find_header_row(ws: Worksheet, required_headers):
     """
+    Scan the top 10 rows to find a header row that includes all required headers (case-insensitive).
+    Returns (row_index, {header_lower: col_index})
     """
+    req = {h.lower() for h in required_headers}
+    for r in range(1, 11):
+        header_map = {}
+        present = set()
+        for c in range(1, ws.max_column + 1):
+            v = ws.cell(row=r, column=c).value
+            if isinstance(v, str) and v.strip():
+                key = v.strip().lower()
+                header_map[key] = c
+                if key in req:
+                    present.add(key)
+        if req.issubset(present):
+            return r, header_map
+    raise ValueError(f"Could not locate a header row containing: {required_headers}")
def _download_template():
    """
    Return the location of the AMMU template workbook.

    A copy committed next to this script (LOCAL_TEMPLATE_FALLBACK) takes
    precedence, which keeps the app working without network access. Only when
    that file is absent is the template fetched from the HF dataset repo, and
    the result of hf_hub_download is returned as-is.
    """
    if not os.path.exists(LOCAL_TEMPLATE_FALLBACK):
        # No local copy: pull the template from the dataset repo.
        return hf_hub_download(repo_id=HF_DATASET_REPO, filename=TEMPLATE_FILENAME, repo_type="dataset")
    return LOCAL_TEMPLATE_FALLBACK
def _upload_path(input_file):
    """
    Return the filesystem path of an uploaded file.

    The upload may arrive either as a plain path (str / os.PathLike) or as a
    temp-file style object that exposes its path in ``.name``; which one is
    used depends on how the Gradio File component is configured.
    """
    if isinstance(input_file, (str, os.PathLike)):
        return input_file
    return input_file.name


def _coerce_int(value):
    """Best-effort integer conversion of a cell value; blank or unparsable values count as 0."""
    if value is None or str(value).strip() == "":
        return 0
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        pass
    # Text such as '2.0' is rejected by int() directly, so go through float.
    try:
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return 0


def process(input_file):
    """
    Fill the AMMU order form template from an uploaded order workbook.

    1) Read the uploaded input Excel.
    2) Aggregate quantities by (SKU, Product Option Value).
    3) Load the AMMU order form template.
    4) For each (SKU, power), write quantity into the matching row (by SKU) and column:
         - Prefer the textual power labels like '0.00', '-1.00'
         - Fallback to the row with numeric triplets '000', '125', etc.
    5) Return a filled Excel file for download + a short log.

    Args:
        input_file: The Gradio upload: a path string or an object with a
            ``.name`` path attribute. None means nothing was uploaded.

    Returns:
        (path_to_filled_workbook, log_text) on success, or (None, message)
        when no file was given or any step failed. Exceptions are reported in
        the message instead of being raised, so the UI always gets a result.
    """
    try:
        if input_file is None:
            return None, "Please upload an Excel file first."
        # --- Read input & detect headers by name (not index) ---
        wb_in = load_workbook(_upload_path(input_file), data_only=True)
        ws_in = wb_in.active
        header_row_idx, header_map = _find_header_row(ws_in, {"SKU", "Product Option Value", "Quantity"})
        col_sku = header_map["sku"]
        col_pov = header_map["product option value"]
        col_qty = header_map["quantity"]
        # --- Aggregate quantities across duplicate lines (same SKU + same power) ---
        agg = defaultdict(int)  # (sku, power_str) -> qty sum
        rows_scanned = 0
        for r in range(header_row_idx + 1, ws_in.max_row + 1):
            sku = ws_in.cell(row=r, column=col_sku).value
            pov = ws_in.cell(row=r, column=col_pov).value
            qty = ws_in.cell(row=r, column=col_qty).value
            # Rows with none of the three fields are skipped and not counted.
            if sku is None and pov is None and qty is None:
                continue
            rows_scanned += 1
            power = _normalize_power(pov)
            q = _coerce_int(qty)
            # Lines without a SKU, a recognizable power, or a non-zero quantity are dropped.
            if sku and power is not None and q:
                agg[(str(sku).strip(), power)] += q
        # --- Load template ---
        template_path = _download_template()
        wb_out = load_workbook(template_path)
        ws_out = wb_out.active
        # Find within rows 1-10 of the template:
        # a) header row containing "SKU"
        # b) row containing textual power labels ('0.00', '-1.00', ...)
        # c) (optional) row containing numeric triplets ('000', '125', ...)
        sku_header_row = None
        sku_col_idx = None
        power_col_map = {}
        power_label_row = None
        triplet_col_map = {}
        triplet_row = None
        for r in range(1, 11):
            row_vals = [ws_out.cell(row=r, column=c).value for c in range(1, ws_out.max_column + 1)]
            # (a) 'SKU' header; a later match overwrites an earlier one.
            for c, v in enumerate(row_vals, start=1):
                if isinstance(v, str) and v.strip().lower() == "sku":
                    sku_header_row = r
                    sku_col_idx = c
            # (b) textual labels: only cells already written with two decimals qualify
            labels = {}
            for c, v in enumerate(row_vals, start=1):
                if isinstance(v, str):
                    nv = _normalize_power(v)
                    if nv is not None and re.match(r"^-?\d+\.\d{2}$", v.strip()):
                        labels[nv] = c
            # A row needs at least 5 labels to count; the first such row is kept.
            if len(labels) >= 5 and power_label_row is None:
                power_label_row = r
                power_col_map = labels
            # (c) numeric triplets
            trip = {}
            for c, v in enumerate(row_vals, start=1):
                if isinstance(v, str) and re.fullmatch(r"\d{2,3}", v.strip()):
                    trip[v.strip()] = c
            if len(trip) >= 5 and triplet_row is None:
                triplet_row = r
                triplet_col_map = trip
        if sku_header_row is None or sku_col_idx is None:
            raise ValueError("Could not find the 'SKU' header row in the template (looked in rows 1–10).")
        if not (power_label_row or triplet_row):
            raise ValueError("Could not find the power-column headers in the template (looked in rows 1–10).")
        # Build SKU -> row map from the template (for a repeated SKU the last row wins)
        sku_to_row = {}
        for r in range(sku_header_row + 1, ws_out.max_row + 1):
            val = ws_out.cell(row=r, column=sku_col_idx).value
            if val is None:
                continue
            sku_to_row[str(val).strip()] = r
        # Optional: write unique SKUs next to a "My SKU" label if it exists in the top area
        mysku_cell = None
        for r in range(1, 11):
            for c in range(1, ws_out.max_column + 1):
                v = ws_out.cell(row=r, column=c).value
                if isinstance(v, str) and v.strip().lower() == "my sku":
                    mysku_cell = (r, c + 1)
                    break
            if mysku_cell:
                break
        if mysku_cell and agg:
            unique_skus = sorted({sku for sku, _ in agg})
            ws_out.cell(row=mysku_cell[0], column=mysku_cell[1]).value = ", ".join(unique_skus)
        # Write aggregated quantities into the correct cells
        missing_skus = set()
        missing_powers = set()
        written_count = 0
        for (sku, power), qty in agg.items():
            row_idx = sku_to_row.get(sku)
            if row_idx is None:
                missing_skus.add(sku)
                continue
            # Prefer textual power labels row (e.g. '0.00', '-1.25')
            col_idx = power_col_map.get(power) if power_col_map else None
            # Fallback to numeric triplets (e.g. '000', '125')
            if col_idx is None and triplet_col_map:
                key = _power_to_triplet_digits(power)  # e.g. '-1.25' -> '125'
                col_idx = triplet_col_map.get(key)
            if col_idx is None:
                missing_powers.add(power)
                continue
            # Add to whatever the template cell already holds instead of overwriting it.
            target = ws_out.cell(row=row_idx, column=col_idx)
            target.value = _coerce_int(target.value) + int(qty)
            written_count += 1
        # Save to a fresh temp directory; the file has to outlive this call so it can be downloaded.
        tmpdir = tempfile.mkdtemp()
        out_path = os.path.join(tmpdir, "AMMU-order-form-FILLED.xlsx")
        wb_out.save(out_path)
        log_lines = [
            f"Rows scanned in input: {rows_scanned}",
            f"Unique (SKU, power) pairs aggregated: {len(agg)}",
            f"Entries written into template: {written_count}",
        ]
        if missing_skus:
            log_lines.append(f"⚠️ SKUs not found in template ({len(missing_skus)}): {', '.join(sorted(missing_skus))}")
        if missing_powers:
            log_lines.append(f"⚠️ Powers not found in template ({len(missing_powers)}): {', '.join(sorted(missing_powers))}")
        return out_path, "\n".join(log_lines)
    except Exception as e:
        # UI boundary: surface the failure in the log box rather than crashing the request.
        return None, f"Error: {e}"
# Gradio UI: one upload input, one trigger button, and two outputs (file + log).
with gr.Blocks(title="AMMU Order Form Filler") as demo:
    gr.Markdown("### AMMU Order Form Filler\nUpload your input Excel. The app will fill quantities into the official AMMU template based on SKU and power.")
    with gr.Row():
        # Only .xlsx is offered in the picker; process() reads the upload with openpyxl.
        in_file = gr.File(label="Upload input Excel (.xlsx)", file_types=[".xlsx"])
    with gr.Row():
        run_btn = gr.Button("Process")
    with gr.Row():
        out_file = gr.File(label="Download filled template (.xlsx)")
    log_box = gr.Textbox(label="Log", lines=8)
    # process() returns (output path or None, log text), matching the order of `outputs`.
    run_btn.click(fn=process, inputs=in_file, outputs=[out_file, log_box])
# Start the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    demo.launch()