leadingbridge committed on
Commit
a0c3eb1
·
verified ·
1 Parent(s): 669196a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +246 -178
app.py CHANGED
@@ -1,192 +1,260 @@
1
- import io
2
- import os
3
- import tempfile
4
  import gradio as gr
5
- import pandas as pd
6
- import requests
7
  from openpyxl import load_workbook
8
- from gradio.data_classes import FileData
 
 
 
 
9
 
10
# Hugging Face dataset template. The app fetches it on every run so the
# latest committed version of the order form is always used.
HF_TEMPLATE_URL = "https://huggingface.co/datasets/leadingbridge/ammu/blob/main/AMMU-order-form-template.xlsx"
# File name presented to the user when the filled workbook is downloaded.
TEMPLATE_FILENAME = "AMMU-order-form-template.xlsx"
 
 
13
 
14
def _raw_url(url: str) -> str:
    """Convert a Hugging Face 'blob' page URL into its 'resolve' raw-download URL.

    Only the first ``/blob/`` segment is rewritten, so a file path that
    itself contains a ``blob`` directory is left intact. URLs without a
    ``/blob/`` segment are returned unchanged.
    """
    return url.replace("/blob/", "/resolve/", 1)
17
-
18
- def _read_input(file_input: str | bytes) -> pd.DataFrame:
19
- """
20
- Read CSV/XLSX to DataFrame.
21
- - With Spaces, we use type='filepath' so file_input is a path string.
22
- - If bytes are ever passed, try Excel first then CSV as fallback.
23
- """
24
- if isinstance(file_input, str): # filepath
25
- lower = file_input.lower()
26
- if lower.endswith(".csv"):
27
- df = pd.read_csv(file_input)
28
- else:
29
- df = pd.read_excel(file_input)
30
- else: # bytes
31
- b = io.BytesIO(file_input)
32
- try:
33
- df = pd.read_excel(b)
34
- except Exception:
35
- b.seek(0)
36
- df = pd.read_csv(io.BytesIO(file_input))
37
- df.columns = [c.strip() for c in df.columns]
38
- return df
39
-
40
def _aggregate(df: pd.DataFrame) -> pd.DataFrame:
    """Validate required columns and sum Quantity per (SKU, Product Option Value).

    Duplicate SKU/option rows are collapsed into one row whose Quantity is
    the sum of the duplicates. Quantities that cannot be parsed as numbers
    count as 0. The input DataFrame is not modified.

    Args:
        df: Input rows; must contain the columns 'SKU',
            'Product Option Value' and 'Quantity'.

    Returns:
        A DataFrame with columns SKU, Product Option Value, Quantity.

    Raises:
        ValueError: If any required column is missing.
    """
    required = ["SKU", "Product Option Value", "Quantity"]
    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"Input file is missing required column(s): {', '.join(missing)}")

    # assign() returns a new frame, so the caller's data keeps its raw values.
    quantities = pd.to_numeric(df["Quantity"], errors="coerce").fillna(0).astype(int)
    cleaned = df.assign(Quantity=quantities)

    # dropna=False keeps rows whose SKU or option value is blank.
    return (
        cleaned.groupby(["SKU", "Product Option Value"], dropna=False)["Quantity"]
        .sum()
        .reset_index()
    )
57
-
58
def _find_cell(ws, text: str):
    """Return (row, col) of the first text cell whose trimmed value equals *text*.

    Only string cells are considered, and surrounding whitespace is ignored
    on both sides, so a label typed as ``"My SKU "`` is still found. An
    empty search text never matches; otherwise every blank cell would
    qualify.

    Returns:
        ``(row, column)`` as 1-based indexes, or ``(None, None)`` if no
        cell matches.
    """
    wanted = text.strip()
    if not wanted:
        return None, None
    for row in ws.iter_rows(values_only=False):
        for cell in row:
            value = cell.value
            if isinstance(value, str) and value.strip() == wanted:
                return cell.row, cell.column
    return None, None
65
-
66
- def _match_row(ws, opt_val: str | None):
67
- """
68
- Match the output row using:
69
- 1) Exact match: Column A == Product Option Value
70
- 2) Composite match: Column A == 'Product Option Value - <value in Column B of that row>'
71
- Returns the row index or None if not found.
72
- """
73
- opt_val = "" if opt_val is None else str(opt_val).strip()
74
-
75
- # 1) Exact match on Column A
76
- for r in range(1, ws.max_row + 1):
77
- colA = ws.cell(row=r, column=1).value
78
- if colA is not None and str(colA).strip() == opt_val:
79
- return r
80
-
81
- # 2) Composite match
82
- for r in range(1, ws.max_row + 1):
83
- colA = ws.cell(row=r, column=1).value
84
- colB = ws.cell(row=r, column=2).value
85
- colA = "" if colA is None else str(colA).strip()
86
- colB = "" if colB is None else str(colB).strip()
87
- if colA == f"{opt_val} - {colB}":
88
- return r
89
-
90
- return None
91
-
92
def _choose_quantity_col(ws, header_rows=(2, 3), default_col: int = 2) -> int:
    """Choose the column into which quantities should be written.

    Heuristic: the first cell in *header_rows* (scanned row by row, left to
    right) whose text is 'Qty' or 'Quantity', ignoring case and surrounding
    whitespace, selects the column. Otherwise *default_col* is used.

    Args:
        ws: Worksheet-like object exposing ``max_column`` and ``cell()``.
        header_rows: 1-based row indexes to inspect; defaults to rows 2 and 3.
        default_col: 1-based column returned when no header is found;
            defaults to column B.

    Returns:
        The 1-based column index.
    """
    headers = {"qty", "quantity"}
    last_col = ws.max_column
    for row_idx in header_rows:
        for col_idx in range(1, last_col + 1):
            value = ws.cell(row=row_idx, column=col_idx).value
            if isinstance(value, str) and value.strip().lower() in headers:
                return col_idx
    return default_col
106
-
107
def fill_template(input_file):
    """Fill the live AMMU order-form template from an uploaded order file.

    Pipeline:
      1) Read and aggregate the input (SKU + Product Option Value -> sum of Quantity).
      2) Download the live AMMU template from the HF dataset (preserves formatting).
      3) Write the chosen SKU next to the 'My SKU' label (same row, next column).
      4) For each Product Option Value, find the matching row and write the
         aggregated quantity into the chosen quantity column.
      5) Return the filled template for download (same filename).

    Args:
        input_file: Path of the uploaded CSV/XLSX, or None if nothing was uploaded.

    Returns:
        ``(FileData | None, status message)``.

    Raises:
        ValueError: If the input lacks a required column.
        requests.RequestException: If the template cannot be downloaded.
    """
    if input_file is None:
        return None, "Please upload an input file."

    # Validate the upload first so a bad file does not cost a network round trip.
    df = _read_input(input_file)
    grouped = _aggregate(df)

    # Choose SKU (if multiple SKUs, use the first and inform the user).
    skus = grouped["SKU"].dropna().astype(str).unique().tolist()
    if not skus:
        return None, "No SKU found in the input file."
    chosen_sku = skus[0]
    note = f"Multiple SKUs found {skus}. Using the first: {chosen_sku}." if len(skus) > 1 else ""

    # Fetch the latest template. The timeout keeps a stalled connection from
    # hanging the request forever.
    resp = requests.get(_raw_url(HF_TEMPLATE_URL), timeout=30)
    resp.raise_for_status()

    # openpyxl may warn about data-validation extensions; this is expected.
    wb = load_workbook(io.BytesIO(resp.content))
    ws = wb.active  # assume the first sheet is the order sheet

    # Map SKU -> "My SKU" (fill the cell to the right of the label).
    label_row, label_col = _find_cell(ws, "My SKU")
    if label_row and label_col:
        ws.cell(row=label_row, column=label_col + 1, value=str(chosen_sku))

    qty_col = _choose_quantity_col(ws)

    # Re-aggregate by option value for the chosen SKU. SKUs are compared as
    # strings, so values such as 1 and "1" collapse into one group here.
    block = (
        grouped[grouped["SKU"].astype(str) == str(chosen_sku)]
        .groupby("Product Option Value", dropna=False)["Quantity"]
        .sum()
        .reset_index()
    )

    filled = 0
    unmatched = []
    for _, rec in block.iterrows():
        opt_val = rec["Product Option Value"]
        if pd.isna(opt_val):
            unmatched.append("(blank)")
            continue
        target_row = _match_row(ws, opt_val)
        if target_row:
            ws.cell(row=target_row, column=qty_col, value=int(rec["Quantity"]))
            filled += 1
        else:
            unmatched.append(str(opt_val))

    # Reserve a temp path, close the handle, then save: writing to a file
    # that is still open fails on some platforms.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
        tmp_path = tmp.name
    wb.save(tmp_path)

    filedata = FileData(
        path=tmp_path,
        orig_name=TEMPLATE_FILENAME,  # keep same filename as requested
        mime_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )

    parts = [f"Filled {filled} row(s)."]
    if note:
        parts.append(note)
    if unmatched:
        parts.append(f"No template row found for: {', '.join(unmatched)}.")
    return filedata, " ".join(parts)
178
-
179
# Gradio UI: one upload, one button, a download slot and a status message.
with gr.Blocks(title="AMMU Order Filler (Template Preserved)") as demo:
    gr.Markdown("## AMMU Order Filler\nUpload your input file; we’ll fill your live AMMU template and return it unchanged in format.")

    # Use filepath to avoid pandas FutureWarning and for better performance on Spaces.
    # file_types limits the picker to the formats the label advertises.
    file_in = gr.File(
        label="Upload Input File (CSV or XLSX)",
        file_count="single",
        type="filepath",
        file_types=[".csv", ".xlsx"],
    )
    run_btn = gr.Button("Fill Template")
    download = gr.File(label="Download Filled Template")
    msg = gr.Markdown()

    run_btn.click(fn=fill_template, inputs=file_in, outputs=[download, msg])

if __name__ == "__main__":
    # SSR is okay by default; set share=True if you want a public link from the Space logs
    demo.launch()
 
 
 
 
1
  import gradio as gr
 
 
2
  from openpyxl import load_workbook
3
+ from openpyxl.worksheet.worksheet import Worksheet
4
+ from huggingface_hub import hf_hub_download
5
+ import tempfile
6
+ import os, re
7
+ from collections import defaultdict
8
 
9
# Hugging Face dataset repository that hosts the order-form template.
HF_DATASET_REPO = "leadingbridge/ammu"
TEMPLATE_FILENAME = "AMMU-order-form-template.xlsx"
# If you commit the template file into the Space repo, this local copy will be used.
# abspath() anchors the path to this module's directory regardless of the
# current working directory.
LOCAL_TEMPLATE_FALLBACK = os.path.join(os.path.dirname(os.path.abspath(__file__)), TEMPLATE_FILENAME)
13
 
14
def _normalize_power(val):
    """Normalize a "Product Option Value" to the template's label format.

    The result is a two-decimal string such as '0.00' or '-1.25'. Accepts
    numbers or strings such as 'PLANO', '0', '-1', '-1.0', '-1.00', '-.50';
    the first number found in the text is used. Zero is always returned
    unsigned ('0.00'), including for inputs such as '-0.00'.

    Args:
        val: Raw cell value (number, string or None).

    Returns:
        The normalized power string, or None when *val* is None, blank, or
        contains no number.
    """
    if val is None:
        return None
    # Map the typographic minus sign and en dash to ASCII so the sign is not
    # lost when the number is extracted.
    text = str(val).strip().replace("\u2212", "-").replace("\u2013", "-")
    if not text:
        return None
    # Common synonyms for zero power.
    if text.lower() in {"plano", "piano", "0", "0.0", "0.00", "000"}:
        return "0.00"
    # Extract the first signed decimal; accepts forms such as '1.', '.5' and '+2.5'.
    match = re.search(r"[-+]?(?:\d+(?:\.\d*)?|\.\d+)", text.replace(",", ""))
    if match is None:
        return None
    formatted = f"{float(match.group(0)):.2f}"
    # Negative zero (or a tiny negative that rounds to zero) would format as
    # '-0.00', which is not a label the template uses.
    return "0.00" if formatted == "-0.00" else formatted
36
+
37
def _power_to_triplet_digits(power_str: "str | None") -> "str | None":
    """Convert a power string to its unsigned digit code.

    Examples: '-1.25' -> '125', '0.00' -> '000', '-4.00' -> '400'.
    The sign is dropped, the fraction is padded or truncated to two digits,
    and the result is left-padded with zeros to at least three characters.

    Args:
        power_str: Power such as '-1.25', or None.

    Returns:
        The digit code, or None when *power_str* is None or contains no
        digits. A blank input must not resolve to the '000' (plano) column.
    """
    if power_str is None:
        return None
    unsigned = power_str.strip().lstrip("+").replace("-", "")
    if not unsigned:
        return None
    whole, _, frac = unsigned.partition(".")
    frac = (frac + "00")[:2]
    return f"{whole}{frac}".zfill(3)
50
+
51
def _find_header_row(ws: "Worksheet", required_headers, max_scan_rows: int = 10):
    """Locate the header row of a worksheet by column names.

    Scans the top *max_scan_rows* rows (never past the sheet's last row) and
    returns the first row whose text cells include every required header.
    Comparison ignores case and surrounding whitespace.

    Args:
        ws: Worksheet to inspect.
        required_headers: Iterable of header names that must all be present.
        max_scan_rows: Number of leading rows to inspect (default 10).

    Returns:
        ``(row_index, header_map)`` where ``header_map`` maps every
        lower-cased, trimmed text cell in that row to its 1-based column
        index. If a header text occurs twice, the right-most column is kept.

    Raises:
        ValueError: If no scanned row contains all required headers.
    """
    wanted = {str(h).strip().lower() for h in required_headers}
    last_row = min(max_scan_rows, ws.max_row)
    last_col = ws.max_column

    for r in range(1, last_row + 1):
        header_map = {}
        for c in range(1, last_col + 1):
            v = ws.cell(row=r, column=c).value
            if isinstance(v, str) and v.strip():
                header_map[v.strip().lower()] = c
        if wanted.issubset(header_map):
            return r, header_map

    # Sort the names so the message is stable even when a set was passed in.
    names = ", ".join(sorted(str(h) for h in required_headers))
    raise ValueError(f"Could not locate a header row containing: {names}")
70
+
71
def _download_template() -> str:
    """Return a filesystem path to the AMMU order-form template.

    A copy committed next to this module is preferred, which keeps the app
    working offline. Otherwise the file is downloaded from the Hugging Face
    dataset repository.

    Returns:
        Path of the template workbook.
    """
    # isfile() rather than exists(): a directory of the same name must not be
    # mistaken for the template.
    if os.path.isfile(LOCAL_TEMPLATE_FALLBACK):
        return LOCAL_TEMPLATE_FALLBACK
    return hf_hub_download(repo_id=HF_DATASET_REPO, filename=TEMPLATE_FILENAME, repo_type="dataset")
77
+
78
def _to_int(value):
    """Best-effort conversion of a cell value to int; blank or unparseable values give 0."""
    if value is None:
        return 0
    if isinstance(value, str):
        value = value.strip()
        if not value:
            return 0
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        pass
    try:
        # Handles numeric text such as '2.0'.
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return 0


def process(input_file):
    """Fill the AMMU order-form template from an uploaded Excel file.

    Steps:
      1) Read the uploaded input Excel.
      2) Aggregate quantities by (SKU, Product Option Value).
      3) Load the AMMU order form template.
      4) For each (SKU, power), add the quantity into the matching row (by SKU)
         and column:
           - prefer the textual power labels such as '0.00', '-1.00';
           - fall back to the row with digit codes such as '000', '125'.
      5) Save the filled workbook and build a short log.

    Args:
        input_file: Upload from the Gradio File component: either a path
            string or an object exposing the path as ``.name``. None when
            nothing was uploaded.

    Returns:
        ``(output_path, log)`` on success, ``(None, message)`` when no file
        was given or any step failed. Exceptions are reported in the message
        rather than raised.
    """
    try:
        if input_file is None:
            return None, "Please upload an Excel file first."

        # Gradio hands over a plain path when the component uses
        # type="filepath", and a tempfile-like wrapper with `.name` otherwise.
        if isinstance(input_file, (str, os.PathLike)):
            input_path = input_file
        else:
            input_path = input_file.name

        # --- Read input & detect headers by name (not index) ---
        wb_in = load_workbook(input_path, data_only=True)
        ws_in = wb_in.active
        header_row_idx, header_map = _find_header_row(ws_in, {"SKU", "Product Option Value", "Quantity"})
        col_sku = header_map["sku"]
        col_pov = header_map["product option value"]
        col_qty = header_map["quantity"]

        # --- Aggregate quantities across duplicate lines (same SKU + same power) ---
        agg = defaultdict(int)  # (sku, power_str) -> qty sum
        rows_scanned = 0
        rows_skipped = 0
        for r in range(header_row_idx + 1, ws_in.max_row + 1):
            sku = ws_in.cell(row=r, column=col_sku).value
            pov = ws_in.cell(row=r, column=col_pov).value
            qty = ws_in.cell(row=r, column=col_qty).value

            if sku is None and pov is None and qty is None:
                continue
            rows_scanned += 1

            sku_key = "" if sku is None else str(sku).strip()
            power = _normalize_power(pov)
            q = _to_int(qty)
            if sku_key and power is not None and q:
                agg[(sku_key, power)] += q
            else:
                rows_skipped += 1

        # --- Load template ---
        template_path = _download_template()
        wb_out = load_workbook(template_path)
        ws_out = wb_out.active

        # Within the first 10 rows find:
        #   a) the header cell "SKU"
        #   b) a row of textual power labels ('0.00', '-1.00', ...)
        #   c) (optional) a row of digit codes ('000', '125', ...)
        # The first qualifying row wins in each case.
        sku_header_row = None
        sku_col_idx = None
        power_label_row = None
        power_col_map = {}
        triplet_row = None
        triplet_col_map = {}

        for r in range(1, 11):
            text_cells = []
            for c in range(1, ws_out.max_column + 1):
                v = ws_out.cell(row=r, column=c).value
                if isinstance(v, str):
                    text_cells.append((c, v.strip()))

            # (a) 'SKU' header
            if sku_header_row is None:
                for c, text in text_cells:
                    if text.lower() == "sku":
                        sku_header_row, sku_col_idx = r, c
                        break

            # (b) textual labels; at least five are required so that a stray
            # number in a title row is not mistaken for the label row.
            if power_label_row is None:
                labels = {}
                for c, text in text_cells:
                    if re.match(r"^-?\d+\.\d{2}$", text):
                        nv = _normalize_power(text)
                        if nv is not None:
                            labels[nv] = c
                if len(labels) >= 5:
                    power_label_row = r
                    power_col_map = labels

            # (c) digit codes
            if triplet_row is None:
                trip = {text: c for c, text in text_cells if re.fullmatch(r"\d{2,3}", text)}
                if len(trip) >= 5:
                    triplet_row = r
                    triplet_col_map = trip

        if sku_header_row is None or sku_col_idx is None:
            raise ValueError("Could not find the 'SKU' header row in the template (looked in rows 1–10).")
        if not (power_label_row or triplet_row):
            raise ValueError("Could not find the power-column headers in the template (looked in rows 1–10).")

        # Build SKU -> row map from the template (a repeated SKU keeps its last row).
        sku_to_row = {}
        for r in range(sku_header_row + 1, ws_out.max_row + 1):
            val = ws_out.cell(row=r, column=sku_col_idx).value
            if val is None:
                continue
            sku_to_row[str(val).strip()] = r

        # Optional: write unique SKUs next to a "My SKU" label if it exists in the top area.
        mysku_cell = None
        for r in range(1, 11):
            for c in range(1, ws_out.max_column + 1):
                v = ws_out.cell(row=r, column=c).value
                if isinstance(v, str) and v.strip().lower() == "my sku":
                    mysku_cell = (r, c + 1)
                    break
            if mysku_cell:
                break
        if mysku_cell and agg:
            unique_skus = sorted({sku for sku, _ in agg})
            ws_out.cell(row=mysku_cell[0], column=mysku_cell[1]).value = ", ".join(unique_skus)

        # Write aggregated quantities into the correct cells.
        missing_skus = set()
        missing_powers = set()
        written_count = 0

        for (sku, power), qty in agg.items():
            row_idx = sku_to_row.get(sku)
            if row_idx is None:
                missing_skus.add(sku)
                continue

            # Prefer textual power labels (e.g. '0.00', '-1.25').
            col_idx = power_col_map.get(power)

            # Fall back to digit codes (e.g. '-1.25' -> '125').
            if col_idx is None and triplet_col_map:
                col_idx = triplet_col_map.get(_power_to_triplet_digits(power))

            if col_idx is None:
                missing_powers.add(power)
                continue

            # Add to whatever the cell already holds.
            target = ws_out.cell(row=row_idx, column=col_idx)
            target.value = _to_int(target.value) + int(qty)
            written_count += 1

        # Save into a fresh temp directory. It is left in place because
        # Gradio serves the file after this function returns.
        tmpdir = tempfile.mkdtemp()
        out_path = os.path.join(tmpdir, "AMMU-order-form-FILLED.xlsx")
        wb_out.save(out_path)

        log_lines = [
            f"Rows scanned in input: {rows_scanned}",
            f"Unique (SKU, power) pairs aggregated: {len(agg)}",
            f"Entries written into template: {written_count}",
        ]
        if rows_skipped:
            log_lines.append(f"⚠️ Rows skipped (missing SKU, unrecognised power or zero quantity): {rows_skipped}")
        if missing_skus:
            log_lines.append(f"⚠️ SKUs not found in template ({len(missing_skus)}): {', '.join(sorted(missing_skus))}")
        if missing_powers:
            log_lines.append(f"⚠️ Powers not found in template ({len(missing_powers)}): {', '.join(sorted(missing_powers))}")

        return out_path, "\n".join(log_lines)

    except Exception as e:
        # UI boundary: show the failure in the log box instead of raising.
        return None, f"Error: {e}"
246
+
247
# Gradio UI: upload row, action row, then the download slot and log.
# NOTE(review): the scraped diff lost its indentation; the download slot and
# log box are assumed to share the last row — confirm against the repository.
with gr.Blocks(title="AMMU Order Form Filler") as demo:
    gr.Markdown("### AMMU Order Form Filler\nUpload your input Excel. The app will fill quantities into the official AMMU template based on SKU and power.")
    with gr.Row():
        in_file = gr.File(label="Upload input Excel (.xlsx)", file_types=[".xlsx"])
    with gr.Row():
        run_btn = gr.Button("Process")
    with gr.Row():
        out_file = gr.File(label="Download filled template (.xlsx)")
        log_box = gr.Textbox(label="Log", lines=8)

    run_btn.click(fn=process, inputs=in_file, outputs=[out_file, log_box])

if __name__ == "__main__":
    demo.launch()