#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
*NetCom → WooCommerce CSV/Excel Processor*
Robust edition: catches and logs every recoverable error so one failure never
brings the whole pipeline down. Only small, surgical changes were made.
"""
import gradio as gr
import pandas as pd
import tempfile
import os, sys, json, hashlib, asyncio, traceback
from io import BytesIO
from pathlib import Path
import openai
import gradio_client.utils

# ────────────────────────────── HELPERS ──────────────────────────────
def _log(err: Exception, msg: str = ""):
    """Log errors without stopping execution."""
    print(f"[WARN] {msg}: {err}", file=sys.stderr)
    traceback.print_exception(err)  # single-exception form requires Python >= 3.10

# Patch: tolerate bad JSON-schemas produced by some OpenAI tools
_original_json_schema_to_python_type = gradio_client.utils._json_schema_to_python_type

def _fixed_json_schema_to_python_type(schema, defs=None):
    try:
        if isinstance(schema, bool):
            return "any"
        return _original_json_schema_to_python_type(schema, defs)
    except Exception as e:  # last-chance fallback
        _log(e, "json_schema_to_python_type failed")
        return "any"

gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type
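# (A bare `true`/`false` is itself legal JSON Schema, meaning "accept anything" /
# "accept nothing"; some gradio_client versions crash on it, hence the bool guard.)
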
# ────────────────────────────── DISK CACHE ──────────────────────────────
CACHE_DIR = Path("ai_response_cache"); CACHE_DIR.mkdir(exist_ok=True)

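# MD5 is used here only as a cheap filename fingerprint, not for security;
# identical prompts always map to the same cache file across runs.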
def _cache_path(prompt):  # deterministic path
    return CACHE_DIR / f"{hashlib.md5(prompt.encode()).hexdigest()}.json"

def get_cached_response(prompt):
    try:
        p = _cache_path(prompt)
        if p.exists():
            return json.loads(p.read_text(encoding="utf-8"))["response"]
    except Exception as e:
        _log(e, "reading cache")
    return None

def cache_response(prompt, response):
    try:
        _cache_path(prompt).write_text(
            json.dumps({"prompt": prompt, "response": response}), encoding="utf-8"
        )
    except Exception as e:
        _log(e, "writing cache")

# ────────────────────────────── OPENAI ──────────────────────────────
async def _call_openai(client, prompt):
    """Single protected OpenAI call."""
    try:
        rsp = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
        )
        return rsp.choices[0].message.content
    except Exception as e:
        _log(e, "OpenAI error")
        return f"Error: {e}"

async def process_text_batch_async(client, prompts):
    """Return results in original order, resilient to any error."""
    results, tasks = {}, []
    for p in prompts:
        cached = get_cached_response(p)
        if cached is not None:
            results[p] = cached
        else:
            tasks.append(asyncio.create_task(_call_openai(client, p)))
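    # Pair the uncached prompts with their tasks; both lists are in creation
    # order, so the final list keeps the caller's original prompt order.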
    for prompt, task in zip([p for p in prompts if p not in results], tasks):
        try:
            res = await task
        except Exception as e:
            _log(e, "async OpenAI task")
            res = f"Error: {e}"
        # cache only successful replies so transient failures are retried next run
        if not res.startswith("Error:"):
            cache_response(prompt, res)
        results[prompt] = res
    return [results[p] for p in prompts]

async def process_text_with_ai_async(texts, instruction):
    if not texts:
        return []
    client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    batch_size, out = 500, []
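    # Up to `batch_size` requests are awaited concurrently per batch; if rate
    # limits bite, an asyncio.Semaphore around _call_openai is an easy throttle.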
    for i in range(0, len(texts), batch_size):
        prompts = [f"{instruction}\n\nText: {t}" for t in texts[i : i + batch_size]]
        out.extend(await process_text_batch_async(client, prompts))
    return out

# ────────────────────────────── MAIN TRANSFORM ──────────────────────────────
def process_woocommerce_data_in_memory(upload):
    """Convert NetCom → Woo CSV/XLSX; every stage guarded."""
    try:
        # brand → logo mapping
        brand_logo = {
            "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
            "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
            "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
            "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
            "EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
            "ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
            "PMI": "/wp-content/uploads/2025/04/PMI.png",
            "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
            "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
            "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
            "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png",
        }
        default_prereq = (
            "No specific prerequisites are required for this course. "
            "Basic computer literacy and familiarity with fundamental concepts in the "
            "subject area are recommended for the best learning experience."
        )
        # ---------------- I/O ----------------
        ext = Path(upload.name).suffix.lower()
        try:
            if ext in {".xlsx", ".xls"}:
                try:
                    df = pd.read_excel(upload.name, sheet_name="Active Schedules")
                except Exception as e:
                    _log(e, "Excel read failed (falling back to first sheet)")
                    df = pd.read_excel(upload.name, sheet_name=0)
            else:  # CSV
                try:
                    df = pd.read_csv(upload.name, encoding="latin1")
                except Exception as e:
                    _log(e, "CSV read failed (trying utf-8)")
                    # pd.read_csv takes `encoding_errors`, not `errors` (pandas >= 1.3)
                    df = pd.read_csv(upload.name, encoding="utf-8", encoding_errors="ignore")
        except Exception as e:
            _log(e, "file read totally failed")
            raise
        df.columns = df.columns.str.strip()
        # --------- column harmonisation (new vs old formats) ----------
        rename_map = {
            "Decription": "Description",
            "description": "Description",
            "Objectives": "Objectives",
            "objectives": "Objectives",
            "RequiredPrerequisite": "Required Prerequisite",
            "Required Pre-requisite": "Required Prerequisite",
            "RequiredPre-requisite": "Required Prerequisite",
        }
        df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True)
        # duration if missing (inclusive day count, hence the +1)
        if "Duration" not in df.columns:
            try:
                df["Duration"] = (
                    pd.to_datetime(df["Course End Date"]) - pd.to_datetime(df["Course Start Date"])
                ).dt.days.add(1)
            except Exception as e:
                _log(e, "duration calc failed")
                df["Duration"] = ""
        # ---------------- ASYNC AI ----------------
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        col_desc = "Description"
        col_obj = "Objectives"
        col_prereq = "Required Prerequisite"
        try:
            res = loop.run_until_complete(
                asyncio.gather(
                    process_text_with_ai_async(
                        df[col_desc].fillna("").tolist(),
                        "Create a concise 250-character summary of this course description:",
                    ),
                    process_text_with_ai_async(
                        df[col_desc].fillna("").tolist(),
                        "Condense this description to maximum 750 characters in paragraph format, with clean formatting:",
                    ),
                    process_text_with_ai_async(
                        df[col_obj].fillna("").tolist(),
                        "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':",
                    ),
                    process_text_with_ai_async(
                        df["Outline"].fillna("").tolist(),
                        "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':",
                    ),
                )
            )
        except Exception as e:
            _log(e, "async AI gather failed")
            res = [[""] * len(df)] * 4
        finally:
            loop.close()
        short_desc, long_desc, objectives, agendas = res
        # prerequisites handled synchronously (tiny)
        prereq_out = []
        for p in df[col_prereq].fillna("").tolist():
            if not p.strip():
                prereq_out.append(default_prereq)
            else:
                try:
                    prereq_out.append(
                        asyncio.run(
                            process_text_with_ai_async(
                                [p],
                                "Format these prerequisites into a bullet list format with clean formatting. Start each bullet with '• ':",
                            )
                        )[0]
                    )
                except Exception as e:
                    _log(e, "prereq AI failed")
                    prereq_out.append(default_prereq)
        # ---------------- DATAFRAME BUILD ----------------
        try:
            df["Short_Description"] = short_desc
            df["Condensed_Description"] = long_desc
            df["Formatted_Objectives"] = objectives
            df["Formatted_Prerequisites"] = prereq_out
            df["Formatted_Agenda"] = agendas
        except Exception as e:
            _log(e, "adding AI columns")
        # 2. aggregate date/time
        df = df.sort_values(["Course ID", "Course Start Date"])
        date_agg = (
            df.groupby("Course ID")["Course Start Date"]
            .apply(lambda x: ",".join(x.astype(str).unique()))
            .reset_index(name="Aggregated_Dates")
        )
        time_agg = (
            df.groupby("Course ID")
            .apply(
                lambda d: ",".join(
                    f"{s}-{e} {tz}"
                    for s, e, tz in zip(
                        d["Course Start Time"], d["Course End Time"], d["Time Zone"]
                    )
                )
            )
            .reset_index(name="Aggregated_Times")
        )
        parent = df.drop_duplicates(subset=["Course ID"]).merge(date_agg).merge(time_agg)
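        # WooCommerce's product CSV importer expects one "variable" parent row per
        # course plus one "variation" row per session, linked via the Parent column.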
        woo_parent_df = pd.DataFrame(
            {
                "Type": "variable",
                "SKU": parent["Course ID"],
                "Name": parent["Course Name"],
                "Published": 1,
                "Visibility in catalog": "visible",
                "Short description": parent["Short_Description"],
                "Description": parent["Condensed_Description"],
                "Tax status": "taxable",
                "In stock?": 1,
                "Regular price": parent["SRP Pricing"].replace("[\\$,]", "", regex=True),
                "Categories": "courses",
                "Images": parent["Vendor"].map(brand_logo).fillna(""),
                "Parent": "",
                "Brands": parent["Vendor"],
                "Attribute 1 name": "Date",
                "Attribute 1 value(s)": parent["Aggregated_Dates"],
                "Attribute 1 visible": "visible",
                "Attribute 1 global": 1,
                "Attribute 2 name": "Location",
                "Attribute 2 value(s)": "Virtual",
                "Attribute 2 visible": "visible",
                "Attribute 2 global": 1,
                "Attribute 3 name": "Time",
                "Attribute 3 value(s)": parent["Aggregated_Times"],
                "Attribute 3 visible": "visible",
                "Attribute 3 global": 1,
                "Meta: outline": parent["Formatted_Agenda"],
                "Meta: days": parent["Duration"],
                "Meta: location": "Virtual",
                "Meta: overview": parent["Target Audience"],
                "Meta: objectives": parent["Formatted_Objectives"],
                "Meta: prerequisites": parent["Formatted_Prerequisites"],
                "Meta: agenda": parent["Formatted_Agenda"],
            }
        )
        woo_child_df = pd.DataFrame(
            {
                "Type": "variation, virtual",
                "SKU": df["Course SID"],
                "Name": df["Course Name"],
                "Published": 1,
                "Visibility in catalog": "visible",
                "Short description": df["Short_Description"],
                "Description": df["Condensed_Description"],
                "Tax status": "taxable",
                "In stock?": 1,
                "Regular price": df["SRP Pricing"].replace("[\\$,]", "", regex=True),
                "Categories": "courses",
                "Images": df["Vendor"].map(brand_logo).fillna(""),
                "Parent": df["Course ID"],
                "Brands": df["Vendor"],
                "Attribute 1 name": "Date",
                "Attribute 1 value(s)": df["Course Start Date"],
                "Attribute 1 visible": "visible",
                "Attribute 1 global": 1,
                "Attribute 2 name": "Location",
                "Attribute 2 value(s)": "Virtual",
                "Attribute 2 visible": "visible",
                "Attribute 2 global": 1,
                "Attribute 3 name": "Time",
                "Attribute 3 value(s)": df.apply(
                    lambda r: f"{r['Course Start Time']}-{r['Course End Time']} {r['Time Zone']}",
                    axis=1,
                ),
                "Attribute 3 visible": "visible",
                "Attribute 3 global": 1,
                "Meta: outline": df["Formatted_Agenda"],
                "Meta: days": df["Duration"],
                "Meta: location": "Virtual",
                "Meta: overview": df["Target Audience"],
                "Meta: objectives": df["Formatted_Objectives"],
                "Meta: prerequisites": df["Formatted_Prerequisites"],
                "Meta: agenda": df["Formatted_Agenda"],
            }
        )
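        # Note: for the importer to link variations, each variation's Date/Time
        # value should appear among the parent's aggregated attribute values.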
        final_cols = [
            "Type",
            "SKU",
            "Name",
            "Published",
            "Visibility in catalog",
            "Short description",
            "Description",
            "Tax status",
            "In stock?",
            "Regular price",
            "Categories",
            "Images",
            "Parent",
            "Brands",
            "Attribute 1 name",
            "Attribute 1 value(s)",
            "Attribute 1 visible",
            "Attribute 1 global",
            "Attribute 2 name",
            "Attribute 2 value(s)",
            "Attribute 2 visible",
            "Attribute 2 global",
            "Attribute 3 name",
            "Attribute 3 value(s)",
            "Attribute 3 visible",
            "Attribute 3 global",
            "Meta: outline",
            "Meta: days",
            "Meta: location",
            "Meta: overview",
            "Meta: objectives",
            "Meta: prerequisites",
            "Meta: agenda",
        ]
        woo_final_df = pd.concat([woo_parent_df, woo_child_df], ignore_index=True)[
            final_cols
        ]
        buf = BytesIO()
        woo_final_df.to_csv(buf, index=False, encoding="utf-8-sig")
        buf.seek(0)
        return buf
    except Exception as e:
        _log(e, "fatal transformation error")
        err_buf = BytesIO()
        pd.DataFrame({"error": [str(e)]}).to_csv(err_buf, index=False)
        err_buf.seek(0)
        return err_buf

# ────────────────────────────── GRADIO BINDINGS ──────────────────────────────
def process_file(file):
    try:
        out_io = process_woocommerce_data_in_memory(file)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
            tmp.write(out_io.getvalue())
        return tmp.name
    except Exception as e:
        _log(e, "top-level process_file")
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp:
            tmp.write(f"Processing failed:\n{e}".encode())
        return tmp.name

interface = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="Upload NetCom Schedule", file_types=[".csv", ".xlsx", ".xls"]),
    outputs=gr.File(label="Download WooCommerce CSV"),
    title="NetCom → WooCommerce CSV/Excel Processor",
    description="Upload a NetCom Reseller Schedule CSV or XLSX to generate a WooCommerce-ready CSV.",
    analytics_enabled=False,
)

if __name__ == "__main__":  # run
    if not os.getenv("OPENAI_API_KEY"):
        print("[WARN] OPENAI_API_KEY not set; AI steps will error out.")
    interface.launch()