Spaces:

codys12
/

NetCom-to-WooComerce

Runtime error

App Files Files Community

codys12 commited on May 14

Commit

f302c17

verified ·

1 Parent(s): 90e73d9

Update app.py

Browse files

Files changed (1) hide show

app.py +241 -69

app.py CHANGED Viewed

@@ -1,30 +1,34 @@
-import gradio as gr
-import pandas as pd
-import tempfile
-import os
-import json
-import hashlib
 import asyncio
 from io import BytesIO
 from pathlib import Path
-import openai
-import gradio_client.utils
-"""NetCom → WooCommerce transformer (Try 1 schema)
-=================================================
-*Accept CSV **or** Excel schedule files and output the WooCommerce CSV.*
-Fixes vs last run
------------------
-* Output written to a **temporary file path** (Gradio BytesIO bug fixed).
-* **Excel upload** support.
-* **Pandas future‑warning** silenced (`group_keys=False`).
-"""
 # -------- Gradio bool‑schema hot‑patch --------------------------------------
 _original = gradio_client.utils._json_schema_to_python_type
-def _fixed_json_schema_to_python_type(schema, defs=None):
     if isinstance(schema, bool):
         return "any"
     return _original(schema, defs)
@@ -32,101 +36,269 @@ def _fixed_json_schema_to_python_type(schema, defs=None):
 gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type  # type: ignore
 # -------- Tiny disk cache ----------------------------------------------------
-CACHE_DIR = Path("ai_response_cache"); CACHE_DIR.mkdir(exist_ok=True)
-def _cache_path(p: str):
     return CACHE_DIR / f"{hashlib.md5(p.encode()).hexdigest()}.json"
-def _get_cached(p: str):
     try:
-        return json.loads(_cache_path(p).read_text("utf-8"))["response"]
     except Exception:
         return None
-def _set_cache(p: str, r: str):
     try:
         _cache_path(p).write_text(json.dumps({"prompt": p, "response": r}), "utf-8")
     except Exception:
         pass
 # -------- Async GPT helpers --------------------------------------------------
-async def _gpt(client, prompt):
-    c = _get_cached(prompt)
-    if c is not None:
-        return c
     try:
-        msg = await client.chat.completions.create(model="gpt-4o-mini", messages=[{"role": "user", "content": prompt}], temperature=0)
         text = msg.choices[0].message.content
-    except Exception as e:
-        text = f"Error: {e}"
     _set_cache(prompt, text)
     return text
-async def _batch(lst, instr):
-    out = ["" for _ in lst]; idx,prompts=[],[]
-    for i,t in enumerate(lst):
-        if isinstance(t,str) and t.strip(): idx.append(i); prompts.append(f"{instr}\n\nText: {t}")
-    if not prompts: return out
-    client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-    res = await asyncio.gather(*[_gpt(client,p) for p in prompts])
-    for j,val in enumerate(res): out[idx[j]] = val
     return out
 # -------- Core converter -----------------------------------------------------
-def _read(path: str):
-    return pd.read_excel(path) if path.lower().endswith((".xlsx",".xls")) else pd.read_csv(path, encoding="latin1")
 def convert(path: str) -> BytesIO:
-    logos = {"Amazon Web Services":"/wp-content/uploads/2025/04/aws.png","Cisco":"/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp","Microsoft":"/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png","Google Cloud":"/wp-content/uploads/2025/04/Google_Cloud.png","EC Council":"/wp-content/uploads/2025/04/Ec_Council.png","ITIL":"/wp-content/uploads/2025/04/ITIL.webp","PMI":"/wp-content/uploads/2025/04/PMI.png","Comptia":"/wp-content/uploads/2025/04/Comptia.png","Autodesk":"/wp-content/uploads/2025/04/autodesk.png","ISC2":"/wp-content/uploads/2025/04/ISC2.png","AICerts":"/wp-content/uploads/2025/04/aicerts-logo-1.png"}
-    default_pre = "No specific prerequisites are required for this course. Basic computer literacy and familiarity with fundamental concepts in the subject area are recommended for the best learning experience."
-    df = _read(path); df.columns = df.columns.str.strip()
-    c = lambda *o: next((x for x in o if x in df.columns), None)
-    dcol, ocol, pcol, acol, dur, sid = c("Description","Decription"), c("Objectives","objectives"), c("RequiredPrerequisite","Required Pre-requisite"), c("Outline"), c("Duration"), c("Course SID","ï»¿Course SID")
-    if dur is None: df["Duration"]=""; dur="Duration"
-    loop=asyncio.new_event_loop(); asyncio.set_event_loop(loop)
-    sdesc, ldesc, fobj, fout = loop.run_until_complete(asyncio.gather(
-        _batch(df.get(dcol,"").fillna("").tolist(), "Create a concise 250-character summary of this course description:"),
-        _batch(df.get(dcol,"").fillna("").tolist(), "Condense this description to a maximum of 750 characters in paragraph format, with clean formatting:"),
-        _batch(df.get(ocol,"").fillna("").tolist(), "Format these objectives into a bullet list with clean formatting. Start each bullet with '• ':") ,
-        _batch(df.get(acol,"").fillna("").tolist(), "Format this agenda into a bullet list with clean formatting. Start each bullet with '• ':")))
-    loop.close()
-    fpre=[default_pre if not str(p).strip() else asyncio.run(_batch([p],"Format these prerequisites into a bullet list with clean formatting. Start each bullet with '• ':"))[0] for p in df.get(pcol,"").fillna("").tolist()]
-    df["Short_Description"],df["Condensed_Description"],df["Formatted_Objectives"],df["Formatted_Agenda"],df["Formatted_Prerequisites"] = sdesc,ldesc,fobj,fout,fpre
     df["Course Start Date"] = pd.to_datetime(df["Course Start Date"], errors="coerce")
     df["Date_fmt"] = df["Course Start Date"].dt.strftime("%-m/%-d/%Y")
-    dsorted=df.sort_values(["Course ID","Course Start Date"])
-    d_agg = dsorted.groupby("Course ID")["Date_fmt"].apply(lambda s: ",".join(s.dropna().unique())).reset_index(name="Dates")
-    t_agg = dsorted.groupby("Course ID",group_keys=False).apply(lambda g: ",".join(f"{st}-{et} {tz}" for st,et,tz in zip(g["Course Start Time"],g["Course End Time"],g["Time Zone"]))).reset_index(name="Times")
     parents = dsorted.drop_duplicates("Course ID").merge(d_agg).merge(t_agg)
     parent = pd.DataFrame({
-        "Type":"variable","SKU":parents["Course ID"],"Name":parents["Course Name"],"Published":1,"Visibility in catalog":"visible","Short description":parents["Short_Description"],"Description":parents["Condensed_Description"],"Tax status":"taxable","In stock?":1,"Stock":1,"Sold individually?":1,"Regular price":parents["SRP Pricing"].replace("[\\$,]","",regex=True),"Categories":"courses","Images":parents["Vendor"].map(logos).fillna(""),"Parent":"","Brands":parents["Vendor"],"Attribute 1 name":"Date","Attribute 1 value(s)":parents["Dates"],"Attribute 1 visible":"visible","Attribute 1 global":1,"Attribute 2 name":"Location","Attribute 2 value(s)":"Virtual","Attribute 2 visible":"visible","Attribute 2 global":1,"Attribute 3 name":"Time","Attribute 3 value(s)":parents["Times"],"Attribute 3 visible":"visible","Attribute 3 global":1,"Meta: outline":parents["Formatted_Agenda"],"Meta: days":parents[dur],"Meta: location":"Virtual","Meta: overview":parents["Target Audience"],"Meta: objectives":parents["Formatted_Objectives"],"Meta: prerequisites":parents["Formatted_Prerequisites"],"Meta: agenda":parents["Formatted_Agenda"]})
     child = pd.DataFrame({
-        "Type":"variation, virtual","SKU":dsorted[sid].astype(str).str.strip(),"Name":dsorted["Course Name"],"Published":1,"Visibility in catalog":"visible","Short description":dsorted["Short_Description"],"Description":dsorted["Condensed_Description"],"Tax status":"taxable","In stock?":1,"Stock":1,"Sold individually?":1,"Regular price":dsorted["SRP Pricing"].replace("[\\$,]","",regex=True),"Categories":"courses","Images":dsorted["Vendor"].map(logos).fillna(""),"Parent":dsorted["Course ID"],"Brands":dsorted["Vendor"],"Attribute 1 name":"Date","Attribute 1 value(s)":dsorted["Date_fmt"],"Attribute 1 visible":"visible","Attribute 1 global":1,"Attribute 2 name":"Location","Attribute 2 value(s)":"Virtual","Attribute 2 visible":"visible","Attribute 2 global":1,"Attribute 3 name":"Time","Attribute 3 value(s)":dsorted.apply(lambda r:f"{r['Course Start Time']}-{r['Course End Time']} {r['Time Zone']}",axis=1),"Attribute 3 visible":"visible","Attribute 3 global":1,"Meta: outline":dsorted["Formatted_Agenda"],"Meta: days":dsorted[dur],"Meta: location":"Virtual","Meta: overview":dsorted["Target Audience"],"Meta: objectives":dsorted["Formatted_Objectives"],"Meta: prerequisites":dsorted["Formatted_Prerequisites"],"Meta: agenda":dsorted["Formatted_Agenda"]})
-    all_rows = pd.concat([parent,child],ignore_index=True)
-    order=["Type","SKU","Name","Published","Visibility in catalog","Short description","Description","Tax status","In stock?","Stock","Sold individually?","Regular price","Categories","Images","Parent","Brands","Attribute 1 name","Attribute 1 value(s)","Attribute 1 visible","Attribute 1 global","Attribute 2 name","Attribute 2 value(s)","Attribute 2 visible","Attribute 2 global","Attribute 3 name","Attribute 3 value(s)","Attribute 3 visible","Attribute 3 global","Meta: outline","Meta: days","Meta: location","Meta: overview","Meta: objectives","Meta: prerequisites","Meta: agenda"]
-    out=BytesIO(); all_rows[order].to_csv(out,index=False,encoding="utf-8-sig"); out.seek(0); return out
 # -------- Gradio wrappers ----------------------------------------------------
-def process_file(upload):
     csv_bytes = convert(upload.name)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
-        tmp.write(csv_bytes.getvalue()); path = tmp.name
     return path
 ui = gr.Interface(
     fn=process_file,
-    inputs=gr.File(label="Upload NetCom CSV / Excel", file_types=[".csv",".xlsx",".xls"]),
     outputs=gr.File(label="Download WooCommerce CSV"),
-    title="NetCom → WooCommerce CSV Processor",
-    description="Upload NetCom schedule (.csv/.xlsx) to get the Try 1‑formatted WooCommerce CSV.",
     analytics_enabled=False,
 )

+"""NetCom → WooCommerce transformer (Try 2 schema — cleaned async)
+=============================================================
+*Accept CSV **or** Excel schedule files and output the WooCommerce CSV.*
+Changes vs Try 1
+----------------
+* Use **one** event‑loop via `asyncio.run()` — no manual `new_event_loop()` / `loop.close()` gymnastics.
+* **One** shared `openai.AsyncOpenAI` client, properly closed with an `async with` block.
+* Fixed pandas future‑warning by adding `include_groups=False`.
+* Same Gradio interface, caching, and JSON‑schema hot‑patch as before.
+"""
+from __future__ import annotations
 import asyncio
+import hashlib
+import json
+import os
+import tempfile
 from io import BytesIO
 from pathlib import Path
+import gradio as gr
+import gradio_client.utils
+import openai
+import pandas as pd
 # -------- Gradio bool‑schema hot‑patch --------------------------------------
 _original = gradio_client.utils._json_schema_to_python_type
+def _fixed_json_schema_to_python_type(schema, defs=None):  # type: ignore
     if isinstance(schema, bool):
         return "any"
     return _original(schema, defs)
 gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type  # type: ignore
 # -------- Tiny disk cache ----------------------------------------------------
+CACHE_DIR = Path("ai_response_cache")
+CACHE_DIR.mkdir(exist_ok=True)
+def _cache_path(p: str) -> Path:
     return CACHE_DIR / f"{hashlib.md5(p.encode()).hexdigest()}.json"
+def _get_cached(p: str) -> str | None:
     try:
+        return json.loads(_cache_path(p).read_text("utf-8"))['response']
     except Exception:
         return None
+def _set_cache(p: str, r: str) -> None:
     try:
         _cache_path(p).write_text(json.dumps({"prompt": p, "response": r}), "utf-8")
     except Exception:
         pass
 # -------- Async GPT helpers --------------------------------------------------
+async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
+    """Single LLM call with on‑disk response cache."""
+    cached = _get_cached(prompt)
+    if cached is not None:
+        return cached
     try:
+        msg = await client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[{"role": "user", "content": prompt}],
+            temperature=0,
+        )
         text = msg.choices[0].message.content
+    except Exception as exc:  # network or auth failure ‑ return explicit error string
+        text = f"Error: {exc}"
     _set_cache(prompt, text)
     return text
+async def _batch_async(lst: list[str], instruction: str, client: openai.AsyncOpenAI) -> list[str]:
+    """Vectorised helper — returns an output list matching *lst* length."""
+    out: list[str] = ["" for _ in lst]
+    idx, prompts = [], []
+    for i, txt in enumerate(lst):
+        if isinstance(txt, str) and txt.strip():
+            idx.append(i)
+            prompts.append(f"{instruction}\n\nText: {txt}")
+    # Fast‑path: nothing to do
+    if not prompts:
+        return out
+    # Fire off all prompts concurrently
+    responses = await asyncio.gather(*[_gpt_async(client, p) for p in prompts])
+    for j, val in enumerate(responses):
+        out[idx[j]] = val
     return out
 # -------- Core converter -----------------------------------------------------
+DEFAULT_PREREQ = (
+    "No specific prerequisites are required for this course. Basic computer literacy and "
+    "familiarity with fundamental concepts in the subject area are recommended for the best "
+    "learning experience."
+)
+def _read(path: str) -> pd.DataFrame:
+    if path.lower().endswith((".xlsx", ".xls")):
+        return pd.read_excel(path)
+    return pd.read_csv(path, encoding="latin1")
+async def _enrich_dataframe(df: pd.DataFrame, dcol: str, ocol: str, pcol: str, acol: str) -> tuple[list[str], list[str], list[str], list[str], list[str]]:
+    """Run all LLM batches concurrently and return the five enrichment columns."""
+    async with openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) as client:
+        # 1) Descriptions and objectives/agenda batches
+        sdesc, ldesc, fobj, fout = await asyncio.gather(
+            _batch_async(df.get(dcol, "").fillna("").tolist(),
+                         "Create a concise 250-character summary of this course description:", client),
+            _batch_async(df.get(dcol, "").fillna("").tolist(),
+                         "Condense this description to a maximum of 750 characters in paragraph format, with clean formatting:", client),
+            _batch_async(df.get(ocol, "").fillna("").tolist(),
+                         "Format these objectives into a bullet list with clean formatting. Start each bullet with '• ':", client),
+            _batch_async(df.get(acol, "").fillna("").tolist(),
+                         "Format this agenda into a bullet list with clean formatting. Start each bullet with '• ':", client),
+        )
+        # 2) Prerequisites batch (some rows may be empty → DEFAULT_PREREQ)
+        prereq_raw = df.get(pcol, "").fillna("").tolist()
+        fpre: list[str] = []
+        for req in prereq_raw:
+            if not str(req).strip():
+                fpre.append(DEFAULT_PREREQ)
+            else:
+                formatted = await _batch_async([req],
+                                               "Format these prerequisites into a bullet list with clean formatting. Start each bullet with '• ':",
+                                               client)
+                fpre.append(formatted[0])
+    return sdesc, ldesc, fobj, fout, fpre
 def convert(path: str) -> BytesIO:
+    logos = {
+        "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
+        "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
+        "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
+        "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
+        "EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
+        "ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
+        "PMI": "/wp-content/uploads/2025/04/PMI.png",
+        "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
+        "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
+        "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
+        "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png",
+    }
+    df = _read(path)
+    df.columns = df.columns.str.strip()
+    # Helper to locate first existing column name from a list of candidates
+    first_col = lambda *candidates: next((c for c in candidates if c in df.columns), None)
+    dcol = first_col("Description", "Decription")
+    ocol = first_col("Objectives", "objectives")
+    pcol = first_col("RequiredPrerequisite", "Required Pre-requisite")
+    acol = first_col("Outline")
+    dur = first_col("Duration") or "Duration"
+    sid = first_col("Course SID", "ï»¿Course SID")
+    if dur not in df.columns:
+        df[dur] = ""  # create empty Duration col if missing
+    # ---------- LLM enrichment (async) -------------------------------------
+    sdesc, ldesc, fobj, fout, fpre = asyncio.run(_enrich_dataframe(df, dcol, ocol, pcol, acol))
+    df["Short_Description"] = sdesc
+    df["Condensed_Description"] = ldesc
+    df["Formatted_Objectives"] = fobj
+    df["Formatted_Agenda"] = fout
+    df["Formatted_Prerequisites"] = fpre
+    # ---------- Schedule aggregation --------------------------------------
     df["Course Start Date"] = pd.to_datetime(df["Course Start Date"], errors="coerce")
     df["Date_fmt"] = df["Course Start Date"].dt.strftime("%-m/%-d/%Y")
+    dsorted = df.sort_values(["Course ID", "Course Start Date"])
+    d_agg = (
+        dsorted
+        .groupby("Course ID")["Date_fmt"]
+        .apply(lambda s: ",".join(s.dropna().unique()))
+        .reset_index(name="Dates")
+    )
+    t_agg = (
+        dsorted
+        .groupby("Course ID", group_keys=False, include_groups=False)
+        .apply(lambda g: ",".join(f"{st}-{et} {tz}" for st, et, tz in zip(g["Course Start Time"], g["Course End Time"], g["Time Zone"])))
+        .reset_index(name="Times")
+    )
     parents = dsorted.drop_duplicates("Course ID").merge(d_agg).merge(t_agg)
+    # ---------- Parent / child product rows --------------------------------
     parent = pd.DataFrame({
+        "Type": "variable",
+        "SKU": parents["Course ID"],
+        "Name": parents["Course Name"],
+        "Published": 1,
+        "Visibility in catalog": "visible",
+        "Short description": parents["Short_Description"],
+        "Description": parents["Condensed_Description"],
+        "Tax status": "taxable",
+        "In stock?": 1,
+        "Stock": 1,
+        "Sold individually?": 1,
+        "Regular price": parents["SRP Pricing"].replace("[\\$,]", "", regex=True),
+        "Categories": "courses",
+        "Images": parents["Vendor"].map(logos).fillna(""),
+        "Parent": "",
+        "Brands": parents["Vendor"],
+        "Attribute 1 name": "Date",
+        "Attribute 1 value(s)": parents["Dates"],
+        "Attribute 1 visible": "visible",
+        "Attribute 1 global": 1,
+        "Attribute 2 name": "Location",
+        "Attribute 2 value(s)": "Virtual",
+        "Attribute 2 visible": "visible",
+        "Attribute 2 global": 1,
+        "Attribute 3 name": "Time",
+        "Attribute 3 value(s)": parents["Times"],
+        "Attribute 3 visible": "visible",
+        "Attribute 3 global": 1,
+        "Meta: outline": parents["Formatted_Agenda"],
+        "Meta: days": parents[dur],
+        "Meta: location": "Virtual",
+        "Meta: overview": parents["Target Audience"],
+        "Meta: objectives": parents["Formatted_Objectives"],
+        "Meta: prerequisites": parents["Formatted_Prerequisites"],
+        "Meta: agenda": parents["Formatted_Agenda"],
+    })
     child = pd.DataFrame({
+        "Type": "variation, virtual",
+        "SKU": dsorted[sid].astype(str).str.strip(),
+        "Name": dsorted["Course Name"],
+        "Published": 1,
+        "Visibility in catalog": "visible",
+        "Short description": dsorted["Short_Description"],
+        "Description": dsorted["Condensed_Description"],
+        "Tax status": "taxable",
+        "In stock?": 1,
+        "Stock": 1,
+        "Sold individually?": 1,
+        "Regular price": dsorted["SRP Pricing"].replace("[\\$,]", "", regex=True),
+        "Categories": "courses",
+        "Images": dsorted["Vendor"].map(logos).fillna(""),
+        "Parent": dsorted["Course ID"],
+        "Brands": dsorted["Vendor"],
+        "Attribute 1 name": "Date",
+        "Attribute 1 value(s)": dsorted["Date_fmt"],
+        "Attribute 1 visible": "visible",
+        "Attribute 1 global": 1,
+        "Attribute 2 name": "Location",
+        "Attribute 2 value(s)": "Virtual",
+        "Attribute 2 visible": "visible",
+        "Attribute 2 global": 1,
+        "Attribute 3 name": "Time",
+        "Attribute 3 value(s)": dsorted.apply(lambda r: f"{r['Course Start Time']}-{r['Course End Time']} {r['Time Zone']}", axis=1),
+        "Attribute 3 visible": "visible",
+        "Attribute 3 global": 1,
+        "Meta: outline": dsorted["Formatted_Agenda"],
+        "Meta: days": dsorted[dur],
+        "Meta: location": "Virtual",
+        "Meta: overview": dsorted["Target Audience"],
+        "Meta: objectives": dsorted["Formatted_Objectives"],
+        "Meta: prerequisites": dsorted["Formatted_Prerequisites"],
+        "Meta: agenda": dsorted["Formatted_Agenda"],
+    })
+    all_rows = pd.concat([parent, child], ignore_index=True)
+    order = [
+        "Type", "SKU", "Name", "Published", "Visibility in catalog", "Short description", "Description",
+        "Tax status", "In stock?", "Stock", "Sold individually?", "Regular price", "Categories", "Images",
+        "Parent", "Brands", "Attribute 1 name", "Attribute 1 value(s)", "Attribute 1 visible", "Attribute 1 global",
+        "Attribute 2 name", "Attribute 2 value(s)", "Attribute 2 visible", "Attribute 2 global", "Attribute 3 name",
+        "Attribute 3 value(s)", "Attribute 3 visible", "Attribute 3 global", "Meta: outline", "Meta: days", "Meta: location",
+        "Meta: overview", "Meta: objectives", "Meta: prerequisites", "Meta: agenda",
+    ]
+    out = BytesIO()
+    all_rows[order].to_csv(out, index=False, encoding="utf-8-sig")
+    out.seek(0)
+    return out
 # -------- Gradio wrappers ----------------------------------------------------
+def process_file(upload: gr.File) -> str:
     csv_bytes = convert(upload.name)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
+        tmp.write(csv_bytes.getvalue())
+        path = tmp.name
     return path
 ui = gr.Interface(
     fn=process_file,
+    inputs=gr.File(label="Upload NetCom CSV / Excel", file_types=[".csv", ".xlsx", ".xls"]),
     outputs=gr.File(label="Download WooCommerce CSV"),
+    title="NetCom → WooCommerce CSV Processor (Try 2)",
+    description="Upload NetCom schedule (.csv/.xlsx) to get the Try 2‑formatted WooCommerce CSV.",
     analytics_enabled=False,
 )