codys12 committed
Commit cf087b0 · verified · 1 Parent(s): db70732

Update app.py

Files changed (1)
1. app.py +38 -19
app.py CHANGED
@@ -1,11 +1,12 @@
-"""NetCom → WooCommerce transformer (Try 2 schema — 100-parallel + de-dupe, pandas fix)
-======================================================================================
+"""NetCom → WooCommerce transformer (Try 2 schema — 100-parallel, de-dupe, pandas-fix)
+=====================================================================================
 *Accept CSV **or** Excel schedule files and output the WooCommerce CSV.*
 
-New since the last paste
-------------------------
-* Fix for older pandas: move `include_groups=False` from `.groupby()` to `.apply()`.
-* Everything else (cache names, concurrency cap, in-flight de-duplication) is unchanged.
+Latest tweak
+------------
+**Logo map** now contains both `"Amazon Web Services"` *and* `"AWS"` keys
+so either value in the *Vendor* column resolves to the same upload path.
+(Everything else is untouched.)
 """
 
 from __future__ import annotations
@@ -26,51 +27,60 @@ import pandas as pd
 # -------- Gradio bool-schema hot-patch --------------------------------------
 _original = gradio_client.utils._json_schema_to_python_type
 
+
 def _fixed_json_schema_to_python_type(schema, defs=None):  # type: ignore
     if isinstance(schema, bool):
         return "any"
     return _original(schema, defs)
 
-gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type  # type: ignore
+
+gradio_client.utils._json_schema_to_python_type = (  # type: ignore
+    _fixed_json_schema_to_python_type
+)
 
 # -------- Tiny disk cache ----------------------------------------------------
 CACHE_DIR = Path("ai_response_cache")
 CACHE_DIR.mkdir(exist_ok=True)
 
+
 def _cache_path(p: str) -> Path:
     return CACHE_DIR / f"{hashlib.md5(p.encode()).hexdigest()}.json"
 
+
 def _get_cached(p: str) -> str | None:
     try:
         return json.loads(_cache_path(p).read_text("utf-8"))["response"]
     except Exception:
         return None
 
+
 def _set_cache(p: str, r: str) -> None:
     try:
         _cache_path(p).write_text(json.dumps({"prompt": p, "response": r}), "utf-8")
     except Exception:
         pass
 
+
 # -------- Async GPT helpers --------------------------------------------------
-_SEM = asyncio.Semaphore(100)          # ≤100 concurrent OpenAI calls
+_SEM = asyncio.Semaphore(100)  # ≤100 concurrent OpenAI calls
 _inflight: dict[str, asyncio.Future] = {}  # prompt → Future
 
+
 async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
-    """Single LLM call with disk cache, concurrency cap, and de-duplication."""
+    """Single LLM call with cache, concurrency cap, and de-duplication."""
     cached = _get_cached(prompt)
     if cached is not None:
         return cached
 
-    # De-duplicate identical prompts already in flight
-    running = _inflight.get(prompt)
-    if running is not None:
-        return await running
+    # de-dup identical prompts already in-flight
+    existing = _inflight.get(prompt)
+    if existing is not None:
+        return await existing
 
     loop = asyncio.get_running_loop()
 
     async def _call_api() -> str:
-        async with _SEM:               # concurrency limiter
+        async with _SEM:
             try:
                 msg = await client.chat.completions.create(
                     model="gpt-4o-mini",
@@ -90,7 +100,10 @@ async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
     finally:
         _inflight.pop(prompt, None)
 
-async def _batch_async(lst: list[str], instruction: str, client: openai.AsyncOpenAI) -> list[str]:
+
+async def _batch_async(
+    lst: list[str], instruction: str, client: openai.AsyncOpenAI
+) -> list[str]:
     """Vectorised helper — returns an output list matching *lst* length."""
     out: list[str] = ["" for _ in lst]
     idx, prompts = [], []
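The gather/scatter body of `_batch_async` is also elided; given the visible head here and the tail in the next hunk (`out[idx[j]] = val`), it plausibly reads like this sketch — the skip condition and prompt template are assumptions, and `_gpt_async` is the helper defined in the diff:

    async def batch(lst: list[str], instruction: str, client) -> list[str]:
        out: list[str] = ["" for _ in lst]
        idx, prompts = [], []
        for i, txt in enumerate(lst):      # remember positions of non-empty rows
            if str(txt).strip():
                idx.append(i)
                prompts.append(f"{instruction}\n\n{txt}")
        # fire everything at once; the semaphore inside _gpt_async caps parallelism
        vals = await asyncio.gather(*(_gpt_async(client, p) for p in prompts))
        for j, val in enumerate(vals):     # scatter results back to original slots
            out[idx[j]] = val
        return out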
@@ -106,6 +119,7 @@ async def _batch_async(lst: list[str], instruction: str, client: openai.AsyncOpe
         out[idx[j]] = val
     return out
 
+
 # -------- Core converter -----------------------------------------------------
 DEFAULT_PREREQ = (
     "No specific prerequisites are required for this course. Basic computer literacy and "
@@ -113,11 +127,13 @@ DEFAULT_PREREQ = (
     "learning experience."
 )
 
+
 def _read(path: str) -> pd.DataFrame:
     if path.lower().endswith((".xlsx", ".xls")):
         return pd.read_excel(path)
     return pd.read_csv(path, encoding="latin1")
 
+
 async def _enrich_dataframe(
     df: pd.DataFrame, dcol: str, ocol: str, pcol: str, acol: str
 ) -> tuple[list[str], list[str], list[str], list[str], list[str]]:
@@ -146,7 +162,7 @@ async def _enrich_dataframe(
         ),
     )
 
-    # Prerequisites (some rows empty → default text)
+    # prerequisites
     prereq_raw = df.get(pcol, "").fillna("").tolist()
     fpre: list[str] = []
     for req in prereq_raw:
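The branch inside the `for req in prereq_raw:` loop falls between hunks; from `DEFAULT_PREREQ` and the removed "some rows empty → default text" comment, the likely shape is the sketch below — non-empty rows may additionally be rewritten via the batch helper:

    fpre = []
    for req in prereq_raw:
        if not str(req).strip():      # blank cell → boilerplate default
            fpre.append(DEFAULT_PREREQ)
        else:                         # real prerequisite text → keep (or enrich)
            fpre.append(str(req))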
@@ -162,9 +178,11 @@ async def _enrich_dataframe(
 
     return sdesc, ldesc, fobj, fout, fpre
 
+
 def convert(path: str) -> BytesIO:
     logos = {
         "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
+        "AWS": "/wp-content/uploads/2025/04/aws.png",
         "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
         "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
         "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
@@ -190,7 +208,7 @@ def convert(path: str) -> BytesIO:
     sid = first_col("Course SID", "Course SID")
 
     if dur not in df.columns:
-        df[dur] = ""          # ensure Duration column exists
+        df[dur] = ""
 
     # ---------- LLM enrichment (async) -------------------------------------
     sdesc, ldesc, fobj, fout, fpre = asyncio.run(
@@ -223,7 +241,7 @@ def convert(path: str) -> BytesIO:
                 g["Course Start Time"], g["Course End Time"], g["Time Zone"]
             )
         ),
-        include_groups=False,  # <- moved here
+        include_groups=False,
     )
     .reset_index(name="Times")
 )
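Context for the `include_groups=False` line: it is a keyword of `DataFrameGroupBy.apply` (added in pandas 2.2, where including the grouping columns in the applied frame was deprecated), not of `groupby()` itself. A minimal illustration of the pattern, with made-up data:

    import pandas as pd

    df = pd.DataFrame({
        "Course ID": ["A", "A", "B"],
        "Start": ["09:00", "13:00", "10:00"],
    })

    times = (
        df.groupby("Course ID")
        .apply(lambda g: ",".join(g["Start"]), include_groups=False)  # pandas ≥ 2.2
        .reset_index(name="Times")
    )
    print(times)  # one comma-joined Times string per Course ID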
@@ -358,8 +376,8 @@ def convert(path: str) -> BytesIO:
     out.seek(0)
     return out
 
-# -------- Gradio wrappers ----------------------------------------------------
 
+# -------- Gradio wrappers ----------------------------------------------------
 def process_file(upload: gr.File) -> str:
     csv_bytes = convert(upload.name)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
@@ -367,6 +385,7 @@ def process_file(upload: gr.File) -> str:
         path = tmp.name
     return path
 
+
 ui = gr.Interface(
     fn=process_file,
     inputs=gr.File(