Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
| 1 |
-
"""NetCom → WooCommerce transformer (Try 2 schema — 100-parallel
|
| 2 |
-
|
| 3 |
*Accept CSV **or** Excel schedule files and output the WooCommerce CSV.*
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
|
|
|
| 9 |
"""
|
| 10 |
|
| 11 |
from __future__ import annotations
|
|
@@ -26,51 +27,60 @@ import pandas as pd
|
|
| 26 |
# -------- Gradio bool-schema hot-patch --------------------------------------
|
| 27 |
_original = gradio_client.utils._json_schema_to_python_type
|
| 28 |
|
|
|
|
| 29 |
def _fixed_json_schema_to_python_type(schema, defs=None): # type: ignore
|
| 30 |
if isinstance(schema, bool):
|
| 31 |
return "any"
|
| 32 |
return _original(schema, defs)
|
| 33 |
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
# -------- Tiny disk cache ----------------------------------------------------
|
| 37 |
CACHE_DIR = Path("ai_response_cache")
|
| 38 |
CACHE_DIR.mkdir(exist_ok=True)
|
| 39 |
|
|
|
|
| 40 |
def _cache_path(p: str) -> Path:
|
| 41 |
return CACHE_DIR / f"{hashlib.md5(p.encode()).hexdigest()}.json"
|
| 42 |
|
|
|
|
| 43 |
def _get_cached(p: str) -> str | None:
|
| 44 |
try:
|
| 45 |
return json.loads(_cache_path(p).read_text("utf-8"))["response"]
|
| 46 |
except Exception:
|
| 47 |
return None
|
| 48 |
|
|
|
|
| 49 |
def _set_cache(p: str, r: str) -> None:
|
| 50 |
try:
|
| 51 |
_cache_path(p).write_text(json.dumps({"prompt": p, "response": r}), "utf-8")
|
| 52 |
except Exception:
|
| 53 |
pass
|
| 54 |
|
|
|
|
| 55 |
# -------- Async GPT helpers --------------------------------------------------
|
| 56 |
-
_SEM = asyncio.Semaphore(100)
|
| 57 |
_inflight: dict[str, asyncio.Future] = {} # prompt → Future
|
| 58 |
|
|
|
|
| 59 |
async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
|
| 60 |
-
"""Single LLM call with
|
| 61 |
cached = _get_cached(prompt)
|
| 62 |
if cached is not None:
|
| 63 |
return cached
|
| 64 |
|
| 65 |
-
#
|
| 66 |
-
|
| 67 |
-
if
|
| 68 |
-
return await
|
| 69 |
|
| 70 |
loop = asyncio.get_running_loop()
|
| 71 |
|
| 72 |
async def _call_api() -> str:
|
| 73 |
-
async with _SEM:
|
| 74 |
try:
|
| 75 |
msg = await client.chat.completions.create(
|
| 76 |
model="gpt-4o-mini",
|
|
@@ -90,7 +100,10 @@ async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
|
|
| 90 |
finally:
|
| 91 |
_inflight.pop(prompt, None)
|
| 92 |
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
| 94 |
"""Vectorised helper — returns an output list matching *lst* length."""
|
| 95 |
out: list[str] = ["" for _ in lst]
|
| 96 |
idx, prompts = [], []
|
|
@@ -106,6 +119,7 @@ async def _batch_async(lst: list[str], instruction: str, client: openai.AsyncOpe
|
|
| 106 |
out[idx[j]] = val
|
| 107 |
return out
|
| 108 |
|
|
|
|
| 109 |
# -------- Core converter -----------------------------------------------------
|
| 110 |
DEFAULT_PREREQ = (
|
| 111 |
"No specific prerequisites are required for this course. Basic computer literacy and "
|
|
@@ -113,11 +127,13 @@ DEFAULT_PREREQ = (
|
|
| 113 |
"learning experience."
|
| 114 |
)
|
| 115 |
|
|
|
|
| 116 |
def _read(path: str) -> pd.DataFrame:
|
| 117 |
if path.lower().endswith((".xlsx", ".xls")):
|
| 118 |
return pd.read_excel(path)
|
| 119 |
return pd.read_csv(path, encoding="latin1")
|
| 120 |
|
|
|
|
| 121 |
async def _enrich_dataframe(
|
| 122 |
df: pd.DataFrame, dcol: str, ocol: str, pcol: str, acol: str
|
| 123 |
) -> tuple[list[str], list[str], list[str], list[str], list[str]]:
|
|
@@ -146,7 +162,7 @@ async def _enrich_dataframe(
|
|
| 146 |
),
|
| 147 |
)
|
| 148 |
|
| 149 |
-
#
|
| 150 |
prereq_raw = df.get(pcol, "").fillna("").tolist()
|
| 151 |
fpre: list[str] = []
|
| 152 |
for req in prereq_raw:
|
|
@@ -162,9 +178,11 @@ async def _enrich_dataframe(
|
|
| 162 |
|
| 163 |
return sdesc, ldesc, fobj, fout, fpre
|
| 164 |
|
|
|
|
| 165 |
def convert(path: str) -> BytesIO:
|
| 166 |
logos = {
|
| 167 |
"Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
|
|
|
|
| 168 |
"Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
|
| 169 |
"Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
|
| 170 |
"Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
|
|
@@ -190,7 +208,7 @@ def convert(path: str) -> BytesIO:
|
|
| 190 |
sid = first_col("Course SID", "Course SID")
|
| 191 |
|
| 192 |
if dur not in df.columns:
|
| 193 |
-
df[dur] = ""
|
| 194 |
|
| 195 |
# ---------- LLM enrichment (async) -------------------------------------
|
| 196 |
sdesc, ldesc, fobj, fout, fpre = asyncio.run(
|
|
@@ -223,7 +241,7 @@ def convert(path: str) -> BytesIO:
|
|
| 223 |
g["Course Start Time"], g["Course End Time"], g["Time Zone"]
|
| 224 |
)
|
| 225 |
),
|
| 226 |
-
include_groups=False,
|
| 227 |
)
|
| 228 |
.reset_index(name="Times")
|
| 229 |
)
|
|
@@ -358,8 +376,8 @@ def convert(path: str) -> BytesIO:
|
|
| 358 |
out.seek(0)
|
| 359 |
return out
|
| 360 |
|
| 361 |
-
# -------- Gradio wrappers ----------------------------------------------------
|
| 362 |
|
|
|
|
| 363 |
def process_file(upload: gr.File) -> str:
|
| 364 |
csv_bytes = convert(upload.name)
|
| 365 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
|
|
@@ -367,6 +385,7 @@ def process_file(upload: gr.File) -> str:
|
|
| 367 |
path = tmp.name
|
| 368 |
return path
|
| 369 |
|
|
|
|
| 370 |
ui = gr.Interface(
|
| 371 |
fn=process_file,
|
| 372 |
inputs=gr.File(
|
|
|
|
| 1 |
+
"""NetCom → WooCommerce transformer (Try 2 schema — 100-parallel, de-dupe, pandas-fix)
|
| 2 |
+
=====================================================================================
|
| 3 |
*Accept CSV **or** Excel schedule files and output the WooCommerce CSV.*
|
| 4 |
|
| 5 |
+
Latest tweak
|
| 6 |
+
------------
|
| 7 |
+
• **Logo map** now contains both `"Amazon Web Services"` *and* `"AWS"` keys
|
| 8 |
+
so either value in the *Vendor* column resolves to the same upload path.
|
| 9 |
+
(Everything else is untouched.)
|
| 10 |
"""
|
| 11 |
|
| 12 |
from __future__ import annotations
|
|
|
|
| 27 |
# -------- Gradio bool-schema hot-patch --------------------------------------
|
| 28 |
_original = gradio_client.utils._json_schema_to_python_type
|
| 29 |
|
| 30 |
+
|
| 31 |
def _fixed_json_schema_to_python_type(schema, defs=None): # type: ignore
|
| 32 |
if isinstance(schema, bool):
|
| 33 |
return "any"
|
| 34 |
return _original(schema, defs)
|
| 35 |
|
| 36 |
+
|
| 37 |
+
gradio_client.utils._json_schema_to_python_type = ( # type: ignore
|
| 38 |
+
_fixed_json_schema_to_python_type
|
| 39 |
+
)
|
| 40 |
|
| 41 |
# -------- Tiny disk cache ----------------------------------------------------
|
| 42 |
CACHE_DIR = Path("ai_response_cache")
|
| 43 |
CACHE_DIR.mkdir(exist_ok=True)
|
| 44 |
|
| 45 |
+
|
| 46 |
def _cache_path(p: str) -> Path:
|
| 47 |
return CACHE_DIR / f"{hashlib.md5(p.encode()).hexdigest()}.json"
|
| 48 |
|
| 49 |
+
|
| 50 |
def _get_cached(p: str) -> str | None:
|
| 51 |
try:
|
| 52 |
return json.loads(_cache_path(p).read_text("utf-8"))["response"]
|
| 53 |
except Exception:
|
| 54 |
return None
|
| 55 |
|
| 56 |
+
|
| 57 |
def _set_cache(p: str, r: str) -> None:
|
| 58 |
try:
|
| 59 |
_cache_path(p).write_text(json.dumps({"prompt": p, "response": r}), "utf-8")
|
| 60 |
except Exception:
|
| 61 |
pass
|
| 62 |
|
| 63 |
+
|
| 64 |
# -------- Async GPT helpers --------------------------------------------------
|
| 65 |
+
_SEM = asyncio.Semaphore(100) # ≤100 concurrent OpenAI calls
|
| 66 |
_inflight: dict[str, asyncio.Future] = {} # prompt → Future
|
| 67 |
|
| 68 |
+
|
| 69 |
async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
|
| 70 |
+
"""Single LLM call with cache, concurrency cap, and de-duplication."""
|
| 71 |
cached = _get_cached(prompt)
|
| 72 |
if cached is not None:
|
| 73 |
return cached
|
| 74 |
|
| 75 |
+
# de-dup identical prompts already in-flight
|
| 76 |
+
existing = _inflight.get(prompt)
|
| 77 |
+
if existing is not None:
|
| 78 |
+
return await existing
|
| 79 |
|
| 80 |
loop = asyncio.get_running_loop()
|
| 81 |
|
| 82 |
async def _call_api() -> str:
|
| 83 |
+
async with _SEM:
|
| 84 |
try:
|
| 85 |
msg = await client.chat.completions.create(
|
| 86 |
model="gpt-4o-mini",
|
|
|
|
| 100 |
finally:
|
| 101 |
_inflight.pop(prompt, None)
|
| 102 |
|
| 103 |
+
|
| 104 |
+
async def _batch_async(
|
| 105 |
+
lst: list[str], instruction: str, client: openai.AsyncOpenAI
|
| 106 |
+
) -> list[str]:
|
| 107 |
"""Vectorised helper — returns an output list matching *lst* length."""
|
| 108 |
out: list[str] = ["" for _ in lst]
|
| 109 |
idx, prompts = [], []
|
|
|
|
| 119 |
out[idx[j]] = val
|
| 120 |
return out
|
| 121 |
|
| 122 |
+
|
| 123 |
# -------- Core converter -----------------------------------------------------
|
| 124 |
DEFAULT_PREREQ = (
|
| 125 |
"No specific prerequisites are required for this course. Basic computer literacy and "
|
|
|
|
| 127 |
"learning experience."
|
| 128 |
)
|
| 129 |
|
| 130 |
+
|
| 131 |
def _read(path: str) -> pd.DataFrame:
|
| 132 |
if path.lower().endswith((".xlsx", ".xls")):
|
| 133 |
return pd.read_excel(path)
|
| 134 |
return pd.read_csv(path, encoding="latin1")
|
| 135 |
|
| 136 |
+
|
| 137 |
async def _enrich_dataframe(
|
| 138 |
df: pd.DataFrame, dcol: str, ocol: str, pcol: str, acol: str
|
| 139 |
) -> tuple[list[str], list[str], list[str], list[str], list[str]]:
|
|
|
|
| 162 |
),
|
| 163 |
)
|
| 164 |
|
| 165 |
+
# prerequisites
|
| 166 |
prereq_raw = df.get(pcol, "").fillna("").tolist()
|
| 167 |
fpre: list[str] = []
|
| 168 |
for req in prereq_raw:
|
|
|
|
| 178 |
|
| 179 |
return sdesc, ldesc, fobj, fout, fpre
|
| 180 |
|
| 181 |
+
|
| 182 |
def convert(path: str) -> BytesIO:
|
| 183 |
logos = {
|
| 184 |
"Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
|
| 185 |
+
"AWS": "/wp-content/uploads/2025/04/aws.png",
|
| 186 |
"Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
|
| 187 |
"Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
|
| 188 |
"Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
|
|
|
|
| 208 |
sid = first_col("Course SID", "Course SID")
|
| 209 |
|
| 210 |
if dur not in df.columns:
|
| 211 |
+
df[dur] = ""
|
| 212 |
|
| 213 |
# ---------- LLM enrichment (async) -------------------------------------
|
| 214 |
sdesc, ldesc, fobj, fout, fpre = asyncio.run(
|
|
|
|
| 241 |
g["Course Start Time"], g["Course End Time"], g["Time Zone"]
|
| 242 |
)
|
| 243 |
),
|
| 244 |
+
include_groups=False,
|
| 245 |
)
|
| 246 |
.reset_index(name="Times")
|
| 247 |
)
|
|
|
|
| 376 |
out.seek(0)
|
| 377 |
return out
|
| 378 |
|
|
|
|
| 379 |
|
| 380 |
+
# -------- Gradio wrappers ----------------------------------------------------
|
| 381 |
def process_file(upload: gr.File) -> str:
|
| 382 |
csv_bytes = convert(upload.name)
|
| 383 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
|
|
|
|
| 385 |
path = tmp.name
|
| 386 |
return path
|
| 387 |
|
| 388 |
+
|
| 389 |
ui = gr.Interface(
|
| 390 |
fn=process_file,
|
| 391 |
inputs=gr.File(
|