Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,17 @@
|
|
1 |
-
"""NetCom β WooCommerce transformer (Try 2 schema β 100-parallel,
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
10 |
"""
|
11 |
|
12 |
from __future__ import annotations
|
@@ -24,58 +29,46 @@ import gradio_client.utils
|
|
24 |
import openai
|
25 |
import pandas as pd
|
26 |
|
27 |
-
#
|
28 |
_original = gradio_client.utils._json_schema_to_python_type
|
29 |
-
|
30 |
-
|
31 |
def _fixed_json_schema_to_python_type(schema, defs=None): # type: ignore
|
32 |
if isinstance(schema, bool):
|
33 |
return "any"
|
34 |
return _original(schema, defs)
|
|
|
35 |
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
)
|
40 |
-
|
41 |
-
# -------- Tiny disk cache ----------------------------------------------------
|
42 |
-
CACHE_DIR = Path("ai_response_cache")
|
43 |
-
CACHE_DIR.mkdir(exist_ok=True)
|
44 |
-
|
45 |
|
46 |
def _cache_path(p: str) -> Path:
|
47 |
return CACHE_DIR / f"{hashlib.md5(p.encode()).hexdigest()}.json"
|
48 |
|
49 |
-
|
50 |
def _get_cached(p: str) -> str | None:
|
51 |
try:
|
52 |
return json.loads(_cache_path(p).read_text("utf-8"))["response"]
|
53 |
except Exception:
|
54 |
return None
|
55 |
|
56 |
-
|
57 |
def _set_cache(p: str, r: str) -> None:
|
58 |
try:
|
59 |
_cache_path(p).write_text(json.dumps({"prompt": p, "response": r}), "utf-8")
|
60 |
except Exception:
|
61 |
pass
|
62 |
|
63 |
-
|
64 |
-
#
|
65 |
-
_SEM = asyncio.Semaphore(100) # β€100 concurrent OpenAI calls
|
66 |
_inflight: dict[str, asyncio.Future] = {} # prompt β Future
|
67 |
|
68 |
-
|
69 |
async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
|
70 |
-
"""Single LLM call with cache, concurrency cap, and de-duplication."""
|
71 |
cached = _get_cached(prompt)
|
72 |
if cached is not None:
|
73 |
return cached
|
74 |
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
return await existing
|
79 |
|
80 |
loop = asyncio.get_running_loop()
|
81 |
|
@@ -100,12 +93,8 @@ async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
|
|
100 |
finally:
|
101 |
_inflight.pop(prompt, None)
|
102 |
|
103 |
-
|
104 |
-
|
105 |
-
lst: list[str], instruction: str, client: openai.AsyncOpenAI
|
106 |
-
) -> list[str]:
|
107 |
-
"""Vectorised helper β returns an output list matching *lst* length."""
|
108 |
-
out: list[str] = ["" for _ in lst]
|
109 |
idx, prompts = [], []
|
110 |
for i, txt in enumerate(lst):
|
111 |
if isinstance(txt, str) and txt.strip():
|
@@ -119,109 +108,122 @@ async def _batch_async(
|
|
119 |
out[idx[j]] = val
|
120 |
return out
|
121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
|
123 |
-
# -------- Core converter -----------------------------------------------------
|
124 |
DEFAULT_PREREQ = (
|
125 |
"No specific prerequisites are required for this course. Basic computer literacy and "
|
126 |
"familiarity with fundamental concepts in the subject area are recommended for the best "
|
127 |
"learning experience."
|
128 |
)
|
129 |
|
130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
def _read(path: str) -> pd.DataFrame:
|
132 |
if path.lower().endswith((".xlsx", ".xls")):
|
133 |
return pd.read_excel(path)
|
134 |
return pd.read_csv(path, encoding="latin1")
|
135 |
|
136 |
-
|
137 |
-
async def _enrich_dataframe(
|
138 |
-
df: pd.DataFrame, dcol: str, ocol: str, pcol: str, acol: str
|
139 |
-
) -> tuple[list[str], list[str], list[str], list[str], list[str]]:
|
140 |
-
"""Run all LLM batches concurrently and return the five enrichment columns."""
|
141 |
async with openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) as client:
|
142 |
sdesc, ldesc, fobj, fout = await asyncio.gather(
|
143 |
-
_batch_async(
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
),
|
148 |
-
_batch_async(
|
149 |
-
df.get(dcol, "").fillna("").tolist(),
|
150 |
-
"Condense this description to a maximum of 750 characters in paragraph format, with clean formatting:",
|
151 |
-
client,
|
152 |
-
),
|
153 |
-
_batch_async(
|
154 |
-
df.get(ocol, "").fillna("").tolist(),
|
155 |
-
"Format these objectives into a bullet list with clean formatting. Start each bullet with 'β’ ':",
|
156 |
-
client,
|
157 |
-
),
|
158 |
-
_batch_async(
|
159 |
-
df.get(acol, "").fillna("").tolist(),
|
160 |
-
"Format this agenda into a bullet list with clean formatting. Start each bullet with 'β’ ':",
|
161 |
-
client,
|
162 |
-
),
|
163 |
)
|
164 |
|
165 |
-
# prerequisites
|
166 |
prereq_raw = df.get(pcol, "").fillna("").tolist()
|
167 |
-
fpre
|
168 |
for req in prereq_raw:
|
169 |
if not str(req).strip():
|
170 |
fpre.append(DEFAULT_PREREQ)
|
171 |
else:
|
172 |
-
|
173 |
-
|
174 |
-
"Format these prerequisites into a bullet list with clean formatting. Start each bullet with 'β’ ':",
|
175 |
-
client,
|
176 |
-
)
|
177 |
-
fpre.append(formatted[0])
|
178 |
|
179 |
return sdesc, ldesc, fobj, fout, fpre
|
180 |
|
181 |
-
|
182 |
-
def convert(
|
183 |
-
|
184 |
-
"Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
|
185 |
-
"AWS": "/wp-content/uploads/2025/04/aws.png",
|
186 |
-
"Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
|
187 |
-
"Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
|
188 |
-
"Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
|
189 |
-
"EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
|
190 |
-
"ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
|
191 |
-
"PMI": "/wp-content/uploads/2025/04/PMI.png",
|
192 |
-
"Comptia": "/wp-content/uploads/2025/04/Comptia.png",
|
193 |
-
"Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
|
194 |
-
"ISC2": "/wp-content/uploads/2025/04/ISC2.png",
|
195 |
-
"AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png",
|
196 |
-
}
|
197 |
-
|
198 |
-
df = _read(path)
|
199 |
df.columns = df.columns.str.strip()
|
200 |
|
201 |
-
first_col = lambda *
|
202 |
-
|
203 |
dcol = first_col("Description", "Decription")
|
204 |
ocol = first_col("Objectives", "objectives")
|
205 |
pcol = first_col("RequiredPrerequisite", "Required Pre-requisite")
|
206 |
acol = first_col("Outline")
|
207 |
-
dur
|
208 |
-
sid
|
209 |
|
210 |
if dur not in df.columns:
|
211 |
df[dur] = ""
|
212 |
|
213 |
-
#
|
|
|
|
|
|
|
|
|
214 |
sdesc, ldesc, fobj, fout, fpre = asyncio.run(
|
215 |
_enrich_dataframe(df, dcol, ocol, pcol, acol)
|
216 |
)
|
|
|
|
|
|
|
|
|
|
|
217 |
|
218 |
-
|
219 |
-
df["Condensed_Description"] = ldesc
|
220 |
-
df["Formatted_Objectives"] = fobj
|
221 |
-
df["Formatted_Agenda"] = fout
|
222 |
-
df["Formatted_Prerequisites"] = fpre
|
223 |
-
|
224 |
-
# ---------- Schedule aggregation --------------------------------------
|
225 |
df["Course Start Date"] = pd.to_datetime(df["Course Start Date"], errors="coerce")
|
226 |
df["Date_fmt"] = df["Course Start Date"].dt.strftime("%-m/%-d/%Y")
|
227 |
|
@@ -231,7 +233,6 @@ def convert(path: str) -> BytesIO:
|
|
231 |
.apply(lambda s: ",".join(s.dropna().unique()))
|
232 |
.reset_index(name="Dates")
|
233 |
)
|
234 |
-
|
235 |
t_agg = (
|
236 |
dsorted.groupby("Course ID", group_keys=False)
|
237 |
.apply(
|
@@ -245,10 +246,8 @@ def convert(path: str) -> BytesIO:
|
|
245 |
)
|
246 |
.reset_index(name="Times")
|
247 |
)
|
248 |
-
|
249 |
parents = dsorted.drop_duplicates("Course ID").merge(d_agg).merge(t_agg)
|
250 |
|
251 |
-
# ---------- Parent / child product rows --------------------------------
|
252 |
parent = pd.DataFrame(
|
253 |
{
|
254 |
"Type": "variable",
|
@@ -334,66 +333,39 @@ def convert(path: str) -> BytesIO:
|
|
334 |
|
335 |
all_rows = pd.concat([parent, child], ignore_index=True)
|
336 |
order = [
|
337 |
-
"Type",
|
338 |
-
"
|
339 |
-
"
|
340 |
-
"
|
341 |
-
"
|
342 |
-
"
|
343 |
-
"
|
344 |
-
"Tax status",
|
345 |
-
"In stock?",
|
346 |
-
"Stock",
|
347 |
-
"Sold individually?",
|
348 |
-
"Regular price",
|
349 |
-
"Categories",
|
350 |
-
"Images",
|
351 |
-
"Parent",
|
352 |
-
"Brands",
|
353 |
-
"Attribute 1 name",
|
354 |
-
"Attribute 1 value(s)",
|
355 |
-
"Attribute 1 visible",
|
356 |
-
"Attribute 1 global",
|
357 |
-
"Attribute 2 name",
|
358 |
-
"Attribute 2 value(s)",
|
359 |
-
"Attribute 2 visible",
|
360 |
-
"Attribute 2 global",
|
361 |
-
"Attribute 3 name",
|
362 |
-
"Attribute 3 value(s)",
|
363 |
-
"Attribute 3 visible",
|
364 |
-
"Attribute 3 global",
|
365 |
-
"Meta: outline",
|
366 |
-
"Meta: days",
|
367 |
-
"Meta: location",
|
368 |
-
"Meta: overview",
|
369 |
-
"Meta: objectives",
|
370 |
-
"Meta: prerequisites",
|
371 |
-
"Meta: agenda",
|
372 |
]
|
373 |
-
|
374 |
out = BytesIO()
|
375 |
all_rows[order].to_csv(out, index=False, encoding="utf-8-sig")
|
376 |
out.seek(0)
|
377 |
return out
|
378 |
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
csv_bytes = convert(upload.name)
|
383 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
|
384 |
tmp.write(csv_bytes.getvalue())
|
385 |
-
|
386 |
-
return path
|
387 |
-
|
388 |
|
389 |
ui = gr.Interface(
|
390 |
-
fn=
|
391 |
-
inputs=
|
392 |
-
label="Upload NetCom
|
393 |
-
|
|
|
394 |
outputs=gr.File(label="Download WooCommerce CSV"),
|
395 |
title="NetCom β WooCommerce CSV Processor (Try 2)",
|
396 |
-
description=
|
|
|
|
|
|
|
|
|
397 |
analytics_enabled=False,
|
398 |
)
|
399 |
|
|
|
1 |
+
"""NetCom β WooCommerce transformer (Try 2 schema β persistent cache, 100-parallel,
|
2 |
+
duplicate-safe, relative-logo paths, cache-preload)
|
3 |
+
==================================================================================
|
4 |
+
|
5 |
+
*Accept a NetCom schedule (CSV/XLSX) and **optionally** a *previous* WooCommerce
|
6 |
+
CSV; output the fresh WooCommerce CSV.*
|
7 |
+
|
8 |
+
New in this revision
|
9 |
+
--------------------
|
10 |
+
* **Relative** image paths kept (WooCommerce resolves them to your own domain).
|
11 |
+
* Second optional file-input lets you *pre-load* the on-disk cache from a prior
|
12 |
+
run, so already-processed courses skip OpenAI completely.
|
13 |
+
* Everything else (persistent cache in `/data`, 100-parallel semaphore,
|
14 |
+
in-flight de-duplication, pandas compatibility fix) remains unchanged.
|
15 |
"""
|
16 |
|
17 |
from __future__ import annotations
|
|
|
29 |
import openai
|
30 |
import pandas as pd
|
31 |
|
32 |
+
# ── Gradio bool-schema hot-patch ─────────────────────────────────────────────
# Keep a handle on the stock converter so the wrapper below can defer to it.
_original = gradio_client.utils._json_schema_to_python_type


def _fixed_json_schema_to_python_type(schema, defs=None):  # type: ignore
    """Answer ``"any"`` for a boolean *schema*; otherwise defer to the stock converter."""
    return "any" if isinstance(schema, bool) else _original(schema, defs)


# Install the wrapper in place of the library function.
gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type  # type: ignore
|
39 |
|
40 |
+
# ββ Persistent disk cache (HF Spaces uses /data) βββββββββββββββββββββββββββββ
|
41 |
+
_PERSISTENT_ROOT = Path("/data")
|
42 |
+
CACHE_DIR = (_PERSISTENT_ROOT if _PERSISTENT_ROOT.exists() else Path(".")) / "ai_response_cache"
|
43 |
+
CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
def _cache_path(p: str) -> Path:
|
46 |
return CACHE_DIR / f"{hashlib.md5(p.encode()).hexdigest()}.json"
|
47 |
|
|
|
48 |
def _get_cached(p: str) -> str | None:
|
49 |
try:
|
50 |
return json.loads(_cache_path(p).read_text("utf-8"))["response"]
|
51 |
except Exception:
|
52 |
return None
|
53 |
|
|
|
54 |
def _set_cache(p: str, r: str) -> None:
|
55 |
try:
|
56 |
_cache_path(p).write_text(json.dumps({"prompt": p, "response": r}), "utf-8")
|
57 |
except Exception:
|
58 |
pass
|
59 |
|
60 |
+
# ββ OpenAI helpers: 100-parallel + de-dup ββββββββββββββββββββββββββββββββββββ
|
61 |
+
_SEM = asyncio.Semaphore(100) # β€100 concurrent OpenAI calls
|
|
|
62 |
_inflight: dict[str, asyncio.Future] = {} # prompt β Future
|
63 |
|
|
|
64 |
async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
|
|
|
65 |
cached = _get_cached(prompt)
|
66 |
if cached is not None:
|
67 |
return cached
|
68 |
|
69 |
+
running = _inflight.get(prompt)
|
70 |
+
if running is not None:
|
71 |
+
return await running
|
|
|
72 |
|
73 |
loop = asyncio.get_running_loop()
|
74 |
|
|
|
93 |
finally:
|
94 |
_inflight.pop(prompt, None)
|
95 |
|
96 |
+
async def _batch_async(lst, instruction: str, client):
|
97 |
+
out = ["" for _ in lst]
|
|
|
|
|
|
|
|
|
98 |
idx, prompts = [], []
|
99 |
for i, txt in enumerate(lst):
|
100 |
if isinstance(txt, str) and txt.strip():
|
|
|
108 |
out[idx[j]] = val
|
109 |
return out
|
110 |
|
111 |
+
# ── Instructions (reuse across preload & gen) ────────────────────────────────
# Each instruction is the leading part of a prompt, and prompts are hashed to
# name cache files, so editing a string here means previously cached answers
# for that instruction are no longer found.
# The bullet glyph in the three list instructions is U+2022 ("•"); it had been
# corrupted into mis-decoded bytes.
DESC_SHORT = "Create a concise 250-character summary of this course description:"
DESC_LONG = (
    "Condense this description to a maximum of 750 characters in paragraph format, "
    "with clean formatting:"
)
OBJECTIVES = (
    "Format these objectives into a bullet list with clean formatting. "
    "Start each bullet with '• ':"
)
AGENDA = (
    "Format this agenda into a bullet list with clean formatting. "
    "Start each bullet with '• ':"
)
PREREQ = (
    "Format these prerequisites into a bullet list with clean formatting. "
    "Start each bullet with '• ':"
)
|
117 |
+
|
118 |
+
# ── Logo map (relative paths, with common aliases) ───────────────────────────
# Every logo sits in the same uploads folder, so only the file names are listed
# and the shared prefix is applied once.
_LOGO_DIR = "/wp-content/uploads/2025/04"
_LOGO_FILES = (
    ("Amazon Web Services", "aws.png"),
    ("AWS", "aws.png"),
    ("Cisco", "cisco-e1738593292198-1.webp"),
    ("Microsoft", "Microsoft-e1737494120985-1.png"),
    ("Google Cloud", "Google_Cloud.png"),
    ("EC Council", "Ec_Council.png"),
    ("ITIL", "ITIL.webp"),
    ("PMI", "PMI.png"),
    ("Comptia", "Comptia.png"),
    ("Autodesk", "autodesk.png"),
    ("ISC2", "ISC2.png"),
    ("AICerts", "aicerts-logo-1.png"),
)
# Brand name -> site-relative image path.
logos = {brand: f"{_LOGO_DIR}/{filename}" for brand, filename in _LOGO_FILES}
|
133 |
|
|
|
134 |
# Text used for the prerequisites field when a course lists none of its own.
DEFAULT_PREREQ = " ".join(
    (
        "No specific prerequisites are required for this course.",
        "Basic computer literacy and familiarity with fundamental concepts in the",
        "subject area are recommended for the best learning experience.",
    )
)
|
139 |
|
140 |
+
# ββ Cache-preload from previous WooCommerce CSV ββββββββββββββββββββββββββββββ
|
141 |
+
def _preload_cache(prev_csv: str, df_new: pd.DataFrame, dcol, ocol, pcol, acol):
|
142 |
+
"""Seed the on-disk cache with completions from an earlier WooCommerce CSV."""
|
143 |
+
try:
|
144 |
+
prev = pd.read_csv(prev_csv, encoding="utf-8-sig")
|
145 |
+
except Exception:
|
146 |
+
return
|
147 |
+
|
148 |
+
prev_parent = prev[prev["Type"].str.startswith("variable", na=False)]
|
149 |
+
prev_map = {row["SKU"]: row for _, row in prev_parent.iterrows()} # SKU == Course ID
|
150 |
+
|
151 |
+
for _, row in df_new.iterrows():
|
152 |
+
cid = row["Course ID"]
|
153 |
+
if cid not in prev_map:
|
154 |
+
continue
|
155 |
+
old = prev_map[cid]
|
156 |
+
|
157 |
+
desc = str(row[dcol])
|
158 |
+
obj = str(row[ocol])
|
159 |
+
ag = str(row[acol])
|
160 |
+
pre = str(row[pcol])
|
161 |
+
|
162 |
+
_set_cache(f"{DESC_SHORT}\n\nText: {desc}", old.get("Short description", ""))
|
163 |
+
_set_cache(f"{DESC_LONG}\n\nText: {desc}", old.get("Description", ""))
|
164 |
+
_set_cache(f"{OBJECTIVES}\n\nText: {obj}", old.get("Meta: objectives", ""))
|
165 |
+
_set_cache(f"{AGENDA}\n\nText: {ag}", old.get("Meta: agenda", ""))
|
166 |
+
if pre.strip():
|
167 |
+
_set_cache(f"{PREREQ}\n\nText: {pre}", old.get("Meta: prerequisites", ""))
|
168 |
+
|
169 |
+
# ββ Helper: read user file (CSV or Excel) ββββββββββββββββββββββββββββββββββββ
|
170 |
def _read(path: str) -> pd.DataFrame:
|
171 |
if path.lower().endswith((".xlsx", ".xls")):
|
172 |
return pd.read_excel(path)
|
173 |
return pd.read_csv(path, encoding="latin1")
|
174 |
|
175 |
+
# ── Enrichment step (async batched LLM) ──────────────────────────────────────
async def _enrich_dataframe(df, dcol, ocol, pcol, acol):
    """Run every LLM batch concurrently and return the five enrichment columns.

    Args:
        df: the schedule DataFrame.
        dcol, ocol, pcol, acol: names of the description, objectives,
            prerequisite and outline columns; ``None`` or a name missing from
            *df* is treated as a column of empty strings.

    Returns:
        ``(sdesc, ldesc, fobj, fout, fpre)`` — five lists with one entry per
        row of *df*: short description, condensed description, formatted
        objectives, formatted agenda and formatted prerequisites.  Rows whose
        prerequisite cell is blank receive ``DEFAULT_PREREQ``.
    """

    def _column(name) -> list:
        """Return column *name* as a list with NaN replaced by ""."""
        # df.get(name, "") would hand back a plain str for a missing column,
        # and str has no .fillna.
        if name is None or name not in df.columns:
            return [""] * len(df)
        return df[name].fillna("").tolist()

    descriptions = _column(dcol)
    prereq_raw = _column(pcol)

    async with openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) as client:
        # Prerequisites are part of the same gather rather than being awaited
        # row by row, so they run concurrently with the other batches.
        sdesc, ldesc, fobj, fout, pre_fmt = await asyncio.gather(
            _batch_async(descriptions, DESC_SHORT, client),
            _batch_async(descriptions, DESC_LONG, client),
            _batch_async(_column(ocol), OBJECTIVES, client),
            _batch_async(_column(acol), AGENDA, client),
            _batch_async(prereq_raw, PREREQ, client),
        )

    fpre = [
        DEFAULT_PREREQ if not str(req).strip() else formatted
        for req, formatted in zip(prereq_raw, pre_fmt)
    ]
    return sdesc, ldesc, fobj, fout, fpre
|
195 |
|
196 |
+
# ── Main converter ───────────────────────────────────────────────────────────
|
197 |
+
def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
|
198 |
+
df = _read(schedule_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
df.columns = df.columns.str.strip()
|
200 |
|
201 |
+
first_col = lambda *cand: next((c for c in cand if c in df.columns), None)
|
|
|
202 |
dcol = first_col("Description", "Decription")
|
203 |
ocol = first_col("Objectives", "objectives")
|
204 |
pcol = first_col("RequiredPrerequisite", "Required Pre-requisite")
|
205 |
acol = first_col("Outline")
|
206 |
+
dur = first_col("Duration") or "Duration"
|
207 |
+
sid = first_col("Course SID", "Course SID")
|
208 |
|
209 |
if dur not in df.columns:
|
210 |
df[dur] = ""
|
211 |
|
212 |
+
# optional cache preload
|
213 |
+
if prev_csv_path:
|
214 |
+
_preload_cache(prev_csv_path, df, dcol, ocol, pcol, acol)
|
215 |
+
|
216 |
+
# async-enrich via LLM
|
217 |
sdesc, ldesc, fobj, fout, fpre = asyncio.run(
|
218 |
_enrich_dataframe(df, dcol, ocol, pcol, acol)
|
219 |
)
|
220 |
+
df["Short_Description"] = sdesc
|
221 |
+
df["Condensed_Description"] = ldesc
|
222 |
+
df["Formatted_Objectives"] = fobj
|
223 |
+
df["Formatted_Agenda"] = fout
|
224 |
+
df["Formatted_Prerequisites"]= fpre
|
225 |
|
226 |
+
# schedule aggregation
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
df["Course Start Date"] = pd.to_datetime(df["Course Start Date"], errors="coerce")
|
228 |
df["Date_fmt"] = df["Course Start Date"].dt.strftime("%-m/%-d/%Y")
|
229 |
|
|
|
233 |
.apply(lambda s: ",".join(s.dropna().unique()))
|
234 |
.reset_index(name="Dates")
|
235 |
)
|
|
|
236 |
t_agg = (
|
237 |
dsorted.groupby("Course ID", group_keys=False)
|
238 |
.apply(
|
|
|
246 |
)
|
247 |
.reset_index(name="Times")
|
248 |
)
|
|
|
249 |
parents = dsorted.drop_duplicates("Course ID").merge(d_agg).merge(t_agg)
|
250 |
|
|
|
251 |
parent = pd.DataFrame(
|
252 |
{
|
253 |
"Type": "variable",
|
|
|
333 |
|
334 |
all_rows = pd.concat([parent, child], ignore_index=True)
|
335 |
order = [
|
336 |
+
"Type","SKU","Name","Published","Visibility in catalog","Short description","Description",
|
337 |
+
"Tax status","In stock?","Stock","Sold individually?","Regular price","Categories","Images",
|
338 |
+
"Parent","Brands","Attribute 1 name","Attribute 1 value(s)","Attribute 1 visible","Attribute 1 global",
|
339 |
+
"Attribute 2 name","Attribute 2 value(s)","Attribute 2 visible","Attribute 2 global",
|
340 |
+
"Attribute 3 name","Attribute 3 value(s)","Attribute 3 visible","Attribute 3 global",
|
341 |
+
"Meta: outline","Meta: days","Meta: location","Meta: overview","Meta: objectives",
|
342 |
+
"Meta: prerequisites","Meta: agenda",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
343 |
]
|
|
|
344 |
out = BytesIO()
|
345 |
all_rows[order].to_csv(out, index=False, encoding="utf-8-sig")
|
346 |
out.seek(0)
|
347 |
return out
|
348 |
|
349 |
+
# ── Gradio interface ─────────────────────────────────────────────────────────
def process_files(schedule: gr.File, previous: gr.File | None) -> str:
    """Convert the uploaded schedule and return the path of the resulting CSV.

    Args:
        schedule: the uploaded NetCom schedule.
        previous: an optional WooCommerce CSV from an earlier run, used to
            pre-load the cache; falsy when nothing was uploaded.

    Returns:
        Path of a temporary ``.csv`` file holding the converted data.  The
        file is created with ``delete=False`` so it still exists when the
        caller reads it.

    Raises:
        gr.Error: when no schedule file was supplied.

    NOTE(review): depending on the Gradio version/configuration an upload
    arrives either as a path string or as an object exposing the path as
    ``.name``; both are accepted here — confirm against the installed version.
    """

    def _path_of(upload):
        """Return the filesystem path behind an upload, or None when absent."""
        if not upload:
            return None
        return os.fspath(getattr(upload, "name", upload))

    schedule_path = _path_of(schedule)
    if schedule_path is None:
        raise gr.Error("Please upload a NetCom schedule file first.")

    csv_bytes = convert(schedule_path, _path_of(previous))
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
        tmp.write(csv_bytes.getvalue())
    return tmp.name
|
|
|
|
|
355 |
|
356 |
# Two file inputs (schedule + optional previous export) and one downloadable CSV.
ui = gr.Interface(
    fn=process_files,
    inputs=[
        gr.File(
            label="Upload NetCom schedule (.csv/.xlsx/.xls)",
            file_types=[".csv", ".xlsx", ".xls"],
        ),
        # No `optional=` keyword: it is not a parameter of current Gradio
        # components, and `process_files` already treats a missing upload
        # as "no previous file".
        gr.File(label="Previous WooCommerce CSV (optional)", file_types=[".csv"]),
    ],
    outputs=gr.File(label="Download WooCommerce CSV"),
    title="NetCom → WooCommerce CSV Processor (Try 2)",
    description=(
        "1. Upload the **latest NetCom schedule** file.\n"
        "2. *(Optional)* Upload the **WooCommerce CSV** generated by a previous run to "
        "pre-load the cache and skip already-processed courses."
    ),
    analytics_enabled=False,
)
|
371 |
|