"""NetCom → WooCommerce transformer (Try 1 schema)
=================================================
*Accept CSV **or** Excel schedule files and output the WooCommerce CSV.*

Fixes vs last run
-----------------
* Output written to a **temporary file path** (Gradio BytesIO bug fixed).
* **Excel upload** support.
* **Pandas future-warning** silenced (`group_keys=False`).
"""
import asyncio
import hashlib
import json
import os
import tempfile
from io import BytesIO
from pathlib import Path

import gradio as gr
import gradio_client.utils
import openai
import pandas as pd
# -------- Gradio bool-schema hot-patch --------------------------------------
_original = gradio_client.utils._json_schema_to_python_type

def _fixed_json_schema_to_python_type(schema, defs=None):
    if isinstance(schema, bool):
        return "any"
    return _original(schema, defs)

gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type  # type: ignore
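# Note: JSON Schema permits bare booleans (`True` = "accept anything",
# `False` = "accept nothing"), and some gradio_client builds choke when
# converting a bool schema to a Python type string. Mapping both to "any"
# is a pragmatic local workaround, not an upstream fix.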
# -------- Tiny disk cache ----------------------------------------------------
CACHE_DIR = Path("ai_response_cache")
CACHE_DIR.mkdir(exist_ok=True)

def _cache_path(p: str):
    """Map a prompt to a stable cache file via its MD5 digest."""
    return CACHE_DIR / f"{hashlib.md5(p.encode()).hexdigest()}.json"

def _get_cached(p: str):
    """Return the cached response for a prompt, or None on any miss or error."""
    try:
        return json.loads(_cache_path(p).read_text("utf-8"))["response"]
    except Exception:
        return None

def _set_cache(p: str, r: str):
    """Best-effort write; a caching failure must never break the request."""
    try:
        _cache_path(p).write_text(json.dumps({"prompt": p, "response": r}), "utf-8")
    except Exception:
        pass
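# Example (hypothetical prompt/response pair): round-trip through the cache.
#   _set_cache("What is 2 + 2?", "4")
#   _get_cached("What is 2 + 2?")   # -> "4"
#   _get_cached("never asked")      # -> None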
# -------- Async GPT helpers --------------------------------------------------
async def _gpt(client, prompt):
    """Answer one prompt via gpt-4o-mini, consulting the disk cache first."""
    c = _get_cached(prompt)
    if c is not None:
        return c
    try:
        msg = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
        )
        text = msg.choices[0].message.content
    except Exception as e:
        text = f"Error: {e}"
    _set_cache(prompt, text)
    return text

async def _batch(lst, instr):
    """Run one prompt per non-empty string in `lst` concurrently; empty items pass through as ""."""
    out = ["" for _ in lst]
    idx, prompts = [], []
    for i, t in enumerate(lst):
        if isinstance(t, str) and t.strip():
            idx.append(i)
            prompts.append(f"{instr}\n\nText: {t}")
    if not prompts:
        return out
    client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    res = await asyncio.gather(*[_gpt(client, p) for p in prompts])
    for j, val in enumerate(res):
        out[idx[j]] = val
    return out
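# Example (hypothetical inputs; model output will vary): one event-loop run
# formats a whole column at once, and empty cells are passed through untouched.
#   asyncio.run(_batch(["Understand VPCs", ""], "Rewrite as a bullet list:"))
#   -> ["• Understand VPCs", ""]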
# -------- Core converter -----------------------------------------------------
def _read(path: str):
    """Load a schedule file: Excel by extension, otherwise latin-1 CSV."""
    return pd.read_excel(path) if path.lower().endswith((".xlsx", ".xls")) else pd.read_csv(path, encoding="latin1")
def convert(path: str) -> BytesIO:
    """Transform a NetCom schedule into a WooCommerce parent/variation CSV."""
    logos = {
        "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
        "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
        "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
        "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
        "EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
        "ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
        "PMI": "/wp-content/uploads/2025/04/PMI.png",
        "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
        "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
        "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
        "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png",
    }
    default_pre = (
        "No specific prerequisites are required for this course. Basic computer "
        "literacy and familiarity with fundamental concepts in the subject area "
        "are recommended for the best learning experience."
    )
    df = _read(path)
    df.columns = df.columns.str.strip()

    # Column names vary between NetCom exports; pick the first header that exists.
    c = lambda *o: next((x for x in o if x in df.columns), None)
    dcol = c("Description", "Decription")
    ocol = c("Objectives", "objectives")
    pcol = c("RequiredPrerequisite", "Required Pre-requisite")
    acol = c("Outline")
    dur = c("Duration")
    sid = c("Course SID")
    if dur is None:
        df["Duration"] = ""
        dur = "Duration"

    def col(name):
        """Column as a clean string Series; an empty Series if the column is absent."""
        return df[name].fillna("") if name and name in df.columns else pd.Series([""] * len(df))

    pre_raw = col(pcol).tolist()
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    sdesc, ldesc, fobj, fout, fpre_ai = loop.run_until_complete(asyncio.gather(
        _batch(col(dcol).tolist(), "Create a concise 250-character summary of this course description:"),
        _batch(col(dcol).tolist(), "Condense this description to a maximum of 750 characters in paragraph format, with clean formatting:"),
        _batch(col(ocol).tolist(), "Format these objectives into a bullet list with clean formatting. Start each bullet with '• ':"),
        _batch(col(acol).tolist(), "Format this agenda into a bullet list with clean formatting. Start each bullet with '• ':"),
        _batch(pre_raw, "Format these prerequisites into a bullet list with clean formatting. Start each bullet with '• ':"),
    ))
    loop.close()
    # Blank prerequisite cells fall back to the boilerplate text.
    fpre = [default_pre if not str(p).strip() else f for p, f in zip(pre_raw, fpre_ai)]

    df["Short_Description"] = sdesc
    df["Condensed_Description"] = ldesc
    df["Formatted_Objectives"] = fobj
    df["Formatted_Agenda"] = fout
    df["Formatted_Prerequisites"] = fpre
df["Course Start Date"] = pd.to_datetime(df["Course Start Date"], errors="coerce") | |
df["Date_fmt"] = df["Course Start Date"].dt.strftime("%-m/%-d/%Y") | |
dsorted=df.sort_values(["Course ID","Course Start Date"]) | |
d_agg = dsorted.groupby("Course ID")["Date_fmt"].apply(lambda s: ",".join(s.dropna().unique())).reset_index(name="Dates") | |
t_agg = dsorted.groupby("Course ID",group_keys=False).apply(lambda g: ",".join(f"{st}-{et} {tz}" for st,et,tz in zip(g["Course Start Time"],g["Course End Time"],g["Time Zone"]))).reset_index(name="Times") | |
parents = dsorted.drop_duplicates("Course ID").merge(d_agg).merge(t_agg) | |
    parent = pd.DataFrame({
        "Type": "variable",
        "SKU": parents["Course ID"],
        "Name": parents["Course Name"],
        "Published": 1,
        "Visibility in catalog": "visible",
        "Short description": parents["Short_Description"],
        "Description": parents["Condensed_Description"],
        "Tax status": "taxable",
        "In stock?": 1,
        "Stock": 1,
        "Sold individually?": 1,
        "Regular price": parents["SRP Pricing"].replace("[\\$,]", "", regex=True),
        "Categories": "courses",
        "Images": parents["Vendor"].map(logos).fillna(""),
        "Parent": "",
        "Brands": parents["Vendor"],
        "Attribute 1 name": "Date",
        "Attribute 1 value(s)": parents["Dates"],
        "Attribute 1 visible": "visible",
        "Attribute 1 global": 1,
        "Attribute 2 name": "Location",
        "Attribute 2 value(s)": "Virtual",
        "Attribute 2 visible": "visible",
        "Attribute 2 global": 1,
        "Attribute 3 name": "Time",
        "Attribute 3 value(s)": parents["Times"],
        "Attribute 3 visible": "visible",
        "Attribute 3 global": 1,
        "Meta: outline": parents["Formatted_Agenda"],
        "Meta: days": parents[dur],
        "Meta: location": "Virtual",
        "Meta: overview": parents["Target Audience"],
        "Meta: objectives": parents["Formatted_Objectives"],
        "Meta: prerequisites": parents["Formatted_Prerequisites"],
        "Meta: agenda": parents["Formatted_Agenda"],
    })
    child = pd.DataFrame({
        "Type": "variation, virtual",
        "SKU": dsorted[sid].astype(str).str.strip(),
        "Name": dsorted["Course Name"],
        "Published": 1,
        "Visibility in catalog": "visible",
        "Short description": dsorted["Short_Description"],
        "Description": dsorted["Condensed_Description"],
        "Tax status": "taxable",
        "In stock?": 1,
        "Stock": 1,
        "Sold individually?": 1,
        "Regular price": dsorted["SRP Pricing"].replace("[\\$,]", "", regex=True),
        "Categories": "courses",
        "Images": dsorted["Vendor"].map(logos).fillna(""),
        "Parent": dsorted["Course ID"],
        "Brands": dsorted["Vendor"],
        "Attribute 1 name": "Date",
        "Attribute 1 value(s)": dsorted["Date_fmt"],
        "Attribute 1 visible": "visible",
        "Attribute 1 global": 1,
        "Attribute 2 name": "Location",
        "Attribute 2 value(s)": "Virtual",
        "Attribute 2 visible": "visible",
        "Attribute 2 global": 1,
        "Attribute 3 name": "Time",
        "Attribute 3 value(s)": dsorted.apply(lambda r: f"{r['Course Start Time']}-{r['Course End Time']} {r['Time Zone']}", axis=1),
        "Attribute 3 visible": "visible",
        "Attribute 3 global": 1,
        "Meta: outline": dsorted["Formatted_Agenda"],
        "Meta: days": dsorted[dur],
        "Meta: location": "Virtual",
        "Meta: overview": dsorted["Target Audience"],
        "Meta: objectives": dsorted["Formatted_Objectives"],
        "Meta: prerequisites": dsorted["Formatted_Prerequisites"],
        "Meta: agenda": dsorted["Formatted_Agenda"],
    })
    all_rows = pd.concat([parent, child], ignore_index=True)
    order = [
        "Type", "SKU", "Name", "Published", "Visibility in catalog",
        "Short description", "Description", "Tax status", "In stock?", "Stock",
        "Sold individually?", "Regular price", "Categories", "Images", "Parent",
        "Brands",
        "Attribute 1 name", "Attribute 1 value(s)", "Attribute 1 visible", "Attribute 1 global",
        "Attribute 2 name", "Attribute 2 value(s)", "Attribute 2 visible", "Attribute 2 global",
        "Attribute 3 name", "Attribute 3 value(s)", "Attribute 3 visible", "Attribute 3 global",
        "Meta: outline", "Meta: days", "Meta: location", "Meta: overview",
        "Meta: objectives", "Meta: prerequisites", "Meta: agenda",
    ]
    out = BytesIO()
    all_rows[order].to_csv(out, index=False, encoding="utf-8-sig")
    out.seek(0)
    return out
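# Example (hypothetical file names): run the converter outside Gradio.
#   buf = convert("netcom_schedule.xlsx")
#   Path("woocommerce_import.csv").write_bytes(buf.getvalue())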
# -------- Gradio wrappers ----------------------------------------------------
def process_file(upload):
    """Gradio handler: convert the upload and return a temp-file path for download."""
    csv_bytes = convert(upload.name)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
        tmp.write(csv_bytes.getvalue())
        path = tmp.name
    return path
ui = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="Upload NetCom CSV / Excel", file_types=[".csv", ".xlsx", ".xls"]),
    outputs=gr.File(label="Download WooCommerce CSV"),
    title="NetCom → WooCommerce CSV Processor",
    description="Upload a NetCom schedule (.csv/.xlsx) to get the Try 1-formatted WooCommerce CSV.",
    analytics_enabled=False,
)
if __name__ == "__main__":
    if not os.getenv("OPENAI_API_KEY"):
        print("⚠️ OPENAI_API_KEY not set – AI features will error")
    ui.launch()