Spaces:
Runtime error
Runtime error
import gradio as gr | |
import pandas as pd | |
import tempfile | |
import os | |
import json | |
import hashlib | |
import asyncio | |
from io import BytesIO | |
from pathlib import Path | |
import openai | |
import gradio_client.utils | |
"""NetCom → WooCommerce transformer (Try 1 schema) | |
================================================= | |
Drop a *Reseller Schedule* CSV and get back a WooCommerce‑ready CSV that matches | |
`Try 1 - WooCommerce_Mapped_Data__Fixed_Attributes_and_Agenda_.csv` exactly – | |
including `Stock` and `Sold individually?` columns that NetCom doesn’t supply. | |
Highlights | |
---------- | |
* Empty cells are skipped – no wasted GPT calls. | |
* GPT‑4o mini used with a tiny disk cache (`ai_response_cache/`). | |
* Brand → logo URLs hard‑coded below (update when media library changes). | |
""" | |
# --------------------------------------------------------------------------- | |
# Gradio JSON‑schema helper hot‑patch (bool schema bug) | |
# --------------------------------------------------------------------------- | |
_original = gradio_client.utils._json_schema_to_python_type | |
def _fixed_json_schema_to_python_type(schema, defs=None): | |
if isinstance(schema, bool): # gradio 4.29 bug | |
return "any" | |
return _original(schema, defs) | |
gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type # type: ignore | |
# --------------------------------------------------------------------------- | |
# Tiny disk cache for OpenAI responses | |
# --------------------------------------------------------------------------- | |
CACHE_DIR = Path("ai_response_cache"); CACHE_DIR.mkdir(exist_ok=True) | |
def _cache_path(prompt: str) -> Path: | |
return CACHE_DIR / f"{hashlib.md5(prompt.encode()).hexdigest()}.json" | |
def _get_cached(prompt: str): | |
try: | |
return json.loads(_cache_path(prompt).read_text("utf-8"))["response"] | |
except Exception: | |
return None | |
def _set_cache(prompt: str, rsp: str): | |
try: | |
_cache_path(prompt).write_text(json.dumps({"prompt": prompt, "response": rsp}), "utf-8") | |
except Exception: | |
pass | |
# --------------------------------------------------------------------------- | |
# Async GPT helpers | |
# --------------------------------------------------------------------------- | |
async def _gpt(client: openai.AsyncOpenAI, prompt: str) -> str: | |
cached = _get_cached(prompt) | |
if cached is not None: | |
return cached | |
try: | |
cmp = await client.chat.completions.create( | |
model="gpt-4o-mini", | |
messages=[{"role": "user", "content": prompt}], | |
temperature=0, | |
) | |
txt = cmp.choices[0].message.content | |
except Exception as e: | |
txt = f"Error: {e}" | |
_set_cache(prompt, txt) | |
return txt | |
async def _batch(texts: list[str], instruction: str) -> list[str]: | |
"""Return len(texts) list. Blank inputs remain blank.""" | |
res = ["" for _ in texts] | |
idx, prompts = [], [] | |
for i, t in enumerate(texts): | |
if isinstance(t, str) and t.strip(): | |
idx.append(i); prompts.append(f"{instruction}\n\nText: {t}") | |
if not prompts: | |
return res | |
client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) | |
tasks = [_gpt(client, p) for p in prompts] | |
outs = await asyncio.gather(*tasks) | |
for k, v in enumerate(outs): | |
res[idx[k]] = v | |
return res | |
# --------------------------------------------------------------------------- | |
# Main converter | |
# --------------------------------------------------------------------------- | |
def process_woocommerce_data_in_memory(netcom_file): | |
"""Return BytesIO of Woo CSV.""" | |
# Brand logos | |
brand_logo_map = { | |
"Amazon Web Services": "/wp-content/uploads/2025/04/aws.png", | |
"Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp", | |
"Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png", | |
"Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png", | |
"EC Council": "/wp-content/uploads/2025/04/Ec_Council.png", | |
"ITIL": "/wp-content/uploads/2025/04/ITIL.webp", | |
"PMI": "/wp-content/uploads/2025/04/PMI.png", | |
"Comptia": "/wp-content/uploads/2025/04/Comptia.png", | |
"Autodesk": "/wp-content/uploads/2025/04/autodesk.png", | |
"ISC2": "/wp-content/uploads/2025/04/ISC2.png", | |
"AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png", | |
} | |
default_prereq = ( | |
"No specific prerequisites are required for this course. " | |
"Basic computer literacy and familiarity with fundamental concepts in the subject area are recommended for the best learning experience." | |
) | |
# Load NetCom CSV | |
df = pd.read_csv(netcom_file.name, encoding="latin1"); df.columns = df.columns.str.strip() | |
def _col(opts): | |
return next((c for c in opts if c in df.columns), None) | |
# Column aliases | |
col_desc = _col(["Description", "Decription"]) | |
col_obj = _col(["Objectives", "objectives"]) | |
col_pre = _col(["RequiredPrerequisite", "Required Pre-requisite"]) | |
col_out = _col(["Outline"]) | |
col_dur = _col(["Duration"]) | |
col_sid = _col(["Course SID", "Course SID"]) | |
if col_dur is None: | |
df["Duration"] = ""; col_dur = "Duration" | |
# AI prep lists | |
descs, objs, pres, outs = (df.get(c, pd.Series([""]*len(df))).fillna("").tolist() for c in (col_desc, col_obj, col_pre, col_out)) | |
loop = asyncio.new_event_loop(); asyncio.set_event_loop(loop) | |
short_d, long_d, fmt_obj, fmt_out = loop.run_until_complete(asyncio.gather( | |
_batch(descs, "Create a concise 250-character summary of this course description:"), | |
_batch(descs, "Condense this description to a maximum of 750 characters in paragraph format, with clean formatting:"), | |
_batch(objs, "Format these objectives into a bullet list with clean formatting. Start each bullet with '• ':"), | |
_batch(outs, "Format this agenda into a bullet list with clean formatting. Start each bullet with '• ':"), | |
)); loop.close() | |
fmt_pre = [default_prereq if not str(p).strip() else asyncio.run(_batch([p], "Format these prerequisites into a bullet list with clean formatting. Start each bullet with '• ':"))[0] for p in pres] | |
# Attach processed cols | |
df["Short_Description"] = short_d; df["Condensed_Description"] = long_d | |
df["Formatted_Objectives"] = fmt_obj; df["Formatted_Agenda"] = fmt_out; df["Formatted_Prerequisites"] = fmt_pre | |
# Dates | |
df["Course Start Date"] = pd.to_datetime(df["Course Start Date"], errors="coerce") | |
df["Date_fmt"] = df["Course Start Date"].dt.strftime("%-m/%-d/%Y") | |
df_sorted = df.sort_values(["Course ID", "Course Start Date"]) | |
date_agg = df_sorted.groupby("Course ID")["Date_fmt"].apply(lambda s: ",".join(s.dropna().unique())).reset_index(name="Aggregated_Dates") | |
time_agg = df_sorted.groupby("Course ID").apply(lambda g: ",".join(f"{st}-{et} {tz}" for st, et, tz in zip(g["Course Start Time"], g["Course End Time"], g["Time Zone"]))).reset_index(name="Aggregated_Times") | |
parents = df_sorted.drop_duplicates("Course ID").merge(date_agg).merge(time_agg) | |
# Parent rows | |
woo_parent = pd.DataFrame({ | |
"Type": "variable", | |
"SKU": parents["Course ID"], | |
"Name": parents["Course Name"], | |
"Published": 1, | |
"Visibility in catalog": "visible", | |
"Short description": parents["Short_Description"], | |
"Description": parents["Condensed_Description"], | |
"Tax status": "taxable", | |
"In stock?": 1, | |
"Stock": 1, | |
"Sold individually?": 1, | |
"Regular price": parents["SRP Pricing"].replace("[\\$,]", "", regex=True), | |
"Categories": "courses", | |
"Images": parents["Vendor"].map(brand_logo_map).fillna(""), | |
"Parent": "", | |
"Brands": parents["Vendor"], | |
# Attributes | |
"Attribute 1 name": "Date", "Attribute 1 value(s)": parents["Aggregated_Dates"], "Attribute 1 visible": "visible", "Attribute 1 global": 1, | |
"Attribute 2 name": "Location", "Attribute 2 value(s)": "Virtual", "Attribute 2 visible": "visible", "Attribute 2 global": 1, | |
"Attribute 3 name": "Time", "Attribute 3 value(s)": parents["Aggregated_Times"], "Attribute 3 visible": "visible", "Attribute 3 global": 1, | |
# Meta | |
"Meta: outline": parents["Formatted_Agenda"], "Meta: days": parents[col_dur], "Meta: location": "Virtual", | |
"Meta: overview": parents["Target Audience"], "Meta: objectives": parents["Formatted_Objectives"], | |
"Meta: prerequisites": parents["Formatted_Prerequisites"], "Meta: agenda": parents["Formatted_Agenda"], | |
}) | |
# Child rows | |
woo_child = pd.DataFrame({ | |
"Type": "variation, virtual", | |
"SKU": df_sorted[col_sid].astype(str).str.strip(), | |
"Name": df_sorted["Course Name"], | |
"Published": 1, | |
"Visibility in catalog": "visible", | |
"Short description": df_sorted["Short_Description"], | |
"Description": df_sorted["Condensed_Description"], | |
"Tax status": "taxable", | |
"In stock?": 1, | |
"Stock": 1, | |
"Sold individually?": 1, | |
"Regular price": df_sorted["SRP Pricing"].replace("[\\$,]", "", regex=True), | |
"Categories": "courses", | |
"Images": df_sorted["Vendor"].map(brand_logo_map).fillna(""), | |
"Parent": df_sorted["Course ID"], | |
"Brands": df_sorted["Vendor"], | |
"Attribute 1 name": "Date", "Attribute 1 value(s)": df_sorted["Date_fmt"], "Attribute 1 visible": "visible", "Attribute 1 global": 1, | |
"Attribute 2 name": "Location", "Attribute 2 value(s)": "Virtual", "Attribute 2 visible": "visible", "Attribute 2 global": 1, | |
"Attribute 3 name": "Time", "Attribute 3 value(s)": df_sorted.apply(lambda r: f"{r['Course Start Time']}-{r['Course End Time']} {r['Time Zone']}", axis=1), "Attribute 3 visible": "visible", "Attribute 3 global": 1, | |
"Meta: outline": df_sorted["Formatted_Agenda"], "Meta: days": df_sorted[col_dur], "Meta: location": "Virtual", | |
"Meta: overview": df_sorted["Target Audience"], "Meta: objectives": df_sorted["Formatted_Objectives"], | |
"Meta: prerequisites": df_sorted["Formatted_Prerequisites"], "Meta: agenda": df_sorted["Formatted_Agenda"], | |
}) | |
# Combine & order | |
combined = pd.concat([woo_parent, woo_child], ignore_index=True) | |
column_order = [ | |
"Type","SKU","Name","Published","Visibility in catalog","Short description","Description","Tax status","In stock?","Stock","Sold individually?","Regular price","Categories","Images","Parent","Brands", "Attribute 1 name","Attribute 1 value(s)","Attribute 1 visible","Attribute 1 global","Attribute 2 name","Attribute 2 value(s)","Attribute 2 visible","Attribute 2 global","Attribute 3 name","Attribute 3 value(s)","Attribute 3 visible","Attribute 3 global","Meta: outline","Meta: days","Meta: location","Meta: overview","Meta: objectives","Meta: prerequisites","Meta: agenda" | |
] | |
combined = combined[column_order] | |
buf = BytesIO(); combined.to_csv(buf, index=False, encoding="utf-8-sig"); buf.seek(0); return buf | |
# --------------------------------------------------------------------------- | |
# Gradio wrapper | |
# --------------------------------------------------------------------------- | |
def process_file(upload): | |
return process_woocommerce_data_in_memory(upload) | |
interface = gr.Interface( | |
fn=process_file, | |
inputs=gr.File(label="Upload NetCom CSV", file_types=[".csv"]), | |
outputs=gr.File(label="Download WooCommerce CSV"), | |
title="NetCom → WooCommerce CSV Processor", | |
description="Upload a NetCom Reseller Schedule CSV to generate a WooCommerce‑import CSV (Try 1 schema).", | |
analytics_enabled=False, | |
) | |
if __name__ == "__main__": | |
if not os.getenv("OPENAI_API_KEY"): | |
print("⚠️ OPENAI_API_KEY not set – AI paraphrasing will error out") | |
interface.launch() | |