codys12 committed
Commit
f86c87e
·
verified ·
1 Parent(s): 8b2b5a0

Update app.py

Files changed (1)
  1. app.py +312 -362
app.py CHANGED
@@ -1,413 +1,363 @@
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
-
- """
- *NetCom → WooCommerce CSV/Excel Processor*
- Robust edition – catches and logs every recoverable error so one failure never
- brings the whole pipeline down. Only small, surgical changes were made.
- """
-
  import gradio as gr
  import pandas as pd
  import tempfile
- import os, sys, json, re, hashlib, asyncio, aiohttp, traceback
  from io import BytesIO
  from pathlib import Path
  from functools import lru_cache
- import openai
- import gradio_client.utils
-
- # ────────────────────────────── HELPERS ──────────────────────────────
- def _log(err: Exception, msg: str = ""):
-     """Log errors without stopping execution."""
-     print(f"[WARN] {msg}: {err}", file=sys.stderr)
-     traceback.print_exception(err)
-
- # Patch: tolerate bad JSON-schemas produced by some OpenAI tools
  _original_json_schema_to_python_type = gradio_client.utils._json_schema_to_python_type
  def _fixed_json_schema_to_python_type(schema, defs=None):
-     try:
-         if isinstance(schema, bool):
-             return "any"
-         return _original_json_schema_to_python_type(schema, defs)
-     except Exception as e:  # last-chance fallback
-         _log(e, "json_schema_to_python_type failed")
          return "any"
  gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type
- # ────────────────────────────── DISK CACHE ──────────────────────────────
- CACHE_DIR = Path("ai_response_cache"); CACHE_DIR.mkdir(exist_ok=True)
- def _cache_path(prompt):  # deterministic path
-     return CACHE_DIR / f"{hashlib.md5(prompt.encode()).hexdigest()}.json"
-
  def get_cached_response(prompt):
-     try:
-         p = _cache_path(prompt)
-         if p.exists():
-             return json.loads(p.read_text(encoding="utf-8"))["response"]
-     except Exception as e:
-         _log(e, "reading cache")
      return None
-
  def cache_response(prompt, response):
      try:
-         _cache_path(prompt).write_text(
-             json.dumps({"prompt": prompt, "response": response}), encoding="utf-8"
-         )
      except Exception as e:
-         _log(e, "writing cache")
- # ────────────────────────────── OPENAI ──────────────────────────────
- async def _call_openai(client, prompt):
-     """Single protected OpenAI call."""
-     try:
-         rsp = await client.chat.completions.create(
-             model="gpt-4o-mini",
-             messages=[{"role": "user", "content": prompt}],
-             temperature=0,
-         )
-         return rsp.choices[0].message.content
-     except Exception as e:
-         _log(e, "OpenAI error")
-         return f"Error: {e}"
-
- async def process_text_batch_async(client, prompts):
-     """Return results in original order, resilient to any error."""
-     results, tasks = {}, []
-     for p in prompts:
-         cached = get_cached_response(p)
-         if cached is not None:
-             results[p] = cached
-         else:
-             tasks.append(asyncio.create_task(_call_openai(client, p)))
-
-     for prompt, task in zip([p for p in prompts if p not in results], tasks):
-         try:
-             res = await task
-         except Exception as e:
-             _log(e, "async OpenAI task")
-             res = f"Error: {e}"
-         cache_response(prompt, res)
-         results[prompt] = res
-     return [results[p] for p in prompts]
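A stubbed sketch of the ordering contract above (fake_call and the toy prompts are hypothetical; no network is involved): cached prompts are answered from results, the rest are awaited, and the returned list follows the original prompt order.

import asyncio

async def fake_call(prompt):              # stand-in for _call_openai
    return prompt.upper()

async def demo():
    prompts = ["a", "b", "a"]
    results = {"a": "CACHED-A"}           # pretend "a" was already cached
    pending = [p for p in prompts if p not in results]
    tasks = [asyncio.create_task(fake_call(p)) for p in pending]
    for p, t in zip(pending, tasks):
        results[p] = await t
    return [results[p] for p in prompts]

print(asyncio.run(demo()))                # ['CACHED-A', 'B', 'CACHED-A']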
  async def process_text_with_ai_async(texts, instruction):
      if not texts:
          return []
      client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-     batch_size, out = 500, []
      for i in range(0, len(texts), batch_size):
-         prompts = [f"{instruction}\n\nText: {t}" for t in texts[i : i + batch_size]]
-         out.extend(await process_text_batch_async(client, prompts))
-     return out
-
- # ────────────────────────────── MAIN TRANSFORM ──────────────────────────────
- def process_woocommerce_data_in_memory(upload):
-     """Convert NetCom → Woo CSV/XLSX; every stage guarded."""
-     try:
-         # brand → logo mapping
-         brand_logo = {
-             "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
-             "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
-             "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
-             "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
-             "EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
-             "ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
-             "PMI": "/wp-content/uploads/2025/04/PMI.png",
-             "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
-             "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
-             "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
-             "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png",
-         }
-         default_prereq = (
-             "No specific prerequisites are required for this course. "
-             "Basic computer literacy and familiarity with fundamental concepts in the "
-             "subject area are recommended for the best learning experience."
-         )
-         # ---------------- I/O ----------------
-         ext = Path(upload.name).suffix.lower()
-         try:
-             if ext in {".xlsx", ".xls"}:
-                 try:
-                     df = pd.read_excel(upload.name, sheet_name="Active Schedules")
-                 except Exception as e:
-                     _log(e, "Excel read failed (falling back to first sheet)")
-                     df = pd.read_excel(upload.name, sheet_name=0)
-             else:  # CSV
-                 try:
-                     df = pd.read_csv(upload.name, encoding="latin1")
-                 except Exception as e:
-                     _log(e, "CSV read failed (trying utf-8)")
-                     df = pd.read_csv(upload.name, encoding="utf-8", encoding_errors="ignore")
-         except Exception as e:
-             _log(e, "file read totally failed")
-             raise
-
-         df.columns = df.columns.str.strip()
-         # --------- column harmonisation (new vs old formats) ----------
-         rename_map = {
-             "Decription": "Description",
-             "description": "Description",
-             "Objectives": "Objectives",
-             "objectives": "Objectives",
-             "RequiredPrerequisite": "Required Prerequisite",
-             "Required Pre-requisite": "Required Prerequisite",
-             "RequiredPre-requisite": "Required Prerequisite",
-         }
-         df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True)
-
-         # duration if missing
-         if "Duration" not in df.columns:
-             try:
-                 df["Duration"] = (
-                     pd.to_datetime(df["Course End Date"]) - pd.to_datetime(df["Course Start Date"])
-                 ).dt.days.add(1)
-             except Exception as e:
-                 _log(e, "duration calc failed")
-                 df["Duration"] = ""
-
-         # ---------------- ASYNC AI ----------------
-         loop = asyncio.new_event_loop()
-         asyncio.set_event_loop(loop)
-         col_desc = "Description"
-         col_obj = "Objectives"
-         col_prereq = "Required Prerequisite"
-         try:
-             res = loop.run_until_complete(
-                 asyncio.gather(
-                     process_text_with_ai_async(
-                         df[col_desc].fillna("").tolist(),
-                         "Create a concise 250-character summary of this course description:",
-                     ),
-                     process_text_with_ai_async(
-                         df[col_desc].fillna("").tolist(),
-                         "Condense this description to maximum 750 characters in paragraph format, with clean formatting:",
-                     ),
-                     process_text_with_ai_async(
-                         df[col_obj].fillna("").tolist(),
-                         "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':",
-                     ),
-                     process_text_with_ai_async(
-                         df["Outline"].fillna("").tolist(),
-                         "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':",
-                     ),
-                 )
              )
-         except Exception as e:
-             _log(e, "async AI gather failed")
-             res = [[""] * len(df)] * 4
-         finally:
-             loop.close()
-
-         short_desc, long_desc, objectives, agendas = res
-
-         # prerequisites handled synchronously (tiny)
-         prereq_out = []
-         for p in df[col_prereq].fillna("").tolist():
-             if not p.strip():
-                 prereq_out.append(default_prereq)
-             else:
-                 try:
-                     prereq_out.append(
-                         asyncio.run(
-                             process_text_with_ai_async(
-                                 [p],
-                                 "Format these prerequisites into a bullet list format with clean formatting. Start each bullet with '• ':",
-                             )
-                         )[0]
-                     )
-                 except Exception as e:
-                     _log(e, "prereq AI failed")
-                     prereq_out.append(default_prereq)
-         # ---------------- DATAFRAME BUILD ----------------
-         try:
-             df["Short_Description"] = short_desc
-             df["Condensed_Description"] = long_desc
-             df["Formatted_Objectives"] = objectives
-             df["Formatted_Prerequisites"] = prereq_out
-             df["Formatted_Agenda"] = agendas
-         except Exception as e:
-             _log(e, "adding AI columns")
-
-         # 2. aggregate date/time
-         df = df.sort_values(["Course ID", "Course Start Date"])
-         date_agg = (
-             df.groupby("Course ID")["Course Start Date"]
-             .apply(lambda x: ",".join(x.astype(str).unique()))
-             .reset_index(name="Aggregated_Dates")
-         )
-         time_agg = (
-             df.groupby("Course ID")
-             .apply(
-                 lambda d: ",".join(
-                     f"{s}-{e} {tz}"
-                     for s, e, tz in zip(
-                         d["Course Start Time"], d["Course End Time"], d["Time Zone"]
-                     )
-                 )
-             )
-             .reset_index(name="Aggregated_Times")
-         )
-         parent = df.drop_duplicates(subset=["Course ID"]).merge(date_agg).merge(time_agg)
-         woo_parent_df = pd.DataFrame(
-             {
-                 "Type": "variable",
-                 "SKU": parent["Course ID"],
-                 "Name": parent["Course Name"],
-                 "Published": 1,
-                 "Visibility in catalog": "visible",
-                 "Short description": parent["Short_Description"],
-                 "Description": parent["Condensed_Description"],
-                 "Tax status": "taxable",
-                 "In stock?": 1,
-                 "Regular price": parent["SRP Pricing"].replace("[\\$,]", "", regex=True),
-                 "Categories": "courses",
-                 "Images": parent["Vendor"].map(brand_logo).fillna(""),
-                 "Parent": "",
-                 "Brands": parent["Vendor"],
-                 "Attribute 1 name": "Date",
-                 "Attribute 1 value(s)": parent["Aggregated_Dates"],
-                 "Attribute 1 visible": "visible",
-                 "Attribute 1 global": 1,
-                 "Attribute 2 name": "Location",
-                 "Attribute 2 value(s)": "Virtual",
-                 "Attribute 2 visible": "visible",
-                 "Attribute 2 global": 1,
-                 "Attribute 3 name": "Time",
-                 "Attribute 3 value(s)": parent["Aggregated_Times"],
-                 "Attribute 3 visible": "visible",
-                 "Attribute 3 global": 1,
-                 "Meta: outline": parent["Formatted_Agenda"],
-                 "Meta: days": parent["Duration"],
-                 "Meta: location": "Virtual",
-                 "Meta: overview": parent["Target Audience"],
-                 "Meta: objectives": parent["Formatted_Objectives"],
-                 "Meta: prerequisites": parent["Formatted_Prerequisites"],
-                 "Meta: agenda": parent["Formatted_Agenda"],
-             }
-         )
-
-         woo_child_df = pd.DataFrame(
-             {
-                 "Type": "variation, virtual",
-                 "SKU": df["Course SID"],
-                 "Name": df["Course Name"],
-                 "Published": 1,
-                 "Visibility in catalog": "visible",
-                 "Short description": df["Short_Description"],
-                 "Description": df["Condensed_Description"],
-                 "Tax status": "taxable",
-                 "In stock?": 1,
-                 "Regular price": df["SRP Pricing"].replace("[\\$,]", "", regex=True),
-                 "Categories": "courses",
-                 "Images": df["Vendor"].map(brand_logo).fillna(""),
-                 "Parent": df["Course ID"],
-                 "Brands": df["Vendor"],
-                 "Attribute 1 name": "Date",
-                 "Attribute 1 value(s)": df["Course Start Date"],
-                 "Attribute 1 visible": "visible",
-                 "Attribute 1 global": 1,
-                 "Attribute 2 name": "Location",
-                 "Attribute 2 value(s)": "Virtual",
-                 "Attribute 2 visible": "visible",
-                 "Attribute 2 global": 1,
-                 "Attribute 3 name": "Time",
-                 "Attribute 3 value(s)": df.apply(
-                     lambda r: f"{r['Course Start Time']}-{r['Course End Time']} {r['Time Zone']}",
-                     axis=1,
-                 ),
-                 "Attribute 3 visible": "visible",
-                 "Attribute 3 global": 1,
-                 "Meta: outline": df["Formatted_Agenda"],
-                 "Meta: days": df["Duration"],
-                 "Meta: location": "Virtual",
-                 "Meta: overview": df["Target Audience"],
-                 "Meta: objectives": df["Formatted_Objectives"],
-                 "Meta: prerequisites": df["Formatted_Prerequisites"],
-                 "Meta: agenda": df["Formatted_Agenda"],
-             }
-         )
-         final_cols = [
-             "Type",
-             "SKU",
-             "Name",
-             "Published",
-             "Visibility in catalog",
-             "Short description",
-             "Description",
-             "Tax status",
-             "In stock?",
-             "Regular price",
-             "Categories",
-             "Images",
-             "Parent",
-             "Brands",
-             "Attribute 1 name",
-             "Attribute 1 value(s)",
-             "Attribute 1 visible",
-             "Attribute 1 global",
-             "Attribute 2 name",
-             "Attribute 2 value(s)",
-             "Attribute 2 visible",
-             "Attribute 2 global",
-             "Attribute 3 name",
-             "Attribute 3 value(s)",
-             "Attribute 3 visible",
-             "Attribute 3 global",
-             "Meta: outline",
-             "Meta: days",
-             "Meta: location",
-             "Meta: overview",
-             "Meta: objectives",
-             "Meta: prerequisites",
-             "Meta: agenda",
-         ]
-
-         woo_final_df = pd.concat([woo_parent_df, woo_child_df], ignore_index=True)[
-             final_cols
-         ]
-         buf = BytesIO()
-         woo_final_df.to_csv(buf, index=False, encoding="utf-8-sig")
-         buf.seek(0)
-         return buf
-     except Exception as e:
-         _log(e, "fatal transformation error")
-         err_buf = BytesIO()
-         pd.DataFrame({"error": [str(e)]}).to_csv(err_buf, index=False)
-         err_buf.seek(0)
-         return err_buf
- # ────────────────────────────── GRADIO BINDINGS ──────────────────────────────
- def process_file(file):
-     try:
-         out_io = process_woocommerce_data_in_memory(file)
-         with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
-             tmp.write(out_io.getvalue())
-             return tmp.name
-     except Exception as e:
-         _log(e, "top-level process_file")
-         with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp:
-             tmp.write(f"Processing failed:\n{e}".encode())
-             return tmp.name
  interface = gr.Interface(
      fn=process_file,
-     inputs=gr.File(label="Upload NetCom Schedule", file_types=[".csv", ".xlsx", ".xls"]),
      outputs=gr.File(label="Download WooCommerce CSV"),
-     title="NetCom → WooCommerce CSV/Excel Processor",
-     description="Upload a NetCom Reseller Schedule CSV or XLSX to generate a WooCommerce-ready CSV.",
      analytics_enabled=False,
  )
-
- if __name__ == "__main__":  # run
-     if not os.getenv("OPENAI_API_KEY"):
-         print("[WARN] OPENAI_API_KEY not set; AI steps will error out.")
-     interface.launch()
  import gradio as gr
  import pandas as pd
  import tempfile
+ import os
  from io import BytesIO
+ import re
+ import openai
+ import hashlib
+ import json
+ import asyncio
+ import aiohttp
  from pathlib import Path
+ from concurrent.futures import ThreadPoolExecutor
  from functools import lru_cache
+
+ import gradio_client.utils
  _original_json_schema_to_python_type = gradio_client.utils._json_schema_to_python_type
+
  def _fixed_json_schema_to_python_type(schema, defs=None):
+     # If the schema is a bool, return a fallback type (e.g. "any")
+     if isinstance(schema, bool):
          return "any"
+     return _original_json_schema_to_python_type(schema, defs)
+
  gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type
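A self-contained sketch of why the bool guard matters (python_type here is a hypothetical stand-in, not the gradio_client function): JSON Schema permits bare true/false as a schema, json parses those to Python bools, and a walker that assumes a dict then crashes.

def python_type(schema):
    if isinstance(schema, bool):          # `true` / `false` are valid schemas
        return "any"
    return schema.get("type", "any")      # ordinary dict schema

assert python_type(True) == "any"         # unguarded, this would raise AttributeError
assert python_type({"type": "string"}) == "string"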
+
+ # Create cache directory if it doesn't exist
+ CACHE_DIR = Path("ai_response_cache")
+ CACHE_DIR.mkdir(exist_ok=True)
+
+ def get_cache_path(prompt):
+     """Generate a unique cache file path based on the prompt content"""
+     prompt_hash = hashlib.md5(prompt.encode('utf-8')).hexdigest()
+     return CACHE_DIR / f"{prompt_hash}.json"
+
  def get_cached_response(prompt):
+     """Try to get a cached response for the given prompt"""
+     cache_path = get_cache_path(prompt)
+     if cache_path.exists():
+         try:
+             with open(cache_path, 'r', encoding='utf-8') as f:
+                 return json.load(f)['response']
+         except Exception as e:
+             print(f"Error reading cache: {e}")
      return None

  def cache_response(prompt, response):
+     """Cache the response for a given prompt"""
+     cache_path = get_cache_path(prompt)
      try:
+         with open(cache_path, 'w', encoding='utf-8') as f:
+             json.dump({'prompt': prompt, 'response': response}, f)
      except Exception as e:
+         print(f"Error writing to cache: {e}")
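Usage sketch (the prompt text is illustrative, and it assumes these helpers are importable from app.py): a cache entry is just a JSON file keyed by the MD5 of the prompt, so an identical prompt later in the run, or on a re-run, never reaches the API.

cache_response("Summarize: hello", "A greeting.")
assert get_cached_response("Summarize: hello") == "A greeting."
print(get_cache_path("Summarize: hello"))    # ai_response_cache/<md5-hex>.json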
+ async def process_text_batch_async(client, batch_prompts):
+     """Process a batch of prompts asynchronously"""
+     results = []
+
+     # First check cache for each prompt
+     for prompt in batch_prompts:
+         cached = get_cached_response(prompt)
+         if cached is not None:
+             results.append((prompt, cached))
+
+     # Filter out prompts that were found in cache
+     cached_set = {prompt for prompt, _ in results}
+     uncached_prompts = [p for p in batch_prompts if p not in cached_set]
+
+     if uncached_prompts:
+         # Process uncached prompts in parallel
+         async def process_single_prompt(prompt):
+             try:
+                 response = await client.chat.completions.create(
+                     model="gpt-4o-mini",
+                     messages=[{"role": "user", "content": prompt}],
+                     temperature=0
+                 )
+                 result = response.choices[0].message.content
+                 # Cache the result
+                 cache_response(prompt, result)
+                 return prompt, result
+             except Exception as e:
+                 print(f"Error processing prompt: {e}")
+                 return prompt, f"Error: {str(e)}"
+
+         # Create tasks for all uncached prompts
+         tasks = [process_single_prompt(prompt) for prompt in uncached_prompts]
+
+         # Run all tasks concurrently and wait for them to complete
+         uncached_results = await asyncio.gather(*tasks)
+
+         # Combine cached and newly processed results
+         results.extend(uncached_results)
+
+     # Map results back to the original order of batch_prompts
+     prompt_to_result = {prompt: result for prompt, result in results}
+     return [prompt_to_result[prompt] for prompt in batch_prompts]
  async def process_text_with_ai_async(texts, instruction):
+     """Process text with GPT-4o-mini asynchronously in batches"""
      if not texts:
          return []
+
+     results = []
+     batch_size = 500
+
+     # Create OpenAI async client
      client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+     # Process in batches
      for i in range(0, len(texts), batch_size):
+         batch = texts[i:i+batch_size]
+         batch_prompts = [f"{instruction}\n\nText: {text}" for text in batch]
+
+         batch_results = await process_text_batch_async(client, batch_prompts)
+         results.extend(batch_results)
+
+     return results
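Driver sketch (assumes OPENAI_API_KEY is set; the texts are made up): a single asyncio.run call pushes a whole column through in batches of up to 500 prompts, and the output lines up index-for-index with the input.

texts = ["Intro to AWS networking.", "Advanced BGP design."]
summaries = asyncio.run(process_text_with_ai_async(
    texts, "Create a concise 250-character summary of this course description:"
))
assert len(summaries) == len(texts)    # order and length preserved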
+ def process_woocommerce_data_in_memory(netcom_file):
+     """
+     Reads the uploaded NetCom CSV file in memory, processes it into the WooCommerce format,
+     and returns the resulting CSV as bytes, suitable for download.
+     """
+     # Define the brand-to-logo mapping with updated URLs
+     brand_logo_map = {
+         "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
+         "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
+         "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
+         "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
+         "EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
+         "ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
+         "PMI": "/wp-content/uploads/2025/04/PMI.png",
+         "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
+         "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
+         "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
+         "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png"
+     }
+
+     # Default prerequisite text for courses without prerequisites
+     default_prerequisite = "No specific prerequisites are required for this course. Basic computer literacy and familiarity with fundamental concepts in the subject area are recommended for the best learning experience."
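A small sketch of how the mapping behaves downstream ("Unknown Vendor" is hypothetical, and brand_logo_map is assumed to be in scope): vendors without a logo entry get an empty Images cell rather than NaN.

import pandas as pd

vendors = pd.Series(["Cisco", "Unknown Vendor"])
print(vendors.map(brand_logo_map).fillna('').tolist())
# ['/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp', '']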
+     # 1. Read the uploaded CSV into a DataFrame
+     netcom_df = pd.read_csv(netcom_file.name, encoding='latin1')
+     netcom_df.columns = netcom_df.columns.str.strip()  # standardize column names
+
+     # Prepare text columns for AI processing (note: 'Decription' is the
+     # actual column name in the NetCom export)
+     descriptions = netcom_df['Decription'].fillna("").tolist()
+     objectives = netcom_df['Objectives'].fillna("").tolist()
+     prerequisites = netcom_df['RequiredPrerequisite'].fillna("").tolist()
+     agendas = netcom_df['Outline'].fillna("").tolist()
+
+     # Process with AI asynchronously
+     loop = asyncio.new_event_loop()
+     asyncio.set_event_loop(loop)
+
+     # Run all processing tasks concurrently
+     tasks = [
+         process_text_with_ai_async(
+             descriptions,
+             "Create a concise 250-character summary of this course description:"
+         ),
+         process_text_with_ai_async(
+             descriptions,
+             "Condense this description to maximum 750 characters in paragraph format, with clean formatting:"
+         ),
+         process_text_with_ai_async(
+             objectives,
+             "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':"
+         ),
+         process_text_with_ai_async(
+             agendas,
+             "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':"
+         )
+     ]
+
+     # Process prerequisites separately to handle the default case
+     formatted_prerequisites = []
+     for prereq in prerequisites:
+         if not prereq or pd.isna(prereq) or prereq.strip() == "":
+             formatted_prerequisites.append(default_prerequisite)
+         else:
+             # For non-empty prerequisites, process them with AI
+             prereq_result = loop.run_until_complete(process_text_with_ai_async(
+                 [prereq],
+                 "Format these prerequisites into a bullet list format with clean formatting. Start each bullet with '• ':"
+             ))
+             formatted_prerequisites.append(prereq_result[0])
+
+     # Run all tasks and get results
+     results = loop.run_until_complete(asyncio.gather(*tasks))
+     loop.close()
+
+     short_descriptions, condensed_descriptions, formatted_objectives, formatted_agendas = results
+
+     # Add processed text to the dataframe
+     netcom_df['Short_Description'] = short_descriptions
+     netcom_df['Condensed_Description'] = condensed_descriptions
+     netcom_df['Formatted_Objectives'] = formatted_objectives
+     netcom_df['Formatted_Prerequisites'] = formatted_prerequisites
+     netcom_df['Formatted_Agenda'] = formatted_agendas
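Toy sketch of the event-loop pattern used above (the work coroutine is hypothetical): one manually created loop serves several run_until_complete calls, the per-row prerequisite calls and then the big gather, and is closed only afterwards.

import asyncio

async def work(n):
    return n * 2

loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
first = loop.run_until_complete(work(1))                          # per-row style call
rest = loop.run_until_complete(asyncio.gather(work(2), work(3)))  # batched gather
loop.close()
print(first, rest)    # 2 [4, 6]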
+     # 2. Create aggregated dates and times for each Course ID
+     # Sort by Course ID and date first
+     netcom_df = netcom_df.sort_values(['Course ID', 'Course Start Date'])
+
+     date_agg = (
+         netcom_df.groupby('Course ID')['Course Start Date']
+         .apply(lambda x: ','.join(x.astype(str).unique()))
+         .reset_index(name='Aggregated_Dates')
+     )
+
+     time_agg = (
+         netcom_df.groupby('Course ID')
+         .apply(
+             lambda df: ','.join(
+                 f"{st}-{et} {tz}"
+                 for st, et, tz in zip(df['Course Start Time'],
+                                       df['Course End Time'],
+                                       df['Time Zone'])
              )
+         )
+         .reset_index(name='Aggregated_Times')
+     )
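Worked example of the aggregation on a toy frame (values are hypothetical): two sessions of one course collapse into a single comma-joined Date attribute for the parent product.

import pandas as pd

toy = pd.DataFrame({
    "Course ID": ["C1", "C1"],
    "Course Start Date": ["2025-05-01", "2025-06-01"],
})
dates = toy.groupby("Course ID")["Course Start Date"].apply(
    lambda x: ",".join(x.astype(str).unique())
)
print(dates["C1"])    # 2025-05-01,2025-06-01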
+     # 3. Extract unique parent products
+     parent_products = netcom_df.drop_duplicates(subset=['Course ID'])
+
+     # 4. Merge aggregated dates and times
+     parent_products = parent_products.merge(date_agg, on='Course ID', how='left')
+     parent_products = parent_products.merge(time_agg, on='Course ID', how='left')
+     # 5. Create parent (variable) products
+     woo_parent_df = pd.DataFrame({
+         'Type': 'variable',
+         'SKU': parent_products['Course ID'],
+         'Name': parent_products['Course Name'],
+         'Published': 1,
+         'Visibility in catalog': 'visible',
+         'Short description': parent_products['Short_Description'],
+         'Description': parent_products['Condensed_Description'],
+         'Tax status': 'taxable',
+         'In stock?': 1,
+         'Regular price': parent_products['SRP Pricing'].replace(r'[\$,]', '', regex=True),
+         'Categories': 'courses',
+         'Images': parent_products['Vendor'].map(brand_logo_map).fillna(''),
+         'Parent': '',
+         'Brands': parent_products['Vendor'],
+         'Attribute 1 name': 'Date',
+         'Attribute 1 value(s)': parent_products['Aggregated_Dates'],
+         'Attribute 1 visible': 'visible',
+         'Attribute 1 global': 1,
+         'Attribute 2 name': 'Location',
+         'Attribute 2 value(s)': 'Virtual',
+         'Attribute 2 visible': 'visible',
+         'Attribute 2 global': 1,
+         'Attribute 3 name': 'Time',
+         'Attribute 3 value(s)': parent_products['Aggregated_Times'],
+         'Attribute 3 visible': 'visible',
+         'Attribute 3 global': 1,
+         'Meta: outline': parent_products['Formatted_Agenda'],
+         'Meta: days': parent_products['Duration'],
+         'Meta: location': 'Virtual',
+         'Meta: overview': parent_products['Target Audience'],
+         'Meta: objectives': parent_products['Formatted_Objectives'],
+         'Meta: prerequisites': parent_products['Formatted_Prerequisites'],
+         'Meta: agenda': parent_products['Formatted_Agenda']
+     })
+
+     # 6. Create child (variation) products
+     woo_child_df = pd.DataFrame({
+         'Type': 'variation, virtual',
+         'SKU': netcom_df['Course SID'],
+         'Name': netcom_df['Course Name'],
+         'Published': 1,
+         'Visibility in catalog': 'visible',
+         'Short description': netcom_df['Short_Description'],
+         'Description': netcom_df['Condensed_Description'],
+         'Tax status': 'taxable',
+         'In stock?': 1,
+         'Regular price': netcom_df['SRP Pricing'].replace(r'[\$,]', '', regex=True),
+         'Categories': 'courses',
+         'Images': netcom_df['Vendor'].map(brand_logo_map).fillna(''),
+         'Parent': netcom_df['Course ID'],
+         'Brands': netcom_df['Vendor'],
+         'Attribute 1 name': 'Date',
+         'Attribute 1 value(s)': netcom_df['Course Start Date'],
+         'Attribute 1 visible': 'visible',
+         'Attribute 1 global': 1,
+         'Attribute 2 name': 'Location',
+         'Attribute 2 value(s)': 'Virtual',
+         'Attribute 2 visible': 'visible',
+         'Attribute 2 global': 1,
+         'Attribute 3 name': 'Time',
+         'Attribute 3 value(s)': netcom_df.apply(
+             lambda row: f"{row['Course Start Time']}-{row['Course End Time']} {row['Time Zone']}", axis=1
+         ),
+         'Attribute 3 visible': 'visible',
+         'Attribute 3 global': 1,
+         'Meta: outline': netcom_df['Formatted_Agenda'],
+         'Meta: days': netcom_df['Duration'],
+         'Meta: location': 'Virtual',
+         'Meta: overview': netcom_df['Target Audience'],
+         'Meta: objectives': netcom_df['Formatted_Objectives'],
+         'Meta: prerequisites': netcom_df['Formatted_Prerequisites'],
+         'Meta: agenda': netcom_df['Formatted_Agenda']
+     })
+     # 7. Combine parent + child products
+     woo_final_df = pd.concat([woo_parent_df, woo_child_df], ignore_index=True)
+     # 8. Desired column order (the Stock and Sold individually columns are intentionally omitted)
+     column_order = [
+         'Type', 'SKU', 'Name', 'Published', 'Visibility in catalog',
+         'Short description', 'Description', 'Tax status', 'In stock?',
+         'Regular price', 'Categories', 'Images',
+         'Parent', 'Brands', 'Attribute 1 name', 'Attribute 1 value(s)', 'Attribute 1 visible',
+         'Attribute 1 global', 'Attribute 2 name', 'Attribute 2 value(s)', 'Attribute 2 visible',
+         'Attribute 2 global', 'Attribute 3 name', 'Attribute 3 value(s)', 'Attribute 3 visible',
+         'Attribute 3 global', 'Meta: outline', 'Meta: days', 'Meta: location', 'Meta: overview',
+         'Meta: objectives', 'Meta: prerequisites', 'Meta: agenda'
+     ]
+     woo_final_df = woo_final_df[column_order]
+
+     # 9. Convert to CSV (in memory)
+     output_buffer = BytesIO()
+     woo_final_df.to_csv(output_buffer, index=False, encoding='utf-8-sig')
+     output_buffer.seek(0)
+
+     return output_buffer
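Why utf-8-sig, in one sketch: the three-byte BOM lets Excel auto-detect UTF-8, so the bullet characters ('•') written into the formatted fields survive a double-click open.

import pandas as pd
from io import BytesIO

buf = BytesIO()
pd.DataFrame({"Name": ["Course outline •"]}).to_csv(buf, index=False, encoding="utf-8-sig")
print(buf.getvalue()[:3])    # b'\xef\xbb\xbf', the UTF-8 byte-order mark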
+ def process_file(uploaded_file):
+     """
+     Takes the uploaded file, processes it, and returns the CSV as a file-like object
+     """
+     processed_csv_io = process_woocommerce_data_in_memory(uploaded_file)
+
+     # Create a temporary file to save the CSV data
+     with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
+         temp_file.write(processed_csv_io.getvalue())
+         temp_path = temp_file.name
+
+     return temp_path
  interface = gr.Interface(
      fn=process_file,
+     inputs=gr.File(label="Upload NetCom CSV", file_types=[".csv"]),
      outputs=gr.File(label="Download WooCommerce CSV"),
+     title="NetCom to WooCommerce CSV Processor",
+     description="Upload your NetCom Reseller Schedule CSV to generate the WooCommerce import-ready CSV.",
      analytics_enabled=False,
  )

+ if __name__ == "__main__":
+     openai_api_key = os.getenv("OPENAI_API_KEY")
+     if not openai_api_key:
+         print("Warning: OPENAI_API_KEY environment variable not set")
+     interface.launch()