codys12 committed on
Commit b0ead86 · verified · 1 Parent(s): f86c87e

Update app.py

Files changed (1)
  1. app.py +181 -310
app.py CHANGED
--- app.py (before)
@@ -2,132 +2,105 @@ import gradio as gr
  import pandas as pd
  import tempfile
  import os
- from io import BytesIO
- import re
- import openai
- import hashlib
  import json
  import asyncio
- import aiohttp
  from pathlib import Path
- from concurrent.futures import ThreadPoolExecutor
- from functools import lru_cache
-
  import gradio_client.utils

- _original_json_schema_to_python_type = gradio_client.utils._json_schema_to_python_type

  def _fixed_json_schema_to_python_type(schema, defs=None):
-     # If the schema is a bool, return a fallback type (e.g. "any")
-     if isinstance(schema, bool):
          return "any"
-     return _original_json_schema_to_python_type(schema, defs)

- gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type

- # Create cache directory if it doesn't exist
- CACHE_DIR = Path("ai_response_cache")
- CACHE_DIR.mkdir(exist_ok=True)

- def get_cache_path(prompt):
-     """Generate a unique cache file path based on the prompt content"""
-     prompt_hash = hashlib.md5(prompt.encode('utf-8')).hexdigest()
-     return CACHE_DIR / f"{prompt_hash}.json"

- def get_cached_response(prompt):
-     """Try to get a cached response for the given prompt"""
-     cache_path = get_cache_path(prompt)
-     if cache_path.exists():
-         try:
-             with open(cache_path, 'r', encoding='utf-8') as f:
-                 return json.load(f)['response']
-         except Exception as e:
-             print(f"Error reading cache: {e}")
-     return None

- def cache_response(prompt, response):
-     """Cache the response for a given prompt"""
-     cache_path = get_cache_path(prompt)
      try:
-         with open(cache_path, 'w', encoding='utf-8') as f:
-             json.dump({'prompt': prompt, 'response': response}, f)
-     except Exception as e:
-         print(f"Error writing to cache: {e}")
-
-
- async def process_text_batch_async(client, batch_prompts):
-     """Process a batch of prompts asynchronously"""
-     results = []
-
-     # First check cache for each prompt
-     for prompt in batch_prompts:
-         cached = get_cached_response(prompt)
-         if cached:
-             results.append((prompt, cached))
-
-     # Filter out prompts that were found in cache
-     uncached_prompts = [p for p in batch_prompts if not any(p == cached_prompt for cached_prompt, _ in results)]
-
-     if uncached_prompts:
-         # Process uncached prompts in parallel
-         async def process_single_prompt(prompt):
-             try:
-                 response = await client.chat.completions.create(
-                     model="gpt-4o-mini",
-                     messages=[{"role": "user", "content": prompt}],
-                     temperature=0
-                 )
-                 result = response.choices[0].message.content
-                 # Cache the result
-                 cache_response(prompt, result)
-                 return prompt, result
-             except Exception as e:
-                 print(f"Error processing prompt: {e}")
-                 return prompt, f"Error: {str(e)}"
-
-         # Create tasks for all uncached prompts
-         tasks = [process_single_prompt(prompt) for prompt in uncached_prompts]
-
-         # Run all tasks concurrently and wait for them to complete
-         uncached_results = await asyncio.gather(*tasks)
-
-         # Combine cached and newly processed results
-         results.extend(uncached_results)
-
-     # Sort results to match original order of batch_prompts
-     prompt_to_result = {prompt: result for prompt, result in results}
-     return [prompt_to_result[prompt] for prompt in batch_prompts]


- async def process_text_with_ai_async(texts, instruction):
-     """Process text with GPT-4o-mini asynchronously in batches"""
-     if not texts:
-         return []
-
-     results = []
-     batch_size = 500
-
-     # Create OpenAI async client
      client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-
-     # Process in batches
-     for i in range(0, len(texts), batch_size):
-         batch = texts[i:i+batch_size]
-         batch_prompts = [f"{instruction}\n\nText: {text}" for text in batch]
-
-         batch_results = await process_text_batch_async(client, batch_prompts)
-         results.extend(batch_results)
-
-     return results


  def process_woocommerce_data_in_memory(netcom_file):
-     """
-     Reads the uploaded NetCom CSV file in-memory, processes it to the WooCommerce format,
-     and returns the resulting CSV as bytes, suitable for download.
-     """
-     # Define the brand-to-logo mapping with updated URLs
      brand_logo_map = {
          "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
          "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
@@ -139,225 +112,123 @@ def process_woocommerce_data_in_memory(netcom_file):
          "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
          "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
          "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
-         "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png"
      }
-
-     # Default prerequisite text for courses without prerequisites
-     default_prerequisite = "No specific prerequisites are required for this course. Basic computer literacy and familiarity with fundamental concepts in the subject area are recommended for the best learning experience."
-
-     # 1. Read the uploaded CSV into a DataFrame
-     netcom_df = pd.read_csv(netcom_file.name, encoding='latin1')
-     netcom_df.columns = netcom_df.columns.str.strip()  # standardize column names
-
-     # Prepare descriptions for AI processing
-     descriptions = netcom_df['Decription'].fillna("").tolist()
-     objectives = netcom_df['Objectives'].fillna("").tolist()
-     prerequisites = netcom_df['RequiredPrerequisite'].fillna("").tolist()
-     agendas = netcom_df['Outline'].fillna("").tolist()
-
-     # Process with AI asynchronously
-     loop = asyncio.new_event_loop()
-     asyncio.set_event_loop(loop)
-
-     # Run all processing tasks concurrently
-     tasks = [
-         process_text_with_ai_async(
-             descriptions,
-             "Create a concise 250-character summary of this course description:"
-         ),
-         process_text_with_ai_async(
-             descriptions,
-             "Condense this description to maximum 750 characters in paragraph format, with clean formatting:"
-         ),
-         process_text_with_ai_async(
-             objectives,
-             "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':"
-         ),
-         process_text_with_ai_async(
-             agendas,
-             "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':"
-         )
-     ]
-
-     # Process prerequisites separately to handle default case
-     formatted_prerequisites_task = []
-     for prereq in prerequisites:
-         if not prereq or pd.isna(prereq) or prereq.strip() == "":
-             formatted_prerequisites_task.append(default_prerequisite)
-         else:
-             # For non-empty prerequisites, we'll process them with AI
-             prereq_result = loop.run_until_complete(process_text_with_ai_async(
-                 [prereq],
-                 "Format these prerequisites into a bullet list format with clean formatting. Start each bullet with '• ':"
-             ))
-             formatted_prerequisites_task.append(prereq_result[0])
-
-     # Run all tasks and get results
-     results = loop.run_until_complete(asyncio.gather(*tasks))
-     loop.close()
-
-     short_descriptions, condensed_descriptions, formatted_objectives, formatted_agendas = results
-
-     # Add processed text to dataframe
-     netcom_df['Short_Description'] = short_descriptions
-     netcom_df['Condensed_Description'] = condensed_descriptions
-     netcom_df['Formatted_Objectives'] = formatted_objectives
-     netcom_df['Formatted_Prerequisites'] = formatted_prerequisites_task
-     netcom_df['Formatted_Agenda'] = formatted_agendas
-
-     # 2. Create aggregated dates and times for each Course ID
-     # Sort by Course ID and date first
-     netcom_df = netcom_df.sort_values(['Course ID', 'Course Start Date'])
-
-     date_agg = (
-         netcom_df.groupby('Course ID')['Course Start Date']
-         .apply(lambda x: ','.join(x.astype(str).unique()))
-         .reset_index(name='Aggregated_Dates')
-     )
-
-     time_agg = (
-         netcom_df.groupby('Course ID')
-         .apply(
-             lambda df: ','.join(
-                 f"{st}-{et} {tz}"
-                 for st, et, tz in zip(df['Course Start Time'],
-                                       df['Course End Time'],
-                                       df['Time Zone'])
-             )
-         )
-         .reset_index(name='Aggregated_Times')
      )
-
-     # 3. Extract unique parent products
-     parent_products = netcom_df.drop_duplicates(subset=['Course ID'])
-
-     # 4. Merge aggregated dates and times
-     parent_products = parent_products.merge(date_agg, on='Course ID', how='left')
-     parent_products = parent_products.merge(time_agg, on='Course ID', how='left')
-
-     # 5. Create parent (variable) products
-     woo_parent_df = pd.DataFrame({
-         'Type': 'variable',
-         'SKU': parent_products['Course ID'],
-         'Name': parent_products['Course Name'],
-         'Published': 1,
-         'Visibility in catalog': 'visible',
-         'Short description': parent_products['Short_Description'],
-         'Description': parent_products['Condensed_Description'],
-         'Tax status': 'taxable',
-         'In stock?': 1,
-         'Regular price': parent_products['SRP Pricing'].replace('[\$,]', '', regex=True),
-         'Categories': 'courses',
-         'Images': parent_products['Vendor'].map(brand_logo_map).fillna(''),
-         'Parent': '',
-         'Brands': parent_products['Vendor'],
-         'Attribute 1 name': 'Date',
-         'Attribute 1 value(s)': parent_products['Aggregated_Dates'],
-         'Attribute 1 visible': 'visible',
-         'Attribute 1 global': 1,
-         'Attribute 2 name': 'Location',
-         'Attribute 2 value(s)': 'Virtual',
-         'Attribute 2 visible': 'visible',
-         'Attribute 2 global': 1,
-         'Attribute 3 name': 'Time',
-         'Attribute 3 value(s)': parent_products['Aggregated_Times'],
-         'Attribute 3 visible': 'visible',
-         'Attribute 3 global': 1,
-         'Meta: outline': parent_products['Formatted_Agenda'],
-         'Meta: days': parent_products['Duration'],
-         'Meta: location': 'Virtual',
-         'Meta: overview': parent_products['Target Audience'],
-         'Meta: objectives': parent_products['Formatted_Objectives'],
-         'Meta: prerequisites': parent_products['Formatted_Prerequisites'],
-         'Meta: agenda': parent_products['Formatted_Agenda']
      })
-
-     # 6. Create child (variation) products
-     woo_child_df = pd.DataFrame({
-         'Type': 'variation, virtual',
-         'SKU': netcom_df['Course SID'],
-         'Name': netcom_df['Course Name'],
-         'Published': 1,
-         'Visibility in catalog': 'visible',
-         'Short description': netcom_df['Short_Description'],
-         'Description': netcom_df['Condensed_Description'],
-         'Tax status': 'taxable',
-         'In stock?': 1,
-         'Regular price': netcom_df['SRP Pricing'].replace('[\$,]', '', regex=True),
-         'Categories': 'courses',
-         'Images': netcom_df['Vendor'].map(brand_logo_map).fillna(''),
-         'Parent': netcom_df['Course ID'],
-         'Brands': netcom_df['Vendor'],
-         'Attribute 1 name': 'Date',
-         'Attribute 1 value(s)': netcom_df['Course Start Date'],
-         'Attribute 1 visible': 'visible',
-         'Attribute 1 global': 1,
-         'Attribute 2 name': 'Location',
-         'Attribute 2 value(s)': 'Virtual',
-         'Attribute 2 visible': 'visible',
-         'Attribute 2 global': 1,
-         'Attribute 3 name': 'Time',
-         'Attribute 3 value(s)': netcom_df.apply(
-             lambda row: f"{row['Course Start Time']}-{row['Course End Time']} {row['Time Zone']}", axis=1
-         ),
-         'Attribute 3 visible': 'visible',
-         'Attribute 3 global': 1,
-         'Meta: outline': netcom_df['Formatted_Agenda'],
-         'Meta: days': netcom_df['Duration'],
-         'Meta: location': 'Virtual',
-         'Meta: overview': netcom_df['Target Audience'],
-         'Meta: objectives': netcom_df['Formatted_Objectives'],
-         'Meta: prerequisites': netcom_df['Formatted_Prerequisites'],
-         'Meta: agenda': netcom_df['Formatted_Agenda']
      })
-
-     # 7. Combine parent + child
-     woo_final_df = pd.concat([woo_parent_df, woo_child_df], ignore_index=True)
-
-     # 8. Desired column order (removed Stock and Sold individually?)
      column_order = [
-         'Type', 'SKU', 'Name', 'Published', 'Visibility in catalog',
-         'Short description', 'Description', 'Tax status', 'In stock?',
-         'Regular price', 'Categories', 'Images',
-         'Parent', 'Brands', 'Attribute 1 name', 'Attribute 1 value(s)', 'Attribute 1 visible',
-         'Attribute 1 global', 'Attribute 2 name', 'Attribute 2 value(s)', 'Attribute 2 visible',
-         'Attribute 2 global', 'Attribute 3 name', 'Attribute 3 value(s)', 'Attribute 3 visible',
-         'Attribute 3 global', 'Meta: outline', 'Meta: days', 'Meta: location', 'Meta: overview',
-         'Meta: objectives', 'Meta: prerequisites', 'Meta: agenda'
      ]
-     woo_final_df = woo_final_df[column_order]

-     # 9. Convert to CSV (in memory)
-     output_buffer = BytesIO()
-     woo_final_df.to_csv(output_buffer, index=False, encoding='utf-8-sig')
-     output_buffer.seek(0)
-
-     return output_buffer

- def process_file(uploaded_file):
-     """
-     Takes the uploaded file, processes it, and returns the CSV as a file-like object
-     """
-     processed_csv_io = process_woocommerce_data_in_memory(uploaded_file)
-
-     # Create a temporary file to save the CSV data
-     with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
-         temp_file.write(processed_csv_io.getvalue())
-         temp_path = temp_file.name
-
-     return temp_path

  interface = gr.Interface(
      fn=process_file,
      inputs=gr.File(label="Upload NetCom CSV", file_types=[".csv"]),
      outputs=gr.File(label="Download WooCommerce CSV"),
-     title="NetCom to WooCommerce CSV Processor",
-     description="Upload your NetCom Reseller Schedule CSV to generate the WooCommerce import-ready CSV.",
      analytics_enabled=False,
  )

  if __name__ == "__main__":
-     openai_api_key = os.getenv("OPENAI_API_KEY")
-     if not openai_api_key:
-         print("Warning: OPENAI_API_KEY environment variable not set")
-     interface.launch()
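For orientation before the updated side: the removed pipeline above was driven by handing a list of strings and an instruction to `process_text_with_ai_async`. A minimal sketch of that invocation (hypothetical texts; assumes OPENAI_API_KEY is exported and the old module is imported):

    import asyncio

    texts = ["Intro to AWS networking.", "Advanced enterprise routing."]
    # Each text becomes "<instruction>\n\nText: <text>"; cached answers are
    # served from ai_response_cache/, the rest go to GPT-4o-mini concurrently.
    summaries = asyncio.run(
        process_text_with_ai_async(
            texts,
            "Create a concise 250-character summary of this course description:",
        )
    )
    print(summaries)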
 
+++ app.py (after)
  import pandas as pd
  import tempfile
  import os
  import json
+ import hashlib
  import asyncio
+ from io import BytesIO
  from pathlib import Path
+ import openai
  import gradio_client.utils

+ """NetCom WooCommerce transformer (Try 1 schema)
+ =================================================
+ Drop a *Reseller Schedule* CSV and get back a WooCommerce‑ready CSV that matches
+ `Try 1 - WooCommerce_Mapped_Data__Fixed_Attributes_and_Agenda_.csv` exactly –
+ including `Stock` and `Sold individually?` columns that NetCom doesn’t supply.
+
+ Highlights
+ ----------
+ * Empty cells are skipped – no wasted GPT calls.
+ * GPT‑4o mini used with a tiny disk cache (`ai_response_cache/`).
+ * Brand → logo URLs hard‑coded below (update when media library changes).
+ """
+
+ # ---------------------------------------------------------------------------
+ # Gradio JSON‑schema helper hot‑patch (bool schema bug)
+ # ---------------------------------------------------------------------------
+ _original = gradio_client.utils._json_schema_to_python_type

  def _fixed_json_schema_to_python_type(schema, defs=None):
+     if isinstance(schema, bool):  # gradio 4.29 bug
          return "any"
+     return _original(schema, defs)

+ gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type  # type: ignore
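The hot-patch exists because JSON Schema allows a bare boolean as a schema (`true` accepts everything, `false` nothing), and the affected gradio_client version crashes on it while walking an API schema. A quick sanity check of the patched helper (assuming app.py has been imported; `_json_schema_to_python_type` is a private gradio_client helper, so the delegate's exact output may vary by version):

    import gradio_client.utils as gcu

    # Bare boolean schemas now fall back to "any" instead of raising.
    print(gcu._json_schema_to_python_type(True))              # -> "any"
    # Dict schemas still delegate to the original implementation.
    print(gcu._json_schema_to_python_type({"type": "string"}))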
 
+ # ---------------------------------------------------------------------------
+ # Tiny disk cache for OpenAI responses
+ # ---------------------------------------------------------------------------
+ CACHE_DIR = Path("ai_response_cache"); CACHE_DIR.mkdir(exist_ok=True)

+ def _cache_path(prompt: str) -> Path:
+     return CACHE_DIR / f"{hashlib.md5(prompt.encode()).hexdigest()}.json"

+ def _get_cached(prompt: str):
      try:
+         return json.loads(_cache_path(prompt).read_text("utf-8"))["response"]
+     except Exception:
+         return None

+ def _set_cache(prompt: str, rsp: str):
+     try:
+         _cache_path(prompt).write_text(json.dumps({"prompt": prompt, "response": rsp}), "utf-8")
+     except Exception:
+         pass
+
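The cache helpers intentionally swallow every exception: a corrupt or missing file is simply a miss, and a failed write is ignored. A round-trip for illustration (assuming the module above is imported):

    # Store, then read back: the md5 of the prompt is the filename.
    _set_cache("hello", "world")
    assert _get_cached("hello") == "world"
    # Anything unreadable (or never cached) degrades to None, i.e. a miss.
    assert _get_cached("never asked") is None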
+ # ---------------------------------------------------------------------------
+ # Async GPT helpers
+ # ---------------------------------------------------------------------------
+ async def _gpt(client: openai.AsyncOpenAI, prompt: str) -> str:
+     cached = _get_cached(prompt)
+     if cached is not None:
+         return cached
+     try:
+         cmp = await client.chat.completions.create(
+             model="gpt-4o-mini",
+             messages=[{"role": "user", "content": prompt}],
+             temperature=0,
+         )
+         txt = cmp.choices[0].message.content
+     except Exception as e:
+         txt = f"Error: {e}"
+     _set_cache(prompt, txt)
+     return txt
+
+
+ async def _batch(texts: list[str], instruction: str) -> list[str]:
+     """Return len(texts) list. Blank inputs remain blank."""
+     res = ["" for _ in texts]
+     idx, prompts = [], []
+     for i, t in enumerate(texts):
+         if isinstance(t, str) and t.strip():
+             idx.append(i); prompts.append(f"{instruction}\n\nText: {t}")
+     if not prompts:
+         return res
      client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+     tasks = [_gpt(client, p) for p in prompts]
+     outs = await asyncio.gather(*tasks)
+     for k, v in enumerate(outs):
+         res[idx[k]] = v
+     return res
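`_batch` preserves positions: blank or non-string cells come back as empty strings without ever touching the API, and only the remaining prompts are fanned out through `asyncio.gather`. A usage sketch (hypothetical rows; assumes OPENAI_API_KEY is set):

    import asyncio

    rows = ["Overview of course A ...", "", "Overview of course B ..."]
    outs = asyncio.run(
        _batch(rows, "Create a concise 250-character summary of this course description:")
    )
    assert len(outs) == 3 and outs[1] == ""  # the blank cell cost no GPT call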
 
 
 
 
 
+ # ---------------------------------------------------------------------------
+ # Main converter
+ # ---------------------------------------------------------------------------

  def process_woocommerce_data_in_memory(netcom_file):
+     """Return BytesIO of Woo CSV."""
+     # Brand logos
      brand_logo_map = {
          "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
          "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
          "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
          "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
          "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
+         "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png",
      }
+     default_prereq = (
+         "No specific prerequisites are required for this course. "
+         "Basic computer literacy and familiarity with fundamental concepts in the subject area are recommended for the best learning experience."
      )
+     # Load NetCom CSV
+     df = pd.read_csv(netcom_file.name, encoding="latin1"); df.columns = df.columns.str.strip()
+     def _col(opts):
+         return next((c for c in opts if c in df.columns), None)
+     # Column aliases
+     col_desc = _col(["Description", "Decription"])
+     col_obj = _col(["Objectives", "objectives"])
+     col_pre = _col(["RequiredPrerequisite", "Required Pre-requisite"])
+     col_out = _col(["Outline"])
+     col_dur = _col(["Duration"])
+     col_sid = _col(["Course SID", "Course SID"])
+     if col_dur is None:
+         df["Duration"] = ""; col_dur = "Duration"
+     # AI prep lists
+     descs, objs, pres, outs = (df.get(c, pd.Series([""]*len(df))).fillna("").tolist() for c in (col_desc, col_obj, col_pre, col_out))
+     loop = asyncio.new_event_loop(); asyncio.set_event_loop(loop)
+     short_d, long_d, fmt_obj, fmt_out = loop.run_until_complete(asyncio.gather(
+         _batch(descs, "Create a concise 250-character summary of this course description:"),
+         _batch(descs, "Condense this description to a maximum of 750 characters in paragraph format, with clean formatting:"),
+         _batch(objs, "Format these objectives into a bullet list with clean formatting. Start each bullet with '• ':"),
+         _batch(outs, "Format this agenda into a bullet list with clean formatting. Start each bullet with '• ':"),
+     )); loop.close()
+     fmt_pre = [default_prereq if not str(p).strip() else asyncio.run(_batch([p], "Format these prerequisites into a bullet list with clean formatting. Start each bullet with '• ':"))[0] for p in pres]
+     # Attach processed cols
+     df["Short_Description"] = short_d; df["Condensed_Description"] = long_d
+     df["Formatted_Objectives"] = fmt_obj; df["Formatted_Agenda"] = fmt_out; df["Formatted_Prerequisites"] = fmt_pre
+     # Dates
+     df["Course Start Date"] = pd.to_datetime(df["Course Start Date"], errors="coerce")
+     df["Date_fmt"] = df["Course Start Date"].dt.strftime("%-m/%-d/%Y")
+     df_sorted = df.sort_values(["Course ID", "Course Start Date"])
+     date_agg = df_sorted.groupby("Course ID")["Date_fmt"].apply(lambda s: ",".join(s.dropna().unique())).reset_index(name="Aggregated_Dates")
+     time_agg = df_sorted.groupby("Course ID").apply(lambda g: ",".join(f"{st}-{et} {tz}" for st, et, tz in zip(g["Course Start Time"], g["Course End Time"], g["Time Zone"]))).reset_index(name="Aggregated_Times")
+     parents = df_sorted.drop_duplicates("Course ID").merge(date_agg).merge(time_agg)
+     # Parent rows
+     woo_parent = pd.DataFrame({
+         "Type": "variable",
+         "SKU": parents["Course ID"],
+         "Name": parents["Course Name"],
+         "Published": 1,
+         "Visibility in catalog": "visible",
+         "Short description": parents["Short_Description"],
+         "Description": parents["Condensed_Description"],
+         "Tax status": "taxable",
+         "In stock?": 1,
+         "Stock": 1,
+         "Sold individually?": 1,
+         "Regular price": parents["SRP Pricing"].replace("[\\$,]", "", regex=True),
+         "Categories": "courses",
+         "Images": parents["Vendor"].map(brand_logo_map).fillna(""),
+         "Parent": "",
+         "Brands": parents["Vendor"],
+         # Attributes
+         "Attribute 1 name": "Date", "Attribute 1 value(s)": parents["Aggregated_Dates"], "Attribute 1 visible": "visible", "Attribute 1 global": 1,
+         "Attribute 2 name": "Location", "Attribute 2 value(s)": "Virtual", "Attribute 2 visible": "visible", "Attribute 2 global": 1,
+         "Attribute 3 name": "Time", "Attribute 3 value(s)": parents["Aggregated_Times"], "Attribute 3 visible": "visible", "Attribute 3 global": 1,
+         # Meta
+         "Meta: outline": parents["Formatted_Agenda"], "Meta: days": parents[col_dur], "Meta: location": "Virtual",
+         "Meta: overview": parents["Target Audience"], "Meta: objectives": parents["Formatted_Objectives"],
+         "Meta: prerequisites": parents["Formatted_Prerequisites"], "Meta: agenda": parents["Formatted_Agenda"],
      })
+     # Child rows
+     woo_child = pd.DataFrame({
+         "Type": "variation, virtual",
+         "SKU": df_sorted[col_sid].astype(str).str.strip(),
+         "Name": df_sorted["Course Name"],
+         "Published": 1,
+         "Visibility in catalog": "visible",
+         "Short description": df_sorted["Short_Description"],
+         "Description": df_sorted["Condensed_Description"],
+         "Tax status": "taxable",
+         "In stock?": 1,
+         "Stock": 1,
+         "Sold individually?": 1,
+         "Regular price": df_sorted["SRP Pricing"].replace("[\\$,]", "", regex=True),
+         "Categories": "courses",
+         "Images": df_sorted["Vendor"].map(brand_logo_map).fillna(""),
+         "Parent": df_sorted["Course ID"],
+         "Brands": df_sorted["Vendor"],
+         "Attribute 1 name": "Date", "Attribute 1 value(s)": df_sorted["Date_fmt"], "Attribute 1 visible": "visible", "Attribute 1 global": 1,
+         "Attribute 2 name": "Location", "Attribute 2 value(s)": "Virtual", "Attribute 2 visible": "visible", "Attribute 2 global": 1,
+         "Attribute 3 name": "Time", "Attribute 3 value(s)": df_sorted.apply(lambda r: f"{r['Course Start Time']}-{r['Course End Time']} {r['Time Zone']}", axis=1), "Attribute 3 visible": "visible", "Attribute 3 global": 1,
+         "Meta: outline": df_sorted["Formatted_Agenda"], "Meta: days": df_sorted[col_dur], "Meta: location": "Virtual",
+         "Meta: overview": df_sorted["Target Audience"], "Meta: objectives": df_sorted["Formatted_Objectives"],
+         "Meta: prerequisites": df_sorted["Formatted_Prerequisites"], "Meta: agenda": df_sorted["Formatted_Agenda"],
      })
+     # Combine & order
+     combined = pd.concat([woo_parent, woo_child], ignore_index=True)
      column_order = [
+         "Type", "SKU", "Name", "Published", "Visibility in catalog", "Short description", "Description", "Tax status",
+         "In stock?", "Stock", "Sold individually?", "Regular price", "Categories", "Images", "Parent", "Brands",
+         "Attribute 1 name", "Attribute 1 value(s)", "Attribute 1 visible", "Attribute 1 global",
+         "Attribute 2 name", "Attribute 2 value(s)", "Attribute 2 visible", "Attribute 2 global",
+         "Attribute 3 name", "Attribute 3 value(s)", "Attribute 3 visible", "Attribute 3 global",
+         "Meta: outline", "Meta: days", "Meta: location", "Meta: overview", "Meta: objectives", "Meta: prerequisites", "Meta: agenda",
      ]
+     combined = combined[column_order]
+     buf = BytesIO(); combined.to_csv(buf, index=False, encoding="utf-8-sig"); buf.seek(0); return buf
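The converter only needs an object exposing `.name` as a path to the CSV, which is what Gradio's `gr.File` passes in. A hypothetical standalone run (the filenames are illustrative; assumes a schedule CSV with the expected columns and OPENAI_API_KEY set):

    from pathlib import Path
    from types import SimpleNamespace

    # Any object with a .name attribute works in place of the Gradio wrapper.
    buf = process_woocommerce_data_in_memory(SimpleNamespace(name="netcom_schedule.csv"))
    Path("woocommerce_import.csv").write_bytes(buf.getvalue())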
+
+ # ---------------------------------------------------------------------------
+ # Gradio wrapper
+ # ---------------------------------------------------------------------------

+ def process_file(upload):
+     return process_woocommerce_data_in_memory(upload)

  interface = gr.Interface(
      fn=process_file,
      inputs=gr.File(label="Upload NetCom CSV", file_types=[".csv"]),
      outputs=gr.File(label="Download WooCommerce CSV"),
+     title="NetCom WooCommerce CSV Processor",
+     description="Upload a NetCom Reseller Schedule CSV to generate a WooCommerce import CSV (Try 1 schema).",
      analytics_enabled=False,
  )

  if __name__ == "__main__":
+     if not os.getenv("OPENAI_API_KEY"):
+         print("⚠️ OPENAI_API_KEY not set – AI paraphrasing will error out")
+     interface.launch()
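One caveat: `gr.File` outputs generally expect a filesystem path, while the new `process_file` hands back the `BytesIO` directly; the removed revision wrote the buffer to a named temp file first. If the in-memory return misbehaves, a sketch of that earlier pattern (the wrapper name is hypothetical):

    import tempfile

    def process_file_to_path(upload):
        # Spill the in-memory CSV to disk and hand Gradio the path,
        # mirroring the pre-change process_file implementation.
        buf = process_woocommerce_data_in_memory(upload)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
            tmp.write(buf.getvalue())
        return tmp.name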