codys12 committed
Commit
f86c87e
·
verified ·
1 Parent(s): 8b2b5a0

Update app.py

Files changed (1)
  1. app.py +312 -362
app.py CHANGED
@@ -1,413 +1,363 @@
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
-
- """
- *NetCom → WooCommerce CSV/Excel Processor*
- Robust edition – catches and logs every recoverable error so one failure never
- brings the whole pipeline down. Only small, surgical changes were made.
- """
-
  import gradio as gr
  import pandas as pd
  import tempfile
- import os, sys, json, re, hashlib, asyncio, aiohttp, traceback
  from io import BytesIO
  from pathlib import Path
  from functools import lru_cache
- import openai
- import gradio_client.utils
-
- # ────────────────────────────── HELPERS ──────────────────────────────
- def _log(err: Exception, msg: str = ""):
-     """Log errors without stopping execution."""
-     print(f"[WARN] {msg}: {err}", file=sys.stderr)
-     traceback.print_exception(err)
-
- # Patch: tolerate bad JSON-schemas produced by some OpenAI tools
  _original_json_schema_to_python_type = gradio_client.utils._json_schema_to_python_type
  def _fixed_json_schema_to_python_type(schema, defs=None):
-     try:
-         if isinstance(schema, bool):
-             return "any"
-         return _original_json_schema_to_python_type(schema, defs)
-     except Exception as e:  # last-chance fallback
-         _log(e, "json_schema_to_python_type failed")
          return "any"
  gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type
- # ────────────────────────────── DISK CACHE ──────────────────────────────
- CACHE_DIR = Path("ai_response_cache"); CACHE_DIR.mkdir(exist_ok=True)
- def _cache_path(prompt):  # deterministic path
-     return CACHE_DIR / f"{hashlib.md5(prompt.encode()).hexdigest()}.json"
-
  def get_cached_response(prompt):
-     try:
-         p = _cache_path(prompt)
-         if p.exists():
-             return json.loads(p.read_text(encoding="utf-8"))["response"]
-     except Exception as e:
-         _log(e, "reading cache")
      return None
-
  def cache_response(prompt, response):
      try:
-         _cache_path(prompt).write_text(
-             json.dumps({"prompt": prompt, "response": response}), encoding="utf-8"
-         )
      except Exception as e:
-         _log(e, "writing cache")
- # ────────────────────────────── OPENAI ──────────────────────────────
- async def _call_openai(client, prompt):
-     """Single protected OpenAI call."""
-     try:
-         rsp = await client.chat.completions.create(
-             model="gpt-4o-mini",
-             messages=[{"role": "user", "content": prompt}],
-             temperature=0,
-         )
-         return rsp.choices[0].message.content
-     except Exception as e:
-         _log(e, "OpenAI error")
-         return f"Error: {e}"
-
- async def process_text_batch_async(client, prompts):
-     """Return results in original order, resilient to any error."""
-     results, tasks = {}, []
-     for p in prompts:
-         cached = get_cached_response(p)
-         if cached is not None:
-             results[p] = cached
-         else:
-             tasks.append(asyncio.create_task(_call_openai(client, p)))
-
-     for prompt, task in zip([p for p in prompts if p not in results], tasks):
-         try:
-             res = await task
-         except Exception as e:
-             _log(e, "async OpenAI task")
-             res = f"Error: {e}"
-         cache_response(prompt, res)
-         results[prompt] = res
-     return [results[p] for p in prompts]
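A stubbed sketch of the ordering contract above (fake_call and the toy prompts are hypothetical; no network is involved): cached prompts are answered from results, the rest are awaited, and the returned list follows the original prompt order.

import asyncio

async def fake_call(prompt):              # stand-in for _call_openai
    return prompt.upper()

async def demo():
    prompts = ["a", "b", "a"]
    results = {"a": "CACHED-A"}           # pretend "a" was already cached
    pending = [p for p in prompts if p not in results]
    tasks = [asyncio.create_task(fake_call(p)) for p in pending]
    for p, t in zip(pending, tasks):
        results[p] = await t
    return [results[p] for p in prompts]

print(asyncio.run(demo()))                # ['CACHED-A', 'B', 'CACHED-A']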
  async def process_text_with_ai_async(texts, instruction):
      if not texts:
          return []
      client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-     batch_size, out = 500, []
      for i in range(0, len(texts), batch_size):
-         prompts = [f"{instruction}\n\nText: {t}" for t in texts[i : i + batch_size]]
-         out.extend(await process_text_batch_async(client, prompts))
-     return out
-
- # ────────────────────────────── MAIN TRANSFORM ──────────────────────────────
- def process_woocommerce_data_in_memory(upload):
-     """Convert NetCom → Woo CSV/XLSX; every stage guarded."""
-     try:
-         # brand → logo mapping
-         brand_logo = {
-             "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
-             "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
-             "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
-             "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
-             "EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
-             "ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
-             "PMI": "/wp-content/uploads/2025/04/PMI.png",
-             "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
-             "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
-             "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
-             "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png",
-         }
-         default_prereq = (
-             "No specific prerequisites are required for this course. "
-             "Basic computer literacy and familiarity with fundamental concepts in the "
-             "subject area are recommended for the best learning experience."
-         )
-         # ---------------- I/O ----------------
-         ext = Path(upload.name).suffix.lower()
-         try:
-             if ext in {".xlsx", ".xls"}:
-                 try:
-                     df = pd.read_excel(upload.name, sheet_name="Active Schedules")
-                 except Exception as e:
-                     _log(e, "Excel read failed (falling back to first sheet)")
-                     df = pd.read_excel(upload.name, sheet_name=0)
-             else:  # CSV
-                 try:
-                     df = pd.read_csv(upload.name, encoding="latin1")
-                 except Exception as e:
-                     _log(e, "CSV read failed (trying utf-8)")
-                     df = pd.read_csv(upload.name, encoding="utf-8", encoding_errors="ignore")
-         except Exception as e:
-             _log(e, "file read totally failed")
-             raise
-
-         df.columns = df.columns.str.strip()
-         # --------- column harmonisation (new vs old formats) ----------
-         rename_map = {
-             "Decription": "Description",
-             "description": "Description",
-             "Objectives": "Objectives",
-             "objectives": "Objectives",
-             "RequiredPrerequisite": "Required Prerequisite",
-             "Required Pre-requisite": "Required Prerequisite",
-             "RequiredPre-requisite": "Required Prerequisite",
-         }
-         df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True)
-
-         # duration if missing
-         if "Duration" not in df.columns:
-             try:
-                 df["Duration"] = (
-                     pd.to_datetime(df["Course End Date"]) - pd.to_datetime(df["Course Start Date"])
-                 ).dt.days.add(1)
-             except Exception as e:
-                 _log(e, "duration calc failed")
-                 df["Duration"] = ""
-
-         # ---------------- ASYNC AI ----------------
-         loop = asyncio.new_event_loop()
-         asyncio.set_event_loop(loop)
-         col_desc = "Description"
-         col_obj = "Objectives"
-         col_prereq = "Required Prerequisite"
-         try:
-             res = loop.run_until_complete(
-                 asyncio.gather(
-                     process_text_with_ai_async(
-                         df[col_desc].fillna("").tolist(),
-                         "Create a concise 250-character summary of this course description:",
-                     ),
-                     process_text_with_ai_async(
-                         df[col_desc].fillna("").tolist(),
-                         "Condense this description to maximum 750 characters in paragraph format, with clean formatting:",
-                     ),
-                     process_text_with_ai_async(
-                         df[col_obj].fillna("").tolist(),
-                         "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':",
-                     ),
-                     process_text_with_ai_async(
-                         df["Outline"].fillna("").tolist(),
-                         "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':",
-                     ),
-                 )
              )
-         except Exception as e:
-             _log(e, "async AI gather failed")
-             res = [[""] * len(df)] * 4
-         finally:
-             loop.close()
-
-         short_desc, long_desc, objectives, agendas = res
-
-         # prerequisites handled synchronously (tiny)
-         prereq_out = []
-         for p in df[col_prereq].fillna("").tolist():
-             if not p.strip():
-                 prereq_out.append(default_prereq)
-             else:
-                 try:
-                     prereq_out.append(
-                         asyncio.run(
-                             process_text_with_ai_async(
-                                 [p],
-                                 "Format these prerequisites into a bullet list format with clean formatting. Start each bullet with '• ':",
-                             )
-                         )[0]
-                     )
-                 except Exception as e:
-                     _log(e, "prereq AI failed")
-                     prereq_out.append(default_prereq)
-         # ---------------- DATAFRAME BUILD ----------------
-         try:
-             df["Short_Description"] = short_desc
-             df["Condensed_Description"] = long_desc
-             df["Formatted_Objectives"] = objectives
-             df["Formatted_Prerequisites"] = prereq_out
-             df["Formatted_Agenda"] = agendas
-         except Exception as e:
-             _log(e, "adding AI columns")
-
-         # 2. aggregate date/time
-         df = df.sort_values(["Course ID", "Course Start Date"])
-         date_agg = (
-             df.groupby("Course ID")["Course Start Date"]
-             .apply(lambda x: ",".join(x.astype(str).unique()))
-             .reset_index(name="Aggregated_Dates")
-         )
-         time_agg = (
-             df.groupby("Course ID")
-             .apply(
-                 lambda d: ",".join(
-                     f"{s}-{e} {tz}"
-                     for s, e, tz in zip(
-                         d["Course Start Time"], d["Course End Time"], d["Time Zone"]
-                     )
-                 )
-             )
-             .reset_index(name="Aggregated_Times")
-         )
-         parent = df.drop_duplicates(subset=["Course ID"]).merge(date_agg).merge(time_agg)
-         woo_parent_df = pd.DataFrame(
-             {
-                 "Type": "variable",
-                 "SKU": parent["Course ID"],
-                 "Name": parent["Course Name"],
-                 "Published": 1,
-                 "Visibility in catalog": "visible",
-                 "Short description": parent["Short_Description"],
-                 "Description": parent["Condensed_Description"],
-                 "Tax status": "taxable",
-                 "In stock?": 1,
-                 "Regular price": parent["SRP Pricing"].replace("[\\$,]", "", regex=True),
-                 "Categories": "courses",
-                 "Images": parent["Vendor"].map(brand_logo).fillna(""),
-                 "Parent": "",
-                 "Brands": parent["Vendor"],
-                 "Attribute 1 name": "Date",
-                 "Attribute 1 value(s)": parent["Aggregated_Dates"],
-                 "Attribute 1 visible": "visible",
-                 "Attribute 1 global": 1,
-                 "Attribute 2 name": "Location",
-                 "Attribute 2 value(s)": "Virtual",
-                 "Attribute 2 visible": "visible",
-                 "Attribute 2 global": 1,
-                 "Attribute 3 name": "Time",
-                 "Attribute 3 value(s)": parent["Aggregated_Times"],
-                 "Attribute 3 visible": "visible",
-                 "Attribute 3 global": 1,
-                 "Meta: outline": parent["Formatted_Agenda"],
-                 "Meta: days": parent["Duration"],
-                 "Meta: location": "Virtual",
-                 "Meta: overview": parent["Target Audience"],
-                 "Meta: objectives": parent["Formatted_Objectives"],
-                 "Meta: prerequisites": parent["Formatted_Prerequisites"],
-                 "Meta: agenda": parent["Formatted_Agenda"],
-             }
-         )
-
-         woo_child_df = pd.DataFrame(
-             {
-                 "Type": "variation, virtual",
-                 "SKU": df["Course SID"],
-                 "Name": df["Course Name"],
-                 "Published": 1,
-                 "Visibility in catalog": "visible",
-                 "Short description": df["Short_Description"],
-                 "Description": df["Condensed_Description"],
-                 "Tax status": "taxable",
-                 "In stock?": 1,
-                 "Regular price": df["SRP Pricing"].replace("[\\$,]", "", regex=True),
-                 "Categories": "courses",
-                 "Images": df["Vendor"].map(brand_logo).fillna(""),
-                 "Parent": df["Course ID"],
-                 "Brands": df["Vendor"],
-                 "Attribute 1 name": "Date",
-                 "Attribute 1 value(s)": df["Course Start Date"],
-                 "Attribute 1 visible": "visible",
-                 "Attribute 1 global": 1,
-                 "Attribute 2 name": "Location",
-                 "Attribute 2 value(s)": "Virtual",
-                 "Attribute 2 visible": "visible",
-                 "Attribute 2 global": 1,
-                 "Attribute 3 name": "Time",
-                 "Attribute 3 value(s)": df.apply(
-                     lambda r: f"{r['Course Start Time']}-{r['Course End Time']} {r['Time Zone']}",
-                     axis=1,
-                 ),
-                 "Attribute 3 visible": "visible",
-                 "Attribute 3 global": 1,
-                 "Meta: outline": df["Formatted_Agenda"],
-                 "Meta: days": df["Duration"],
-                 "Meta: location": "Virtual",
-                 "Meta: overview": df["Target Audience"],
-                 "Meta: objectives": df["Formatted_Objectives"],
-                 "Meta: prerequisites": df["Formatted_Prerequisites"],
-                 "Meta: agenda": df["Formatted_Agenda"],
-             }
-         )
-         final_cols = [
-             "Type",
-             "SKU",
-             "Name",
-             "Published",
-             "Visibility in catalog",
-             "Short description",
-             "Description",
-             "Tax status",
-             "In stock?",
-             "Regular price",
-             "Categories",
-             "Images",
-             "Parent",
-             "Brands",
-             "Attribute 1 name",
-             "Attribute 1 value(s)",
-             "Attribute 1 visible",
-             "Attribute 1 global",
-             "Attribute 2 name",
-             "Attribute 2 value(s)",
-             "Attribute 2 visible",
-             "Attribute 2 global",
-             "Attribute 3 name",
-             "Attribute 3 value(s)",
-             "Attribute 3 visible",
-             "Attribute 3 global",
-             "Meta: outline",
-             "Meta: days",
-             "Meta: location",
-             "Meta: overview",
-             "Meta: objectives",
-             "Meta: prerequisites",
-             "Meta: agenda",
-         ]
-
-         woo_final_df = pd.concat([woo_parent_df, woo_child_df], ignore_index=True)[
-             final_cols
-         ]
-         buf = BytesIO()
-         woo_final_df.to_csv(buf, index=False, encoding="utf-8-sig")
-         buf.seek(0)
-         return buf
-     except Exception as e:
-         _log(e, "fatal transformation error")
-         err_buf = BytesIO()
-         pd.DataFrame({"error": [str(e)]}).to_csv(err_buf, index=False)
-         err_buf.seek(0)
-         return err_buf
- # ────────────────────────────── GRADIO BINDINGS ──────────────────────────────
- def process_file(file):
-     try:
-         out_io = process_woocommerce_data_in_memory(file)
-         with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
-             tmp.write(out_io.getvalue())
-             return tmp.name
-     except Exception as e:
-         _log(e, "top-level process_file")
-         with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp:
-             tmp.write(f"Processing failed:\n{e}".encode())
-             return tmp.name
  interface = gr.Interface(
      fn=process_file,
-     inputs=gr.File(label="Upload NetCom Schedule", file_types=[".csv", ".xlsx", ".xls"]),
      outputs=gr.File(label="Download WooCommerce CSV"),
-     title="NetCom → WooCommerce CSV/Excel Processor",
-     description="Upload a NetCom Reseller Schedule CSV or XLSX to generate a WooCommerce-ready CSV.",
      analytics_enabled=False,
  )
-
- if __name__ == "__main__":  # run
-     if not os.getenv("OPENAI_API_KEY"):
-         print("[WARN] OPENAI_API_KEY not set; AI steps will error out.")
-     interface.launch()
  import gradio as gr
  import pandas as pd
  import tempfile
+ import os
  from io import BytesIO
+ import re
+ import openai
+ import hashlib
+ import json
+ import asyncio
+ import aiohttp
  from pathlib import Path
+ from concurrent.futures import ThreadPoolExecutor
  from functools import lru_cache
+
+ import gradio_client.utils
  _original_json_schema_to_python_type = gradio_client.utils._json_schema_to_python_type
+
  def _fixed_json_schema_to_python_type(schema, defs=None):
+     # If the schema is a bool, return a fallback type (e.g. "any")
+     if isinstance(schema, bool):
          return "any"
+     return _original_json_schema_to_python_type(schema, defs)
+
  gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type
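A self-contained sketch of why the bool guard matters (python_type here is a hypothetical stand-in, not the gradio_client function): JSON Schema permits bare true/false as a schema, json parses those to Python bools, and a walker that assumes a dict then crashes.

def python_type(schema):
    if isinstance(schema, bool):          # `true` / `false` are valid schemas
        return "any"
    return schema.get("type", "any")      # ordinary dict schema

assert python_type(True) == "any"         # unguarded, this would raise AttributeError
assert python_type({"type": "string"}) == "string"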
+
+ # Create cache directory if it doesn't exist
+ CACHE_DIR = Path("ai_response_cache")
+ CACHE_DIR.mkdir(exist_ok=True)
+
+ def get_cache_path(prompt):
+     """Generate a unique cache file path based on the prompt content"""
+     prompt_hash = hashlib.md5(prompt.encode('utf-8')).hexdigest()
+     return CACHE_DIR / f"{prompt_hash}.json"
+
  def get_cached_response(prompt):
+     """Try to get a cached response for the given prompt"""
+     cache_path = get_cache_path(prompt)
+     if cache_path.exists():
+         try:
+             with open(cache_path, 'r', encoding='utf-8') as f:
+                 return json.load(f)['response']
+         except Exception as e:
+             print(f"Error reading cache: {e}")
      return None

  def cache_response(prompt, response):
+     """Cache the response for a given prompt"""
+     cache_path = get_cache_path(prompt)
      try:
+         with open(cache_path, 'w', encoding='utf-8') as f:
+             json.dump({'prompt': prompt, 'response': response}, f)
      except Exception as e:
+         print(f"Error writing to cache: {e}")
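Usage sketch (the prompt text is illustrative, and it assumes these helpers are importable from app.py): a cache entry is just a JSON file keyed by the MD5 of the prompt, so an identical prompt later in the run, or on a re-run, never reaches the API.

cache_response("Summarize: hello", "A greeting.")
assert get_cached_response("Summarize: hello") == "A greeting."
print(get_cache_path("Summarize: hello"))    # ai_response_cache/<md5-hex>.json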
+ async def process_text_batch_async(client, batch_prompts):
+     """Process a batch of prompts asynchronously"""
+     results = []
+
+     # First check cache for each prompt
+     for prompt in batch_prompts:
+         cached = get_cached_response(prompt)
+         if cached is not None:
+             results.append((prompt, cached))
+
+     # Filter out prompts that were found in cache
+     cached_set = {prompt for prompt, _ in results}
+     uncached_prompts = [p for p in batch_prompts if p not in cached_set]
+
+     if uncached_prompts:
+         # Process uncached prompts in parallel
+         async def process_single_prompt(prompt):
+             try:
+                 response = await client.chat.completions.create(
+                     model="gpt-4o-mini",
+                     messages=[{"role": "user", "content": prompt}],
+                     temperature=0
+                 )
+                 result = response.choices[0].message.content
+                 # Cache the result
+                 cache_response(prompt, result)
+                 return prompt, result
+             except Exception as e:
+                 print(f"Error processing prompt: {e}")
+                 return prompt, f"Error: {str(e)}"
+
+         # Create tasks for all uncached prompts
+         tasks = [process_single_prompt(prompt) for prompt in uncached_prompts]
+
+         # Run all tasks concurrently and wait for them to complete
+         uncached_results = await asyncio.gather(*tasks)
+
+         # Combine cached and newly processed results
+         results.extend(uncached_results)
+
+     # Map results back to the original order of batch_prompts
+     prompt_to_result = {prompt: result for prompt, result in results}
+     return [prompt_to_result[prompt] for prompt in batch_prompts]
  async def process_text_with_ai_async(texts, instruction):
+     """Process text with GPT-4o-mini asynchronously in batches"""
      if not texts:
          return []
+
+     results = []
+     batch_size = 500
+
+     # Create OpenAI async client
      client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+     # Process in batches
      for i in range(0, len(texts), batch_size):
+         batch = texts[i:i+batch_size]
+         batch_prompts = [f"{instruction}\n\nText: {text}" for text in batch]
+
+         batch_results = await process_text_batch_async(client, batch_prompts)
+         results.extend(batch_results)
+
+     return results
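Driver sketch (assumes OPENAI_API_KEY is set; the texts are made up): a single asyncio.run call pushes a whole column through in batches of up to 500 prompts, and the output lines up index-for-index with the input.

texts = ["Intro to AWS networking.", "Advanced BGP design."]
summaries = asyncio.run(process_text_with_ai_async(
    texts, "Create a concise 250-character summary of this course description:"
))
assert len(summaries) == len(texts)    # order and length preserved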
+ def process_woocommerce_data_in_memory(netcom_file):
+     """
+     Reads the uploaded NetCom CSV file in memory, processes it into the WooCommerce format,
+     and returns the resulting CSV as bytes, suitable for download.
+     """
+     # Define the brand-to-logo mapping with updated URLs
+     brand_logo_map = {
+         "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
+         "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
+         "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
+         "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
+         "EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
+         "ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
+         "PMI": "/wp-content/uploads/2025/04/PMI.png",
+         "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
+         "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
+         "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
+         "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png"
+     }
+
+     # Default prerequisite text for courses without prerequisites
+     default_prerequisite = "No specific prerequisites are required for this course. Basic computer literacy and familiarity with fundamental concepts in the subject area are recommended for the best learning experience."
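A small sketch of how the mapping behaves downstream ("Unknown Vendor" is hypothetical, and brand_logo_map is assumed to be in scope): vendors without a logo entry get an empty Images cell rather than NaN.

import pandas as pd

vendors = pd.Series(["Cisco", "Unknown Vendor"])
print(vendors.map(brand_logo_map).fillna('').tolist())
# ['/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp', '']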
+     # 1. Read the uploaded CSV into a DataFrame
+     netcom_df = pd.read_csv(netcom_file.name, encoding='latin1')
+     netcom_df.columns = netcom_df.columns.str.strip()  # standardize column names
+
+     # Prepare text columns for AI processing (note: 'Decription' is the
+     # actual column name in the NetCom export)
+     descriptions = netcom_df['Decription'].fillna("").tolist()
+     objectives = netcom_df['Objectives'].fillna("").tolist()
+     prerequisites = netcom_df['RequiredPrerequisite'].fillna("").tolist()
+     agendas = netcom_df['Outline'].fillna("").tolist()
+
+     # Process with AI asynchronously
+     loop = asyncio.new_event_loop()
+     asyncio.set_event_loop(loop)
+
+     # Run all processing tasks concurrently
+     tasks = [
+         process_text_with_ai_async(
+             descriptions,
+             "Create a concise 250-character summary of this course description:"
+         ),
+         process_text_with_ai_async(
+             descriptions,
+             "Condense this description to maximum 750 characters in paragraph format, with clean formatting:"
+         ),
+         process_text_with_ai_async(
+             objectives,
+             "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':"
+         ),
+         process_text_with_ai_async(
+             agendas,
+             "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':"
+         )
+     ]
+
+     # Process prerequisites separately to handle the default case
+     formatted_prerequisites = []
+     for prereq in prerequisites:
+         if not prereq or pd.isna(prereq) or prereq.strip() == "":
+             formatted_prerequisites.append(default_prerequisite)
+         else:
+             # For non-empty prerequisites, process them with AI
+             prereq_result = loop.run_until_complete(process_text_with_ai_async(
+                 [prereq],
+                 "Format these prerequisites into a bullet list format with clean formatting. Start each bullet with '• ':"
+             ))
+             formatted_prerequisites.append(prereq_result[0])
+
+     # Run all tasks and get results
+     results = loop.run_until_complete(asyncio.gather(*tasks))
+     loop.close()
+
+     short_descriptions, condensed_descriptions, formatted_objectives, formatted_agendas = results
+
+     # Add processed text to the dataframe
+     netcom_df['Short_Description'] = short_descriptions
+     netcom_df['Condensed_Description'] = condensed_descriptions
+     netcom_df['Formatted_Objectives'] = formatted_objectives
+     netcom_df['Formatted_Prerequisites'] = formatted_prerequisites
+     netcom_df['Formatted_Agenda'] = formatted_agendas
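Toy sketch of the event-loop pattern used above (the work coroutine is hypothetical): one manually created loop serves several run_until_complete calls, the per-row prerequisite calls and then the big gather, and is closed only afterwards.

import asyncio

async def work(n):
    return n * 2

loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
first = loop.run_until_complete(work(1))                          # per-row style call
rest = loop.run_until_complete(asyncio.gather(work(2), work(3)))  # batched gather
loop.close()
print(first, rest)    # 2 [4, 6]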
+     # 2. Create aggregated dates and times for each Course ID
+     # Sort by Course ID and date first
+     netcom_df = netcom_df.sort_values(['Course ID', 'Course Start Date'])
+
+     date_agg = (
+         netcom_df.groupby('Course ID')['Course Start Date']
+         .apply(lambda x: ','.join(x.astype(str).unique()))
+         .reset_index(name='Aggregated_Dates')
+     )
+
+     time_agg = (
+         netcom_df.groupby('Course ID')
+         .apply(
+             lambda df: ','.join(
+                 f"{st}-{et} {tz}"
+                 for st, et, tz in zip(df['Course Start Time'],
+                                       df['Course End Time'],
+                                       df['Time Zone'])
              )
+         )
+         .reset_index(name='Aggregated_Times')
+     )
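Worked example of the aggregation on a toy frame (values are hypothetical): two sessions of one course collapse into a single comma-joined Date attribute for the parent product.

import pandas as pd

toy = pd.DataFrame({
    "Course ID": ["C1", "C1"],
    "Course Start Date": ["2025-05-01", "2025-06-01"],
})
dates = toy.groupby("Course ID")["Course Start Date"].apply(
    lambda x: ",".join(x.astype(str).unique())
)
print(dates["C1"])    # 2025-05-01,2025-06-01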
+     # 3. Extract unique parent products
+     parent_products = netcom_df.drop_duplicates(subset=['Course ID'])
+
+     # 4. Merge aggregated dates and times
+     parent_products = parent_products.merge(date_agg, on='Course ID', how='left')
+     parent_products = parent_products.merge(time_agg, on='Course ID', how='left')
+     # 5. Create parent (variable) products
+     woo_parent_df = pd.DataFrame({
+         'Type': 'variable',
+         'SKU': parent_products['Course ID'],
+         'Name': parent_products['Course Name'],
+         'Published': 1,
+         'Visibility in catalog': 'visible',
+         'Short description': parent_products['Short_Description'],
+         'Description': parent_products['Condensed_Description'],
+         'Tax status': 'taxable',
+         'In stock?': 1,
+         'Regular price': parent_products['SRP Pricing'].replace(r'[\$,]', '', regex=True),
+         'Categories': 'courses',
+         'Images': parent_products['Vendor'].map(brand_logo_map).fillna(''),
+         'Parent': '',
+         'Brands': parent_products['Vendor'],
+         'Attribute 1 name': 'Date',
+         'Attribute 1 value(s)': parent_products['Aggregated_Dates'],
+         'Attribute 1 visible': 'visible',
+         'Attribute 1 global': 1,
+         'Attribute 2 name': 'Location',
+         'Attribute 2 value(s)': 'Virtual',
+         'Attribute 2 visible': 'visible',
+         'Attribute 2 global': 1,
+         'Attribute 3 name': 'Time',
+         'Attribute 3 value(s)': parent_products['Aggregated_Times'],
+         'Attribute 3 visible': 'visible',
+         'Attribute 3 global': 1,
+         'Meta: outline': parent_products['Formatted_Agenda'],
+         'Meta: days': parent_products['Duration'],
+         'Meta: location': 'Virtual',
+         'Meta: overview': parent_products['Target Audience'],
+         'Meta: objectives': parent_products['Formatted_Objectives'],
+         'Meta: prerequisites': parent_products['Formatted_Prerequisites'],
+         'Meta: agenda': parent_products['Formatted_Agenda']
+     })
+
+     # 6. Create child (variation) products
+     woo_child_df = pd.DataFrame({
+         'Type': 'variation, virtual',
+         'SKU': netcom_df['Course SID'],
+         'Name': netcom_df['Course Name'],
+         'Published': 1,
+         'Visibility in catalog': 'visible',
+         'Short description': netcom_df['Short_Description'],
+         'Description': netcom_df['Condensed_Description'],
+         'Tax status': 'taxable',
+         'In stock?': 1,
+         'Regular price': netcom_df['SRP Pricing'].replace(r'[\$,]', '', regex=True),
+         'Categories': 'courses',
+         'Images': netcom_df['Vendor'].map(brand_logo_map).fillna(''),
+         'Parent': netcom_df['Course ID'],
+         'Brands': netcom_df['Vendor'],
+         'Attribute 1 name': 'Date',
+         'Attribute 1 value(s)': netcom_df['Course Start Date'],
+         'Attribute 1 visible': 'visible',
+         'Attribute 1 global': 1,
+         'Attribute 2 name': 'Location',
+         'Attribute 2 value(s)': 'Virtual',
+         'Attribute 2 visible': 'visible',
+         'Attribute 2 global': 1,
+         'Attribute 3 name': 'Time',
+         'Attribute 3 value(s)': netcom_df.apply(
+             lambda row: f"{row['Course Start Time']}-{row['Course End Time']} {row['Time Zone']}", axis=1
+         ),
+         'Attribute 3 visible': 'visible',
+         'Attribute 3 global': 1,
+         'Meta: outline': netcom_df['Formatted_Agenda'],
+         'Meta: days': netcom_df['Duration'],
+         'Meta: location': 'Virtual',
+         'Meta: overview': netcom_df['Target Audience'],
+         'Meta: objectives': netcom_df['Formatted_Objectives'],
+         'Meta: prerequisites': netcom_df['Formatted_Prerequisites'],
+         'Meta: agenda': netcom_df['Formatted_Agenda']
+     })
+     # 7. Combine parent + child products
+     woo_final_df = pd.concat([woo_parent_df, woo_child_df], ignore_index=True)
+     # 8. Desired column order (the Stock and Sold individually columns are intentionally omitted)
+     column_order = [
+         'Type', 'SKU', 'Name', 'Published', 'Visibility in catalog',
+         'Short description', 'Description', 'Tax status', 'In stock?',
+         'Regular price', 'Categories', 'Images',
+         'Parent', 'Brands', 'Attribute 1 name', 'Attribute 1 value(s)', 'Attribute 1 visible',
+         'Attribute 1 global', 'Attribute 2 name', 'Attribute 2 value(s)', 'Attribute 2 visible',
+         'Attribute 2 global', 'Attribute 3 name', 'Attribute 3 value(s)', 'Attribute 3 visible',
+         'Attribute 3 global', 'Meta: outline', 'Meta: days', 'Meta: location', 'Meta: overview',
+         'Meta: objectives', 'Meta: prerequisites', 'Meta: agenda'
+     ]
+     woo_final_df = woo_final_df[column_order]
+
+     # 9. Convert to CSV (in memory)
+     output_buffer = BytesIO()
+     woo_final_df.to_csv(output_buffer, index=False, encoding='utf-8-sig')
+     output_buffer.seek(0)
+
+     return output_buffer
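Why utf-8-sig, in one sketch: the three-byte BOM lets Excel auto-detect UTF-8, so the bullet characters ('•') written into the formatted fields survive a double-click open.

import pandas as pd
from io import BytesIO

buf = BytesIO()
pd.DataFrame({"Name": ["Course outline •"]}).to_csv(buf, index=False, encoding="utf-8-sig")
print(buf.getvalue()[:3])    # b'\xef\xbb\xbf', the UTF-8 byte-order mark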
+ def process_file(uploaded_file):
+     """
+     Takes the uploaded file, processes it, and returns the CSV as a file-like object
+     """
+     processed_csv_io = process_woocommerce_data_in_memory(uploaded_file)
+
+     # Create a temporary file to save the CSV data
+     with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
+         temp_file.write(processed_csv_io.getvalue())
+         temp_path = temp_file.name
+
+     return temp_path
  interface = gr.Interface(
      fn=process_file,
+     inputs=gr.File(label="Upload NetCom CSV", file_types=[".csv"]),
      outputs=gr.File(label="Download WooCommerce CSV"),
+     title="NetCom to WooCommerce CSV Processor",
+     description="Upload your NetCom Reseller Schedule CSV to generate the WooCommerce import-ready CSV.",
      analytics_enabled=False,
  )

+ if __name__ == "__main__":
+     openai_api_key = os.getenv("OPENAI_API_KEY")
+     if not openai_api_key:
+         print("Warning: OPENAI_API_KEY environment variable not set")
+     interface.launch()