codys12 committed on
Commit f88b322 · verified · 1 Parent(s): 46bc7fa

Update app.py
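The headline changes in this revision are a computed `Meta: days` value (the inclusive span between the earliest and latest start date per Course ID) and chronologically sorted Attribute 1 date lists. A minimal standalone sketch of that date logic follows; the groupby calls mirror the diff below, but the sample frame and course IDs are invented purely for illustration:

import pandas as pd

# Invented sample schedule; the real input is the uploaded NetCom CSV/XLSX.
df = pd.DataFrame(
    {
        "Course ID": ["AWS-101", "AWS-101", "SEC-200"],
        "Course Start Date": pd.to_datetime(["2025-05-01", "2025-05-03", "2025-06-10"]),
    }
)
df["Date_fmt"] = df["Course Start Date"].dt.strftime("%-m/%-d/%Y")  # Linux-style strftime, as in app.py

# Meta days: inclusive span between earliest and latest start date per Course ID.
meta_days = (
    df.groupby("Course ID")["Course Start Date"]
    .agg(lambda s: (s.max() - s.min()).days + 1)
    .reset_index(name="MetaDays")
)

# Attribute 1 date list: unique dates joined in chronological order.
d_agg = (
    df.groupby("Course ID")["Date_fmt"]
    .apply(lambda s: ",".join(sorted(s.dropna().unique(), key=lambda x: pd.to_datetime(x))))
    .reset_index(name="Dates")
)

print(meta_days)  # AWS-101 -> 3 (May 1 through May 3, inclusive), SEC-200 -> 1
print(d_agg)      # AWS-101 -> "5/1/2025,5/3/2025", SEC-200 -> "6/10/2025"

The span is inclusive, so a course whose earliest and latest dates coincide reports 1 rather than 0.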

Files changed (1)
  1. app.py +105 -55
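Before the diff: every AI-generated section (short and long descriptions, objectives, agenda, prerequisites) is now passed through the new `_dedup_lines` helper. The helper is reproduced below as a standalone sketch, together with an invented sample string, to show what the post-processing does to repeated bullets:

from typing import List

def _dedup_lines(txt: str) -> str:
    """Remove duplicated lines while preserving order inside a block of text."""
    seen = set()
    out: List[str] = []
    for raw in txt.splitlines():
        line = raw.rstrip()
        if line and line not in seen:  # keep only the first occurrence of each non-empty line
            out.append(line)
            seen.add(line)
    return "\n".join(out)

sample = "• Configure IAM roles\n• Configure IAM roles\n\n• Review networking basics"
print(_dedup_lines(sample))
# • Configure IAM roles
# • Review networking basics

Blank lines are dropped as well, since only non-empty lines the helper has not seen before are kept.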
app.py CHANGED
@@ -1,17 +1,19 @@
- """NetCom → WooCommerce transformer (Try 2 schema — persistent cache, 100-parallel,
- duplicate-safe, relative-logo paths, cache-preload)
- ==================================================================================

- *Accept a NetCom schedule (CSV/XLSX) and **optionally** a *previous* WooCommerce
- CSV; output the fresh WooCommerce CSV.*

- New in this revision
--------------------
- * **Relative** image paths kept (WooCommerce resolves them to your own domain).
- * Second optional file-input lets you *pre-load* the on-disk cache from a prior
-   run, so already-processed courses skip OpenAI completely.
- * Everything else (persistent cache in `/data`, 100-parallel semaphore,
-   in-flight de-duplication, pandas compatibility fix) remains unchanged.
"""

from __future__ import annotations
@@ -23,44 +25,64 @@ import os
import tempfile
from io import BytesIO
from pathlib import Path

import gradio as gr
import gradio_client.utils
import openai
import pandas as pd

- # ── Gradio bool-schema hot-patch ─────────────────────────────────────────────
_original = gradio_client.utils._json_schema_to_python_type
def _fixed_json_schema_to_python_type(schema, defs=None):  # type: ignore
    if isinstance(schema, bool):
        return "any"
    return _original(schema, defs)
gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type  # type: ignore

- # ── Persistent disk cache (HF Spaces uses /data) ─────────────────────────────
_PERSISTENT_ROOT = Path("/data")
CACHE_DIR = (_PERSISTENT_ROOT if _PERSISTENT_ROOT.exists() else Path(".")) / "ai_response_cache"
CACHE_DIR.mkdir(parents=True, exist_ok=True)

def _cache_path(p: str) -> Path:
    return CACHE_DIR / f"{hashlib.md5(p.encode()).hexdigest()}.json"

def _get_cached(p: str) -> str | None:
    try:
        return json.loads(_cache_path(p).read_text("utf-8"))["response"]
    except Exception:
        return None

def _set_cache(p: str, r: str) -> None:
    try:
        _cache_path(p).write_text(json.dumps({"prompt": p, "response": r}), "utf-8")
    except Exception:
        pass

- # ── OpenAI helpers: 100-parallel + de-dup ────────────────────────────────────
_SEM = asyncio.Semaphore(100)  # ≤100 concurrent OpenAI calls
_inflight: dict[str, asyncio.Future] = {}  # prompt → Future

async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
    cached = _get_cached(prompt)
    if cached is not None:
@@ -93,6 +115,7 @@ async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
    finally:
        _inflight.pop(prompt, None)

async def _batch_async(lst, instruction: str, client):
    out = ["" for _ in lst]
    idx, prompts = [], []
@@ -105,30 +128,30 @@ async def _batch_async(lst, instruction: str, client):

    responses = await asyncio.gather(*[_gpt_async(client, p) for p in prompts])
    for j, val in enumerate(responses):
-         out[idx[j]] = val
    return out

- # ── Instructions (reuse across preload & gen) ────────────────────────────────
DESC_SHORT = "Create a concise 250-character summary of this course description:"
DESC_LONG = "Condense this description to a maximum of 750 characters in paragraph format, with clean formatting:"
OBJECTIVES = "Format these objectives into a bullet list with clean formatting. Start each bullet with '• ':"
AGENDA = "Format this agenda into a bullet list with clean formatting. Start each bullet with '• ':"
PREREQ = "Format these prerequisites into a bullet list with clean formatting. Start each bullet with '• ':"

- # ── Logo map (relative paths, with common aliases) ───────────────────────────
logos = {
-     "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
-     "AWS": "/wp-content/uploads/2025/04/aws.png",
-     "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
-     "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
-     "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
-     "EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
-     "ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
-     "PMI": "/wp-content/uploads/2025/04/PMI.png",
-     "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
-     "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
-     "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
-     "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png",
}

DEFAULT_PREREQ = (
@@ -137,9 +160,10 @@ DEFAULT_PREREQ = (
    "learning experience."
)

- # ── Cache-preload from previous WooCommerce CSV ──────────────────────────────
def _preload_cache(prev_csv: str, df_new: pd.DataFrame, dcol, ocol, pcol, acol):
-     """Seed the on-disk cache with completions from an earlier WooCommerce CSV."""
    try:
        prev = pd.read_csv(prev_csv, encoding="utf-8-sig")
    except Exception:
@@ -159,14 +183,15 @@ def _preload_cache(prev_csv: str, df_new: pd.DataFrame, dcol, ocol, pcol, acol):
        ag = str(row[acol])
        pre = str(row[pcol])

-         _set_cache(f"{DESC_SHORT}\n\nText: {desc}", old.get("Short description", ""))
-         _set_cache(f"{DESC_LONG}\n\nText: {desc}", old.get("Description", ""))
-         _set_cache(f"{OBJECTIVES}\n\nText: {obj}", old.get("Meta: objectives", ""))
-         _set_cache(f"{AGENDA}\n\nText: {ag}", old.get("Meta: agenda", ""))
        if pre.strip():
-             _set_cache(f"{PREREQ}\n\nText: {pre}", old.get("Meta: prerequisites", ""))

- # ── Helper: read user file (CSV or Excel) ────────────────────────────────────
def _read(path: str) -> pd.DataFrame:
    if path.lower().endswith((".xlsx", ".xls")):
        return pd.read_excel(path)
@@ -191,9 +216,17 @@ async def _enrich_dataframe(df, dcol, ocol, pcol, acol):
        out = await _batch_async([req], PREREQ, client)
        fpre.append(out[0])

    return sdesc, ldesc, fobj, fout, fpre

- # ── Main converter ───────────────────────────────────────────────────────────
def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
    df = _read(schedule_path)
    df.columns = df.columns.str.strip()
@@ -203,36 +236,43 @@ def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
    ocol = first_col("Objectives", "objectives")
    pcol = first_col("RequiredPrerequisite", "Required Pre-requisite")
    acol = first_col("Outline")
-     dur = first_col("Duration") or "Duration"
    sid = first_col("Course SID", "Course SID")

-     if dur not in df.columns:
-         df[dur] = ""
-
    # optional cache preload
    if prev_csv_path:
        _preload_cache(prev_csv_path, df, dcol, ocol, pcol, acol)

-     # async-enrich via LLM
    sdesc, ldesc, fobj, fout, fpre = asyncio.run(
        _enrich_dataframe(df, dcol, ocol, pcol, acol)
    )
-     df["Short_Description"] = sdesc
-     df["Condensed_Description"] = ldesc
-     df["Formatted_Objectives"] = fobj
-     df["Formatted_Agenda"] = fout
-     df["Formatted_Prerequisites"]= fpre

-     # schedule aggregation
    df["Course Start Date"] = pd.to_datetime(df["Course Start Date"], errors="coerce")
    df["Date_fmt"] = df["Course Start Date"].dt.strftime("%-m/%-d/%Y")

    dsorted = df.sort_values(["Course ID", "Course Start Date"])
    d_agg = (
        dsorted.groupby("Course ID")["Date_fmt"]
-         .apply(lambda s: ",".join(s.dropna().unique()))
        .reset_index(name="Dates")
    )
    t_agg = (
        dsorted.groupby("Course ID", group_keys=False)
        .apply(
@@ -246,7 +286,16 @@ def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
        )
        .reset_index(name="Times")
    )
-     parents = dsorted.drop_duplicates("Course ID").merge(d_agg).merge(t_agg)

    parent = pd.DataFrame(
        {
@@ -279,7 +328,7 @@ def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
            "Attribute 3 visible": "visible",
            "Attribute 3 global": 1,
            "Meta: outline": parents["Formatted_Agenda"],
-             "Meta: days": parents[dur],
            "Meta: location": "Virtual",
            "Meta: overview": parents["Target Audience"],
            "Meta: objectives": parents["Formatted_Objectives"],
@@ -322,7 +371,7 @@ def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
            "Attribute 3 visible": "visible",
            "Attribute 3 global": 1,
            "Meta: outline": dsorted["Formatted_Agenda"],
-             "Meta: days": dsorted[dur],
            "Meta: location": "Virtual",
            "Meta: overview": dsorted["Target Audience"],
            "Meta: objectives": dsorted["Formatted_Objectives"],
@@ -346,7 +395,8 @@ def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
    out.seek(0)
    return out

- # ── Gradio interface ─────────────────────────────────────────────────────────
def process_files(schedule: gr.File, previous: gr.File | None) -> str:
    csv_bytes = convert(schedule.name, previous.name if previous else None)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
@@ -360,7 +410,7 @@ ui = gr.Interface(
        gr.File(label="Previous WooCommerce CSV (optional)", file_types=[".csv"]),
    ],
    outputs=gr.File(label="Download WooCommerce CSV"),
-     title="NetCom → WooCommerce CSV Processor (Try 2)",
    description=(
        "1. Upload the **latest NetCom schedule** file.\n"
        "2. *(Optional)* Upload the **WooCommerce CSV** generated by a previous run to "
 
+ """NetCom → WooCommerce transformer (Try 3 schema — meta‑days calc, sorted attributes, deduped AI sections, persistent cache, 100‑parallel,
+ duplicate‑safe, relative‑logo paths, cache‑preload)
+ ==============================================================================

+ Accept a NetCom schedule (CSV/XLSX) and **optionally** a *previous* WooCommerce
+ CSV; output the fresh WooCommerce CSV.

+ New in this revision
--------------------
+ * **Meta days** automatically calculated as the inclusive span (in days) between
+   the earliest and latest course dates for each Course ID.
+ * **Attribute 1 (Date)** lists are now guaranteed to be sorted chronologically.
+ * All AI‑generated sections (descriptions, objectives, agenda, prerequisites)
+   are post‑processed to **deduplicate any repeated lines** inside each section.
+ * Everything else (persistent cache in `/data`, 100‑parallel semaphore,
+   in‑flight de‑duplication, pandas compatibility fix) remains unchanged.
"""

from __future__ import annotations

import tempfile
from io import BytesIO
from pathlib import Path
+ from typing import List

import gradio as gr
import gradio_client.utils
import openai
import pandas as pd

+ # ── Gradio bool‑schema hot‑patch ────────────────────────────────────────────
_original = gradio_client.utils._json_schema_to_python_type
+
def _fixed_json_schema_to_python_type(schema, defs=None):  # type: ignore
    if isinstance(schema, bool):
        return "any"
    return _original(schema, defs)
+
gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type  # type: ignore

+ # ── Persistent disk cache (HF Spaces uses /data) ────────────────────────────
_PERSISTENT_ROOT = Path("/data")
CACHE_DIR = (_PERSISTENT_ROOT if _PERSISTENT_ROOT.exists() else Path(".")) / "ai_response_cache"
CACHE_DIR.mkdir(parents=True, exist_ok=True)

+
def _cache_path(p: str) -> Path:
    return CACHE_DIR / f"{hashlib.md5(p.encode()).hexdigest()}.json"

+
def _get_cached(p: str) -> str | None:
    try:
        return json.loads(_cache_path(p).read_text("utf-8"))["response"]
    except Exception:
        return None

+
def _set_cache(p: str, r: str) -> None:
    try:
        _cache_path(p).write_text(json.dumps({"prompt": p, "response": r}), "utf-8")
    except Exception:
        pass

+ # ── Helpers ────────────────────────────────────────────────────────────────
+
+ def _dedup_lines(txt: str) -> str:
+     """Remove duplicated lines while preserving order inside a block of text."""
+     seen = set()
+     out: List[str] = []
+     for raw in txt.splitlines():
+         line = raw.rstrip()
+         if line and line not in seen:
+             out.append(line)
+             seen.add(line)
+     return "\n".join(out)
+
+ # ── OpenAI helpers: 100‑parallel + de‑dup ───────────────────────────────────
_SEM = asyncio.Semaphore(100)  # ≤100 concurrent OpenAI calls
_inflight: dict[str, asyncio.Future] = {}  # prompt → Future

+
async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
    cached = _get_cached(prompt)
    if cached is not None:

    finally:
        _inflight.pop(prompt, None)

+
async def _batch_async(lst, instruction: str, client):
    out = ["" for _ in lst]
    idx, prompts = [], []


    responses = await asyncio.gather(*[_gpt_async(client, p) for p in prompts])
    for j, val in enumerate(responses):
+         out[idx[j]] = _dedup_lines(val)
    return out

+ # ── Instructions (reuse across preload & gen) ───────────────────────────────
DESC_SHORT = "Create a concise 250-character summary of this course description:"
DESC_LONG = "Condense this description to a maximum of 750 characters in paragraph format, with clean formatting:"
OBJECTIVES = "Format these objectives into a bullet list with clean formatting. Start each bullet with '• ':"
AGENDA = "Format this agenda into a bullet list with clean formatting. Start each bullet with '• ':"
PREREQ = "Format these prerequisites into a bullet list with clean formatting. Start each bullet with '• ':"

+ # ── Logo map (absolute URLs, with common aliases) ───────────────────────────
logos = {
+     "Amazon Web Services": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/aws.png",
+     "AWS": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/aws.png",
+     "Cisco": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
+     "Microsoft": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
+     "Google Cloud": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/Google_Cloud.png",
+     "EC Council": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/Ec_Council.png",
+     "ITIL": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/ITIL.webp",
+     "PMI": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/PMI.png",
+     "Comptia": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/Comptia.png",
+     "Autodesk": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/autodesk.png",
+     "ISC2": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/ISC2.png",
+     "AICerts": "https://staging.greathorizonslearning.com/wp-content/uploads/2025/04/aicerts-logo-1.png",
}

DEFAULT_PREREQ = (

    "learning experience."
)

+ # ── Cache‑preload from previous WooCommerce CSV ─────────────────────────────
+
def _preload_cache(prev_csv: str, df_new: pd.DataFrame, dcol, ocol, pcol, acol):
+     """Seed the on‑disk cache with completions from an earlier WooCommerce CSV."""
    try:
        prev = pd.read_csv(prev_csv, encoding="utf-8-sig")
    except Exception:

        ag = str(row[acol])
        pre = str(row[pcol])

+         _set_cache(f"{DESC_SHORT}\n\nText: {desc}", _dedup_lines(old.get("Short description", "")))
+         _set_cache(f"{DESC_LONG}\n\nText: {desc}", _dedup_lines(old.get("Description", "")))
+         _set_cache(f"{OBJECTIVES}\n\nText: {obj}", _dedup_lines(old.get("Meta: objectives", "")))
+         _set_cache(f"{AGENDA}\n\nText: {ag}", _dedup_lines(old.get("Meta: agenda", "")))
        if pre.strip():
+             _set_cache(f"{PREREQ}\n\nText: {pre}", _dedup_lines(old.get("Meta: prerequisites", "")))
+
+ # ── Helper: read user file (CSV or Excel) ───────────────────────────────────

def _read(path: str) -> pd.DataFrame:
    if path.lower().endswith((".xlsx", ".xls")):
        return pd.read_excel(path)

        out = await _batch_async([req], PREREQ, client)
        fpre.append(out[0])

+     # Ensure everything is deduped (safety).
+     sdesc = [_dedup_lines(t) for t in sdesc]
+     ldesc = [_dedup_lines(t) for t in ldesc]
+     fobj = [_dedup_lines(t) for t in fobj]
+     fout = [_dedup_lines(t) for t in fout]
+     fpre = [_dedup_lines(t) for t in fpre]
+
    return sdesc, ldesc, fobj, fout, fpre

+ # ── Main converter ──────────────────────────────────────────────────────────
+
def convert(schedule_path: str, prev_csv_path: str | None = None) -> BytesIO:
    df = _read(schedule_path)
    df.columns = df.columns.str.strip()

    ocol = first_col("Objectives", "objectives")
    pcol = first_col("RequiredPrerequisite", "Required Pre-requisite")
    acol = first_col("Outline")
+     dur = first_col("Duration") or "Duration"  # kept for backward‑compat (unused)
    sid = first_col("Course SID", "Course SID")

    # optional cache preload
    if prev_csv_path:
        _preload_cache(prev_csv_path, df, dcol, ocol, pcol, acol)

+     # async‑enrich via LLM
    sdesc, ldesc, fobj, fout, fpre = asyncio.run(
        _enrich_dataframe(df, dcol, ocol, pcol, acol)
    )

+     df["Short_Description"] = sdesc
+     df["Condensed_Description"] = ldesc
+     df["Formatted_Objectives"] = fobj
+     df["Formatted_Agenda"] = fout
+     df["Formatted_Prerequisites"] = fpre
+
+     # schedule aggregation & meta‑days calculation
    df["Course Start Date"] = pd.to_datetime(df["Course Start Date"], errors="coerce")
    df["Date_fmt"] = df["Course Start Date"].dt.strftime("%-m/%-d/%Y")

    dsorted = df.sort_values(["Course ID", "Course Start Date"])
+
+     # "MetaDays" = inclusive span between earliest & latest dates per Course ID
+     meta_days = (
+         dsorted.groupby("Course ID")["Course Start Date"].agg(lambda s: (s.max() - s.min()).days + 1)
+         .reset_index(name="MetaDays")
+     )
+
+     # Attribute 1 list — ensure chronological order
    d_agg = (
        dsorted.groupby("Course ID")["Date_fmt"]
+         .apply(lambda s: ",".join(sorted(s.dropna().unique(), key=lambda x: pd.to_datetime(x))))
        .reset_index(name="Dates")
    )
+
    t_agg = (
        dsorted.groupby("Course ID", group_keys=False)
        .apply(

        )
        .reset_index(name="Times")
    )
+
+     parents = (
+         dsorted.drop_duplicates("Course ID")
+         .merge(d_agg)
+         .merge(t_agg)
+         .merge(meta_days)
+     )
+
+     # propagate MetaDays to each schedule row
+     dsorted = dsorted.merge(meta_days, on="Course ID", how="left")

    parent = pd.DataFrame(
        {

            "Attribute 3 visible": "visible",
            "Attribute 3 global": 1,
            "Meta: outline": parents["Formatted_Agenda"],
+             "Meta: days": parents["MetaDays"],
            "Meta: location": "Virtual",
            "Meta: overview": parents["Target Audience"],
            "Meta: objectives": parents["Formatted_Objectives"],

            "Attribute 3 visible": "visible",
            "Attribute 3 global": 1,
            "Meta: outline": dsorted["Formatted_Agenda"],
+             "Meta: days": dsorted["MetaDays"],
            "Meta: location": "Virtual",
            "Meta: overview": dsorted["Target Audience"],
            "Meta: objectives": dsorted["Formatted_Objectives"],

    out.seek(0)
    return out

+ # ── Gradio interface ────────────────────────────────────────────────────────
+
def process_files(schedule: gr.File, previous: gr.File | None) -> str:
    csv_bytes = convert(schedule.name, previous.name if previous else None)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:

        gr.File(label="Previous WooCommerce CSV (optional)", file_types=[".csv"]),
    ],
    outputs=gr.File(label="Download WooCommerce CSV"),
+     title="NetCom → WooCommerce CSV Processor (Try 3)",
    description=(
        "1. Upload the **latest NetCom schedule** file.\n"
        "2. *(Optional)* Upload the **WooCommerce CSV** generated by a previous run to "