Update app.py
app.py
CHANGED
@@ -1,11 +1,12 @@
-"""NetCom → WooCommerce transformer (Try 2 schema — 100-parallel
-
+"""NetCom → WooCommerce transformer (Try 2 schema — 100-parallel, de-dupe, pandas-fix)
+=====================================================================================
 *Accept CSV **or** Excel schedule files and output the WooCommerce CSV.*
 
-
-
-
-
+Latest tweak
+------------
+• **Logo map** now contains both `"Amazon Web Services"` *and* `"AWS"` keys
+  so either value in the *Vendor* column resolves to the same upload path.
+  (Everything else is untouched.)
 """
 
 from __future__ import annotations
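The new docstring bullet describes a plain dict-level fix: both vendor spellings point at the same asset, so the lookup needs no normalisation step. A minimal illustration of the resulting behaviour (`vendor` is a hypothetical variable; the two entries are from this diff):

    logos = {
        "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
        "AWS": "/wp-content/uploads/2025/04/aws.png",
    }
    vendor = "AWS"
    logo = logos.get(vendor, "")   # same path for either spelling; "" if unmapped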
@@ -26,51 +27,60 @@ import pandas as pd
 # -------- Gradio bool-schema hot-patch --------------------------------------
 _original = gradio_client.utils._json_schema_to_python_type
 
+
 def _fixed_json_schema_to_python_type(schema, defs=None):  # type: ignore
     if isinstance(schema, bool):
         return "any"
     return _original(schema, defs)
 
-
+
+gradio_client.utils._json_schema_to_python_type = (  # type: ignore
+    _fixed_json_schema_to_python_type
+)
 
 # -------- Tiny disk cache ----------------------------------------------------
 CACHE_DIR = Path("ai_response_cache")
 CACHE_DIR.mkdir(exist_ok=True)
 
+
 def _cache_path(p: str) -> Path:
     return CACHE_DIR / f"{hashlib.md5(p.encode()).hexdigest()}.json"
 
+
 def _get_cached(p: str) -> str | None:
     try:
         return json.loads(_cache_path(p).read_text("utf-8"))["response"]
     except Exception:
         return None
 
+
 def _set_cache(p: str, r: str) -> None:
     try:
         _cache_path(p).write_text(json.dumps({"prompt": p, "response": r}), "utf-8")
     except Exception:
         pass
 
+
 # -------- Async GPT helpers --------------------------------------------------
-_SEM = asyncio.Semaphore(100)
+_SEM = asyncio.Semaphore(100)          # ≤100 concurrent OpenAI calls
 _inflight: dict[str, asyncio.Future] = {}   # prompt → Future
 
+
 async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
-    """Single LLM call with
+    """Single LLM call with cache, concurrency cap, and de-duplication."""
     cached = _get_cached(prompt)
     if cached is not None:
         return cached
 
-    #
-
-    if
-        return await
+    # de-dup identical prompts already in-flight
+    existing = _inflight.get(prompt)
+    if existing is not None:
+        return await existing
 
     loop = asyncio.get_running_loop()
 
     async def _call_api() -> str:
-        async with _SEM:
+        async with _SEM:
             try:
                 msg = await client.chat.completions.create(
                     model="gpt-4o-mini",
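The hunk above combines three layers: a disk cache keyed by the MD5 of the prompt, an `_inflight` map so identical prompts share one result, and a semaphore capping concurrency at 100. A self-contained sketch of the coalescing pattern, with a stand-in coroutine in place of the real OpenAI call (`deduped_call` and `fake_llm` are hypothetical names, not the app's):

    import asyncio

    _SEM = asyncio.Semaphore(100)                # ≤100 calls in flight at once
    _inflight: dict[str, asyncio.Future] = {}    # prompt -> shared Future

    async def fake_llm(prompt: str) -> str:      # hypothetical stand-in
        await asyncio.sleep(0.01)
        return prompt.upper()

    async def deduped_call(prompt: str) -> str:
        existing = _inflight.get(prompt)
        if existing is not None:                 # identical prompt already running:
            return await existing                # share its Future instead of re-calling
        fut = asyncio.get_running_loop().create_future()
        _inflight[prompt] = fut
        try:
            async with _SEM:                     # concurrency cap
                result = await fake_llm(prompt)
        except Exception as exc:
            fut.set_exception(exc)               # waiters see the failure too
            raise
        else:
            fut.set_result(result)
            return result
        finally:
            _inflight.pop(prompt, None)          # always clear the in-flight slot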
@@ -90,7 +100,10 @@ async def _gpt_async(client: openai.AsyncOpenAI, prompt: str) -> str:
         finally:
             _inflight.pop(prompt, None)
 
-async def _batch_async(lst: list[str], instruction: str, client: openai.AsyncOpenAI) -> list[str]:
+
+async def _batch_async(
+    lst: list[str], instruction: str, client: openai.AsyncOpenAI
+) -> list[str]:
     """Vectorised helper — returns an output list matching *lst* length."""
     out: list[str] = ["" for _ in lst]
     idx, prompts = [], []
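`_batch_async` is the scatter/gather layer: it pre-fills an output list, sends only the non-blank rows to the model, and writes results back by saved index so the output always lines up with the input. A sketch of that shape (`worker` is a hypothetical async callable standing in for `_gpt_async` with the instruction prepended):

    import asyncio

    async def batch_fill(lst: list[str], worker) -> list[str]:
        out: list[str] = ["" for _ in lst]       # blanks stay blank
        idx, prompts = [], []
        for i, txt in enumerate(lst):
            if str(txt).strip():
                idx.append(i)                    # remember each prompt's origin
                prompts.append(txt)
        results = await asyncio.gather(*(worker(p) for p in prompts))
        for j, val in enumerate(results):
            out[idx[j]] = val                    # scatter back by original position
        return out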
@@ -106,6 +119,7 @@ async def _batch_async(lst: list[str], instruction: str, client: openai.AsyncOpenAI
         out[idx[j]] = val
     return out
 
+
 # -------- Core converter -----------------------------------------------------
 DEFAULT_PREREQ = (
     "No specific prerequisites are required for this course. Basic computer literacy and "
@@ -113,11 +127,13 @@ DEFAULT_PREREQ = (
     "learning experience."
 )
 
+
 def _read(path: str) -> pd.DataFrame:
     if path.lower().endswith((".xlsx", ".xls")):
         return pd.read_excel(path)
     return pd.read_csv(path, encoding="latin1")
 
+
 async def _enrich_dataframe(
     df: pd.DataFrame, dcol: str, ocol: str, pcol: str, acol: str
 ) -> tuple[list[str], list[str], list[str], list[str], list[str]]:
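`_read` keeps format dispatch in one place: Excel extensions route to `pd.read_excel`, everything else to `pd.read_csv` with `latin1`, a lenient encoding for exports that are not valid UTF-8. Hypothetical calls (the filenames are assumptions):

    df = _read("schedule.xlsx")   # -> pd.read_excel
    df = _read("schedule.csv")    # -> pd.read_csv(..., encoding="latin1")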
@@ -146,7 +162,7 @@ async def _enrich_dataframe(
         ),
     )
 
-    #
+    # prerequisites
     prereq_raw = df.get(pcol, "").fillna("").tolist()
     fpre: list[str] = []
     for req in prereq_raw:
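The loop body sits outside this hunk, but the names around it suggest the usual fallback shape: blank prerequisite cells receive `DEFAULT_PREREQ`, non-blank ones keep (or get a cleaned version of) the original text. A sketch under that assumption, not the app's verbatim code:

    for req in prereq_raw:
        if not str(req).strip():
            fpre.append(DEFAULT_PREREQ)   # empty cell -> boilerplate default
        else:
            fpre.append(str(req))         # assumed pass-through for real entries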
@@ -162,9 +178,11 @@ async def _enrich_dataframe(
 
     return sdesc, ldesc, fobj, fout, fpre
 
+
 def convert(path: str) -> BytesIO:
     logos = {
         "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
+        "AWS": "/wp-content/uploads/2025/04/aws.png",
         "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
         "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
         "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
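Duplicating the value keeps this diff to one line; an equivalent design is a small alias table normalised before lookup, so future spelling variants touch only one dict. A sketch (`ALIASES` and `vendor_logo` are hypothetical):

    ALIASES = {"AWS": "Amazon Web Services"}

    def vendor_logo(vendor: str, logos: dict[str, str]) -> str:
        canonical = ALIASES.get(vendor, vendor)   # fold variants onto one key
        return logos.get(canonical, "")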
@@ -190,7 +208,7 @@ def convert(path: str) -> BytesIO:
     sid = first_col("Course SID", "Course SID")
 
     if dur not in df.columns:
-        df[dur] = ""
+        df[dur] = ""
 
     # ---------- LLM enrichment (async) -------------------------------------
     sdesc, ldesc, fobj, fout, fpre = asyncio.run(
@@ -223,7 +241,7 @@ def convert(path: str) -> BytesIO:
                 g["Course Start Time"], g["Course End Time"], g["Time Zone"]
             )
         ),
-        include_groups=False,
+        include_groups=False,
         )
         .reset_index(name="Times")
     )
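`include_groups=False` addresses a pandas ≥ 2.2 deprecation: `DataFrameGroupBy.apply` used to pass the grouping columns into the applied function and now warns that this will stop. Passing the flag opts into the future behaviour and silences the warning. A minimal reproduction (toy columns, not the app's schema):

    import pandas as pd

    df = pd.DataFrame({"sid": ["a", "a", "b"], "start": [9, 10, 9]})
    times = (
        df.groupby("sid")
          .apply(lambda g: ", ".join(map(str, g["start"])), include_groups=False)
          .reset_index(name="Times")
    )
    print(times)   # one row per sid with the joined start times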
@@ -358,8 +376,8 @@ def convert(path: str) -> BytesIO:
     out.seek(0)
     return out
 
-# -------- Gradio wrappers ----------------------------------------------------
 
+# -------- Gradio wrappers ----------------------------------------------------
 def process_file(upload: gr.File) -> str:
     csv_bytes = convert(upload.name)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
@@ -367,6 +385,7 @@ def process_file(upload: gr.File) -> str:
         path = tmp.name
     return path
 
+
 ui = gr.Interface(
     fn=process_file,
     inputs=gr.File(
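The diff cuts off inside the `gr.Interface(...)` call; a typical completion of this wiring looks like the sketch below (the labels, `file_types`, and `title` are assumptions, not taken from the app):

    ui = gr.Interface(
        fn=process_file,
        inputs=gr.File(label="Schedule file (.csv/.xlsx)",
                       file_types=[".csv", ".xlsx", ".xls"]),
        outputs=gr.File(label="WooCommerce CSV"),
        title="NetCom → WooCommerce transformer",
    )

    if __name__ == "__main__":
        ui.launch()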