# Hugging Face Spaces status when this file was captured: Runtime error
import gradio as gr | |
import pandas as pd | |
import tempfile | |
import os | |
from io import BytesIO | |
import re | |
import openai | |
# Vendor name (as it appears in the 'Vendor' column) -> site-relative logo path.
_BRAND_LOGO_MAP = {
    "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
    "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
    "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
    "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
    "EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
    "ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
    "PMI": "/wp-content/uploads/2025/04/PMI.png",
    "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
    "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
    "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
    "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png",
}

# Text used when a course row has no prerequisites of its own.
_DEFAULT_PREREQUISITE = "No specific prerequisites are required for this course. Basic computer literacy and familiarity with fundamental concepts in the subject area are recommended for the best learning experience."

# Column order of the generated WooCommerce CSV.
_WOO_COLUMN_ORDER = [
    'Type', 'SKU', 'Name', 'Published', 'Visibility in catalog',
    'Short description', 'Description', 'Tax status', 'In stock?',
    'Regular price', 'Categories', 'Images',
    'Parent', 'Brands', 'Attribute 1 name', 'Attribute 1 value(s)', 'Attribute 1 visible',
    'Attribute 1 global', 'Attribute 2 name', 'Attribute 2 value(s)', 'Attribute 2 visible',
    'Attribute 2 global', 'Attribute 3 name', 'Attribute 3 value(s)', 'Attribute 3 visible',
    'Attribute 3 global', 'Meta: outline', 'Meta: days', 'Meta: location', 'Meta: overview',
    'Meta: objectives', 'Meta: prerequisites', 'Meta: agenda',
]


def _text_or_blank(value):
    """Return *value* as a string, mapping missing cells (NaN/None) to ''."""
    return "" if pd.isna(value) else str(value)


def _make_ai_rewriter():
    """Build the text-rewriting callable used for every AI-processed column.

    The returned function takes ``(texts, instruction)`` and returns one
    result string per input text, in order.  Compared with issuing one
    request per row it:

    * creates the OpenAI client only when a request is actually needed,
    * returns '' for blank input instead of prompting the model with nothing,
    * memoizes by ``(instruction, text)`` so text repeated across rows
      (e.g. several sessions of one course) is sent only once.
    """
    client = None
    cache = {}

    def rewrite(texts, instruction):
        nonlocal client
        results = []
        for text in texts:
            if not text.strip():
                results.append("")
                continue
            key = (instruction, text)
            if key not in cache:
                if client is None:
                    client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
                response = client.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=[{"role": "user", "content": f"{instruction}\n\nText: {text}"}],
                    temperature=0,
                )
                cache[key] = response.choices[0].message.content
            results.append(cache[key])
        return results

    return rewrite


def _build_woo_rows(source, *, product_type, sku, parent, dates, times):
    """Map NetCom rows in *source* onto the WooCommerce import columns.

    Parent and variation rows share every column except the product type,
    SKU, parent reference and the Date/Time attribute values, which are
    passed in explicitly.  Series arguments must share *source*'s index.
    """
    return pd.DataFrame({
        'Type': product_type,
        'SKU': sku,
        'Name': source['Course Name'],
        'Published': 1,
        'Visibility in catalog': 'visible',
        'Short description': source['Short_Description'],
        'Description': source['Condensed_Description'],
        'Tax status': 'taxable',
        'In stock?': 1,
        # Raw string: strip '$' and thousands separators from the price text.
        'Regular price': source['SRP Pricing'].replace(r'[\$,]', '', regex=True),
        'Categories': 'courses',
        'Images': source['Vendor'].map(_BRAND_LOGO_MAP).fillna(''),
        'Parent': parent,
        'Brands': source['Vendor'],
        'Attribute 1 name': 'Date',
        'Attribute 1 value(s)': dates,
        'Attribute 1 visible': 'visible',
        'Attribute 1 global': 1,
        'Attribute 2 name': 'Location',
        'Attribute 2 value(s)': 'Virtual',
        'Attribute 2 visible': 'visible',
        'Attribute 2 global': 1,
        'Attribute 3 name': 'Time',
        'Attribute 3 value(s)': times,
        'Attribute 3 visible': 'visible',
        'Attribute 3 global': 1,
        'Meta: outline': source['Formatted_Agenda'],
        'Meta: days': source['Duration'],
        'Meta: location': 'Virtual',
        'Meta: overview': source['Target Audience'],
        'Meta: objectives': source['Formatted_Objectives'],
        'Meta: prerequisites': source['Formatted_Prerequisites'],
        'Meta: agenda': source['Formatted_Agenda'],
    })


def process_woocommerce_data_in_memory(netcom_file):
    """
    Convert an uploaded NetCom schedule CSV into a WooCommerce import CSV.

    Each distinct 'Course ID' becomes one 'variable' parent product whose
    Date/Time attributes list every distinct session value, and each input
    row becomes one 'variation, virtual' child keyed by 'Course SID'.
    Description, objectives, prerequisites and outline text are rewritten
    through the OpenAI chat API (key read from OPENAI_API_KEY); rows with no
    prerequisites get a fixed default sentence.

    Args:
        netcom_file: An object exposing the CSV path as ``.name`` (as the
            original code expects from the upload widget), or the path itself.

    Returns:
        io.BytesIO positioned at 0 containing the UTF-8 (with BOM) CSV.

    Raises:
        KeyError: if an expected NetCom column is missing from the CSV.
    """
    # Accept both an upload object with a .name attribute and a bare path.
    csv_path = getattr(netcom_file, "name", netcom_file)

    # 1. Read the uploaded CSV and standardize the header names
    netcom_df = pd.read_csv(csv_path, encoding='latin1')
    netcom_df.columns = netcom_df.columns.str.strip()

    if netcom_df.empty:
        # Header-only upload: emit a header-only WooCommerce CSV.
        woo_final_df = pd.DataFrame(columns=_WOO_COLUMN_ORDER)
    else:
        rewrite = _make_ai_rewriter()

        # NOTE(review): 'Decription' is spelled this way on purpose here --
        # presumably it matches the NetCom export header; verify before renaming.
        descriptions = [_text_or_blank(v) for v in netcom_df['Decription']]
        objectives = [_text_or_blank(v) for v in netcom_df['Objectives']]
        prerequisites = [_text_or_blank(v) for v in netcom_df['RequiredPrerequisite']]
        agendas = [_text_or_blank(v) for v in netcom_df['Outline']]

        netcom_df['Short_Description'] = rewrite(
            descriptions,
            "Create a concise 250-character summary of this course description:"
        )
        netcom_df['Condensed_Description'] = rewrite(
            descriptions,
            "Condense this description to maximum 750 characters in paragraph format, with clean formatting:"
        )
        netcom_df['Formatted_Objectives'] = rewrite(
            objectives,
            "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':"
        )
        prerequisite_instruction = "Format these prerequisites into a bullet list format with clean formatting. Start each bullet with '• ':"
        netcom_df['Formatted_Prerequisites'] = [
            rewrite([prereq], prerequisite_instruction)[0] if prereq.strip() else _DEFAULT_PREREQUISITE
            for prereq in prerequisites
        ]
        netcom_df['Formatted_Agenda'] = rewrite(
            agendas,
            "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':"
        )

        # 2. Aggregate the session dates and times of each Course ID.
        # NOTE(review): dates are sorted as the raw CSV values, so text such as
        # MM/DD/YYYY will not order chronologically across years -- confirm format.
        netcom_df = netcom_df.sort_values(['Course ID', 'Course Start Date'])
        date_agg = (
            netcom_df.groupby('Course ID')['Course Start Date']
            .apply(lambda x: ','.join(x.astype(str).unique()))
            .reset_index(name='Aggregated_Dates')
        )
        # One "<start>-<end> <zone>" label per session row.
        time_slots = pd.Series(
            [
                f"{st}-{et} {tz}"
                for st, et, tz in zip(netcom_df['Course Start Time'],
                                      netcom_df['Course End Time'],
                                      netcom_df['Time Zone'])
            ],
            index=netcom_df.index,
        )
        # De-duplicated like the dates, so the parent lists each option once.
        time_agg = (
            time_slots.groupby(netcom_df['Course ID'])
            .apply(lambda slots: ','.join(slots.unique()))
            .reset_index(name='Aggregated_Times')
        )

        # 3-4. One parent row per Course ID, with its aggregated attributes
        parent_products = netcom_df.drop_duplicates(subset=['Course ID'])
        parent_products = parent_products.merge(date_agg, on='Course ID', how='left')
        parent_products = parent_products.merge(time_agg, on='Course ID', how='left')

        # 5. Parent (variable) products
        woo_parent_df = _build_woo_rows(
            parent_products,
            product_type='variable',
            sku=parent_products['Course ID'],
            parent='',
            dates=parent_products['Aggregated_Dates'],
            times=parent_products['Aggregated_Times'],
        )
        # 6. Child (variation) products, one per scheduled session
        woo_child_df = _build_woo_rows(
            netcom_df,
            product_type='variation, virtual',
            sku=netcom_df['Course SID'],
            parent=netcom_df['Course ID'],
            dates=netcom_df['Course Start Date'],
            times=time_slots,
        )

        # 7-8. Parents first, then variations, in the import column order
        woo_final_df = pd.concat([woo_parent_df, woo_child_df], ignore_index=True)
        woo_final_df = woo_final_df[_WOO_COLUMN_ORDER]

    # 9. Convert to CSV (in memory)
    output_buffer = BytesIO()
    woo_final_df.to_csv(output_buffer, index=False, encoding='utf-8-sig')
    output_buffer.seek(0)
    return output_buffer
def process_file(uploaded_file):
    """
    Gradio callback: convert the uploaded NetCom CSV and hand back a download.

    Args:
        uploaded_file: The value Gradio passes for the upload input; it is
            forwarded unchanged to process_woocommerce_data_in_memory.

    Returns:
        str: Path of a temporary ``.csv`` file holding the converted data.
    """
    processed_csv_io = process_woocommerce_data_in_memory(uploaded_file)
    # A gr.File output is served from a path on disk, not from an in-memory
    # buffer, so persist the bytes. delete=False keeps the file available
    # after this function returns so Gradio can still read it.
    with tempfile.NamedTemporaryFile(
        mode="wb", prefix="woocommerce_import_", suffix=".csv", delete=False
    ) as handle:
        handle.write(processed_csv_io.getvalue())
    return handle.name
# Gradio front end: one CSV upload in, one converted CSV download out.
interface = gr.Interface(
    process_file,
    gr.File(label="Upload NetCom CSV", file_types=[".csv"]),
    gr.File(label="Download WooCommerce CSV"),
    title="NetCom to WooCommerce CSV Processor",
    description="Upload your NetCom Reseller Schedule CSV to generate the WooCommerce import-ready CSV.",
)

if __name__ == "__main__":
    # Warn early when the OpenAI key is absent, but start the UI regardless.
    openai_api_key = os.environ.get("OPENAI_API_KEY")
    if openai_api_key is None or openai_api_key == "":
        print("Warning: OPENAI_API_KEY environment variable not set")
    interface.launch()