import os
import re
import tempfile
from io import BytesIO

import gradio as gr
import openai
import pandas as pd

# Vendor name (as it appears in the 'Vendor' column) -> logo image path.
BRAND_LOGO_MAP = {
    "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
    "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
    "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
    "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
    "EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
    "ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
    "PMI": "/wp-content/uploads/2025/04/PMI.png",
    "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
    "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
    "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
    "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png",
}

# Text used for courses whose prerequisite field is blank.
DEFAULT_PREREQUISITE = (
    "No specific prerequisites are required for this course. Basic computer "
    "literacy and familiarity with fundamental concepts in the subject area "
    "are recommended for the best learning experience."
)

AI_MODEL = "gpt-4o-mini"

# Columns the processing below reads from the uploaded CSV.
# NOTE: 'Decription' is spelled this way on purpose; it is the header the
# code has always read from the input file.
REQUIRED_COLUMNS = (
    'Course ID', 'Course SID', 'Course Name', 'Course Start Date',
    'Course Start Time', 'Course End Time', 'Time Zone', 'SRP Pricing',
    'Vendor', 'Duration', 'Target Audience', 'Decription', 'Objectives',
    'RequiredPrerequisite', 'Outline',
)

# Final column order of the generated CSV (removed Stock and Sold individually?)
COLUMN_ORDER = [
    'Type', 'SKU', 'Name', 'Published', 'Visibility in catalog',
    'Short description', 'Description', 'Tax status', 'In stock?',
    'Regular price', 'Categories', 'Images', 'Parent', 'Brands',
    'Attribute 1 name', 'Attribute 1 value(s)', 'Attribute 1 visible',
    'Attribute 1 global',
    'Attribute 2 name', 'Attribute 2 value(s)', 'Attribute 2 visible',
    'Attribute 2 global',
    'Attribute 3 name', 'Attribute 3 value(s)', 'Attribute 3 visible',
    'Attribute 3 global',
    'Meta: outline', 'Meta: days', 'Meta: location', 'Meta: overview',
    'Meta: objectives', 'Meta: prerequisites', 'Meta: agenda',
]

# Helper column holding the per-row "start-end timezone" label.
_TIME_LABEL = '_Time_Label'


def _process_text_with_ai(client, texts, instruction):
    """Run every text through the chat model and return the replies in order.

    Blank texts are answered with an empty string without calling the API
    (the model has nothing to work on), and identical texts are only sent
    once per call: schedule rows of the same course repeat the same text.

    Args:
        client: An ``openai.OpenAI`` client.
        texts: List of strings to process.
        instruction: Prompt prefix placed before each text.

    Returns:
        A list of strings, one per input text, in the same order.
    """
    replies_by_text = {}
    results = []
    for text in texts:
        if not text.strip():
            results.append("")
            continue
        if text not in replies_by_text:
            response = client.chat.completions.create(
                model=AI_MODEL,
                messages=[{
                    "role": "user",
                    "content": f"{instruction}\n\nText: {text}",
                }],
                temperature=0,
            )
            # The API may return no content; never propagate None.
            replies_by_text[text] = response.choices[0].message.content or ""
        results.append(replies_by_text[text])
    return results


def _text_column(frame, column):
    """Return *column* of *frame* as a list of strings with NaN as ''."""
    return frame[column].fillna("").astype(str).tolist()


def _clean_price(prices):
    """Strip '$' and ',' from string prices; other values pass through."""
    return prices.replace(r'[\$,]', '', regex=True)


def _build_woo_frame(rows, *, product_type, sku, parent, dates, times):
    """Build the WooCommerce rows shared by parent and variation products.

    Args:
        rows: Source rows (one WooCommerce row is produced per source row).
        product_type: Value for the 'Type' column.
        sku: Values for the 'SKU' column.
        parent: Value(s) for the 'Parent' column.
        dates: Values for the 'Date' attribute.
        times: Values for the 'Time' attribute.
    """
    return pd.DataFrame({
        'Type': product_type,
        'SKU': sku,
        'Name': rows['Course Name'],
        'Published': 1,
        'Visibility in catalog': 'visible',
        'Short description': rows['Short_Description'],
        'Description': rows['Condensed_Description'],
        'Tax status': 'taxable',
        'In stock?': 1,
        'Regular price': _clean_price(rows['SRP Pricing']),
        'Categories': 'courses',
        'Images': rows['Vendor'].map(BRAND_LOGO_MAP).fillna(''),
        'Parent': parent,
        'Brands': rows['Vendor'],
        'Attribute 1 name': 'Date',
        'Attribute 1 value(s)': dates,
        'Attribute 1 visible': 'visible',
        'Attribute 1 global': 1,
        'Attribute 2 name': 'Location',
        'Attribute 2 value(s)': 'Virtual',
        'Attribute 2 visible': 'visible',
        'Attribute 2 global': 1,
        'Attribute 3 name': 'Time',
        'Attribute 3 value(s)': times,
        'Attribute 3 visible': 'visible',
        'Attribute 3 global': 1,
        'Meta: outline': rows['Formatted_Agenda'],
        'Meta: days': rows['Duration'],
        'Meta: location': 'Virtual',
        'Meta: overview': rows['Target Audience'],
        'Meta: objectives': rows['Formatted_Objectives'],
        'Meta: prerequisites': rows['Formatted_Prerequisites'],
        'Meta: agenda': rows['Formatted_Agenda'],
    }, index=rows.index)


def process_woocommerce_data_in_memory(netcom_file):
    """Convert an uploaded NetCom schedule CSV into a WooCommerce import CSV.

    Args:
        netcom_file: The uploaded file, either an object exposing the file
            path as ``.name`` or the path itself.

    Returns:
        A ``BytesIO`` positioned at 0 holding the CSV encoded as UTF-8 with
        BOM. It contains one 'variable' row per Course ID followed by one
        'variation, virtual' row per input row.

    Raises:
        ValueError: If no file was provided.
        KeyError: If the CSV lacks one of the required columns.
    """
    if netcom_file is None:
        raise ValueError("No NetCom CSV file was uploaded.")

    # 1. Read the uploaded CSV into a DataFrame
    source_path = getattr(netcom_file, "name", netcom_file)
    netcom_df = pd.read_csv(source_path, encoding='latin1')
    netcom_df.columns = netcom_df.columns.str.strip()  # standardize column names

    missing = [col for col in REQUIRED_COLUMNS if col not in netcom_df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {', '.join(missing)}")

    client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    descriptions = _text_column(netcom_df, 'Decription')
    objectives = _text_column(netcom_df, 'Objectives')
    prerequisites = _text_column(netcom_df, 'RequiredPrerequisite')
    agendas = _text_column(netcom_df, 'Outline')

    netcom_df['Short_Description'] = _process_text_with_ai(
        client, descriptions,
        "Create a concise 250-character summary of this course description:",
    )
    netcom_df['Condensed_Description'] = _process_text_with_ai(
        client, descriptions,
        "Condense this description to maximum 750 characters in paragraph format, with clean formatting:",
    )
    netcom_df['Formatted_Objectives'] = _process_text_with_ai(
        client, objectives,
        "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':",
    )
    ai_prerequisites = _process_text_with_ai(
        client, prerequisites,
        "Format these prerequisites into a bullet list format with clean formatting. Start each bullet with '• ':",
    )
    # Courses without prerequisites get the default text instead.
    netcom_df['Formatted_Prerequisites'] = [
        formatted if raw.strip() else DEFAULT_PREREQUISITE
        for raw, formatted in zip(prerequisites, ai_prerequisites)
    ]
    netcom_df['Formatted_Agenda'] = _process_text_with_ai(
        client, agendas,
        "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':",
    )

    # 2. Create aggregated dates and times for each Course ID
    # Sort by Course ID and date first
    netcom_df = netcom_df.sort_values(['Course ID', 'Course Start Date'])
    netcom_df[_TIME_LABEL] = [
        f"{start}-{end} {zone}"
        for start, end, zone in zip(
            netcom_df['Course Start Time'],
            netcom_df['Course End Time'],
            netcom_df['Time Zone'],
        )
    ]

    date_agg = (
        netcom_df.groupby('Course ID')['Course Start Date']
        .agg(lambda dates: ','.join(dates.astype(str).unique()))
        .reset_index(name='Aggregated_Dates')
    )
    time_agg = (
        netcom_df.groupby('Course ID')[_TIME_LABEL]
        .agg(','.join)
        .reset_index(name='Aggregated_Times')
    )

    # 3. Extract unique parent products and 4. merge aggregated dates/times
    parent_products = (
        netcom_df.drop_duplicates(subset=['Course ID'])
        .merge(date_agg, on='Course ID', how='left')
        .merge(time_agg, on='Course ID', how='left')
    )

    # 5. Create parent (variable) products
    woo_parent_df = _build_woo_frame(
        parent_products,
        product_type='variable',
        sku=parent_products['Course ID'],
        parent='',
        dates=parent_products['Aggregated_Dates'],
        times=parent_products['Aggregated_Times'],
    )

    # 6. Create child (variation) products
    woo_child_df = _build_woo_frame(
        netcom_df,
        product_type='variation, virtual',
        sku=netcom_df['Course SID'],
        parent=netcom_df['Course ID'],
        dates=netcom_df['Course Start Date'],
        times=netcom_df[_TIME_LABEL],
    )

    # 7. Combine parent + child, 8. enforce the column order
    woo_final_df = pd.concat([woo_parent_df, woo_child_df], ignore_index=True)
    woo_final_df = woo_final_df[COLUMN_ORDER]

    # 9. Convert to CSV (in memory)
    output_buffer = BytesIO()
    woo_final_df.to_csv(output_buffer, index=False, encoding='utf-8-sig')
    output_buffer.seek(0)
    return output_buffer


def process_file(uploaded_file):
    """Process the uploaded file and return the path of the generated CSV.

    The file output component is given a path on disk, so the in-memory
    result is written to a temporary file that is left in place for download.
    """
    processed_csv_io = process_woocommerce_data_in_memory(uploaded_file)
    with tempfile.NamedTemporaryFile(
        mode="wb", prefix="woocommerce_", suffix=".csv", delete=False
    ) as tmp_file:
        tmp_file.write(processed_csv_io.getvalue())
    return tmp_file.name


interface = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="Upload NetCom CSV", file_types=[".csv"]),
    outputs=gr.File(label="Download WooCommerce CSV"),
    title="NetCom to WooCommerce CSV Processor",
    description="Upload your NetCom Reseller Schedule CSV to generate the WooCommerce import-ready CSV.",
)

if __name__ == "__main__":
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        print("Warning: OPENAI_API_KEY environment variable not set")
    interface.launch()