Spaces:

codys12
/

NetCom-to-WooComerce

Runtime error

App Files Files Community

codys12 committed on Apr 10

Commit

e570bda

verified ·

1 Parent(s): d9c493b

Update app.py

Browse files

Files changed (1) hide show

app.py +138 -53

app.py CHANGED Viewed

@@ -5,6 +5,13 @@ import os
 from io import BytesIO
 import re
 import openai
 import gradio_client.utils
@@ -19,6 +26,102 @@ def _fixed_json_schema_to_python_type(schema, defs=None):
 gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type
 def process_woocommerce_data_in_memory(netcom_file):
     """
     Reads the uploaded NetCom CSV file in-memory, processes it to the WooCommerce format,
@@ -45,35 +148,6 @@ def process_woocommerce_data_in_memory(netcom_file):
     # 1. Read the uploaded CSV into a DataFrame
     netcom_df = pd.read_csv(netcom_file.name, encoding='latin1')
     netcom_df.columns = netcom_df.columns.str.strip()  # standardize column names
-    # Initialize OpenAI client
-    client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-    # Process descriptions in batches of 500
-    def process_text_with_ai(texts, instruction):
-        """Process text with GPT-4o-mini"""
-        if not texts:
-            return []
-        results = []
-        batch_size = 500
-        for i in range(0, len(texts), batch_size):
-            batch = texts[i:i+batch_size]
-            batch_prompts = [f"{instruction}\n\nText: {text}" for text in batch]
-            batch_results = []
-            for prompt in batch_prompts:
-                response = client.chat.completions.create(
-                    model="gpt-4o-mini",
-                    messages=[{"role": "user", "content": prompt}],
-                    temperature=0
-                )
-                batch_results.append(response.choices[0].message.content)
-            results.extend(batch_results)
-        return results
     # Prepare descriptions for AI processing
     descriptions = netcom_df['Decription'].fillna("").tolist()
@@ -81,43 +155,54 @@ def process_woocommerce_data_in_memory(netcom_file):
     prerequisites = netcom_df['RequiredPrerequisite'].fillna("").tolist()
     agendas = netcom_df['Outline'].fillna("").tolist()
-    # Process with AI
-    short_descriptions = process_text_with_ai(
-        descriptions,
-        "Create a concise 250-character summary of this course description:"
-    )
-    condensed_descriptions = process_text_with_ai(
-        descriptions,
-        "Condense this description to maximum 750 characters in paragraph format, with clean formatting:"
-    )
-    formatted_objectives = process_text_with_ai(
-        objectives,
-        "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':"
-    )
-    formatted_prerequisites = []
     for prereq in prerequisites:
         if not prereq or pd.isna(prereq) or prereq.strip() == "":
-            formatted_prerequisites.append(default_prerequisite)
         else:
-            formatted_prereq = process_text_with_ai(
                 [prereq],
                 "Format these prerequisites into a bullet list format with clean formatting. Start each bullet with '• ':"
-            )[0]
-            formatted_prerequisites.append(formatted_prereq)
-    formatted_agendas = process_text_with_ai(
-        agendas,
-        "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':"
-    )
     # Add processed text to dataframe
     netcom_df['Short_Description'] = short_descriptions
     netcom_df['Condensed_Description'] = condensed_descriptions
     netcom_df['Formatted_Objectives'] = formatted_objectives
-    netcom_df['Formatted_Prerequisites'] = formatted_prerequisites
     netcom_df['Formatted_Agenda'] = formatted_agendas
     # 2. Create aggregated dates and times for each Course ID

 from io import BytesIO
 import re
 import openai
+import hashlib
+import json
+import asyncio
+import aiohttp
+from pathlib import Path
+from concurrent.futures import ThreadPoolExecutor
+from functools import lru_cache
 import gradio_client.utils
 gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type
+# Directory that holds the cached AI responses. The path is relative, so it
+# resolves against the process's current working directory. It is created
+# when this module is loaded; exist_ok=True makes that a no-op if it is
+# already present.
+CACHE_DIR = Path("ai_response_cache")
+CACHE_DIR.mkdir(exist_ok=True)
+def get_cache_path(prompt):
+    """Generate a unique cache file path based on the prompt content"""
+    prompt_hash = hashlib.md5(prompt.encode('utf-8')).hexdigest()
+    return CACHE_DIR / f"{prompt_hash}.json"
+def get_cached_response(prompt):
+    """Try to get a cached response for the given prompt"""
+    cache_path = get_cache_path(prompt)
+    if cache_path.exists():
+        try:
+            with open(cache_path, 'r', encoding='utf-8') as f:
+                return json.load(f)['response']
+        except Exception as e:
+            print(f"Error reading cache: {e}")
+    return None
+def cache_response(prompt, response):
+    """Cache the response for a given prompt"""
+    cache_path = get_cache_path(prompt)
+    try:
+        with open(cache_path, 'w', encoding='utf-8') as f:
+            json.dump({'prompt': prompt, 'response': response}, f)
+    except Exception as e:
+        print(f"Error writing to cache: {e}")
+async def process_text_batch_async(client, batch_prompts):
+    """Process a batch of prompts asynchronously"""
+    results = []
+    # First check cache for each prompt
+    for prompt in batch_prompts:
+        cached = get_cached_response(prompt)
+        if cached:
+            results.append((prompt, cached))
+    # Filter out prompts that were found in cache
+    uncached_prompts = [p for p in batch_prompts if not any(p == cached_prompt for cached_prompt, _ in results)]
+    if uncached_prompts:
+        # Process uncached prompts in parallel
+        async def process_single_prompt(prompt):
+            try:
+                response = await client.chat.completions.create(
+                    model="gpt-4o-mini",
+                    messages=[{"role": "user", "content": prompt}],
+                    temperature=0
+                )
+                result = response.choices[0].message.content
+                # Cache the result
+                cache_response(prompt, result)
+                return prompt, result
+            except Exception as e:
+                print(f"Error processing prompt: {e}")
+                return prompt, f"Error: {str(e)}"
+        # Create tasks for all uncached prompts
+        tasks = [process_single_prompt(prompt) for prompt in uncached_prompts]
+        # Run all tasks concurrently and wait for them to complete
+        uncached_results = await asyncio.gather(*tasks)
+        # Combine cached and newly processed results
+        results.extend(uncached_results)
+    # Sort results to match original order of batch_prompts
+    prompt_to_result = {prompt: result for prompt, result in results}
+    return [prompt_to_result[prompt] for prompt in batch_prompts]
+async def process_text_with_ai_async(texts, instruction):
+    """Process text with GPT-4o-mini asynchronously in batches"""
+    if not texts:
+        return []
+    results = []
+    batch_size = 500
+    # Create OpenAI async client
+    client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+    # Process in batches
+    for i in range(0, len(texts), batch_size):
+        batch = texts[i:i+batch_size]
+        batch_prompts = [f"{instruction}\n\nText: {text}" for text in batch]
+        batch_results = await process_text_batch_async(client, batch_prompts)
+        results.extend(batch_results)
+    return results
 def process_woocommerce_data_in_memory(netcom_file):
     """
     Reads the uploaded NetCom CSV file in-memory, processes it to the WooCommerce format,
     # 1. Read the uploaded CSV into a DataFrame
     netcom_df = pd.read_csv(netcom_file.name, encoding='latin1')
     netcom_df.columns = netcom_df.columns.str.strip()  # standardize column names
     # Prepare descriptions for AI processing
     descriptions = netcom_df['Decription'].fillna("").tolist()
     prerequisites = netcom_df['RequiredPrerequisite'].fillna("").tolist()
     agendas = netcom_df['Outline'].fillna("").tolist()
+    # Process with AI asynchronously
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    # Run all processing tasks concurrently
+    tasks = [
+        process_text_with_ai_async(
+            descriptions,
+            "Create a concise 250-character summary of this course description:"
+        ),
+        process_text_with_ai_async(
+            descriptions,
+            "Condense this description to maximum 750 characters in paragraph format, with clean formatting:"
+        ),
+        process_text_with_ai_async(
+            objectives,
+            "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':"
+        ),
+        process_text_with_ai_async(
+            agendas,
+            "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':"
+        )
+    ]
+    # Process prerequisites separately to handle default case
+    formatted_prerequisites_task = []
     for prereq in prerequisites:
         if not prereq or pd.isna(prereq) or prereq.strip() == "":
+            formatted_prerequisites_task.append(default_prerequisite)
         else:
+            # For non-empty prerequisites, we'll process them with AI
+            prereq_result = loop.run_until_complete(process_text_with_ai_async(
                 [prereq],
                 "Format these prerequisites into a bullet list format with clean formatting. Start each bullet with '• ':"
+            ))
+            formatted_prerequisites_task.append(prereq_result[0])
+    # Run all tasks and get results
+    results = loop.run_until_complete(asyncio.gather(*tasks))
+    loop.close()
+    short_descriptions, condensed_descriptions, formatted_objectives, formatted_agendas = results
     # Add processed text to dataframe
     netcom_df['Short_Description'] = short_descriptions
     netcom_df['Condensed_Description'] = condensed_descriptions
     netcom_df['Formatted_Objectives'] = formatted_objectives
+    netcom_df['Formatted_Prerequisites'] = formatted_prerequisites_task
     netcom_df['Formatted_Agenda'] = formatted_agendas
     # 2. Create aggregated dates and times for each Course ID