codys12 committed · verified
Commit e570bda · 1 Parent(s): d9c493b

Update app.py

Files changed (1): app.py +138 -53
app.py CHANGED

@@ -5,6 +5,13 @@ import os
 from io import BytesIO
 import re
 import openai
+import hashlib
+import json
+import asyncio
+import aiohttp
+from pathlib import Path
+from concurrent.futures import ThreadPoolExecutor
+from functools import lru_cache
 
 import gradio_client.utils
 
@@ -19,6 +26,102 @@ def _fixed_json_schema_to_python_type(schema, defs=None):
 gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type
 
 
+# Create cache directory if it doesn't exist
+CACHE_DIR = Path("ai_response_cache")
+CACHE_DIR.mkdir(exist_ok=True)
+
+def get_cache_path(prompt):
+    """Generate a unique cache file path based on the prompt content"""
+    prompt_hash = hashlib.md5(prompt.encode('utf-8')).hexdigest()
+    return CACHE_DIR / f"{prompt_hash}.json"
+
+def get_cached_response(prompt):
+    """Try to get a cached response for the given prompt"""
+    cache_path = get_cache_path(prompt)
+    if cache_path.exists():
+        try:
+            with open(cache_path, 'r', encoding='utf-8') as f:
+                return json.load(f)['response']
+        except Exception as e:
+            print(f"Error reading cache: {e}")
+    return None
+
+def cache_response(prompt, response):
+    """Cache the response for a given prompt"""
+    cache_path = get_cache_path(prompt)
+    try:
+        with open(cache_path, 'w', encoding='utf-8') as f:
+            json.dump({'prompt': prompt, 'response': response}, f)
+    except Exception as e:
+        print(f"Error writing to cache: {e}")
+
+
+async def process_text_batch_async(client, batch_prompts):
+    """Process a batch of prompts asynchronously"""
+    results = []
+
+    # First check cache for each prompt
+    for prompt in batch_prompts:
+        cached = get_cached_response(prompt)
+        if cached:
+            results.append((prompt, cached))
+
+    # Filter out prompts that were found in cache
+    uncached_prompts = [p for p in batch_prompts if not any(p == cached_prompt for cached_prompt, _ in results)]
+
+    if uncached_prompts:
+        # Process uncached prompts in parallel
+        async def process_single_prompt(prompt):
+            try:
+                response = await client.chat.completions.create(
+                    model="gpt-4o-mini",
+                    messages=[{"role": "user", "content": prompt}],
+                    temperature=0
+                )
+                result = response.choices[0].message.content
+                # Cache the result
+                cache_response(prompt, result)
+                return prompt, result
+            except Exception as e:
+                print(f"Error processing prompt: {e}")
+                return prompt, f"Error: {str(e)}"
+
+        # Create tasks for all uncached prompts
+        tasks = [process_single_prompt(prompt) for prompt in uncached_prompts]
+
+        # Run all tasks concurrently and wait for them to complete
+        uncached_results = await asyncio.gather(*tasks)
+
+        # Combine cached and newly processed results
+        results.extend(uncached_results)
+
+    # Sort results to match original order of batch_prompts
+    prompt_to_result = {prompt: result for prompt, result in results}
+    return [prompt_to_result[prompt] for prompt in batch_prompts]
+
+
+async def process_text_with_ai_async(texts, instruction):
+    """Process text with GPT-4o-mini asynchronously in batches"""
+    if not texts:
+        return []
+
+    results = []
+    batch_size = 500
+
+    # Create OpenAI async client
+    client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+    # Process in batches
+    for i in range(0, len(texts), batch_size):
+        batch = texts[i:i+batch_size]
+        batch_prompts = [f"{instruction}\n\nText: {text}" for text in batch]
+
+        batch_results = await process_text_batch_async(client, batch_prompts)
+        results.extend(batch_results)
+
+    return results
+
+
 def process_woocommerce_data_in_memory(netcom_file):
     """
     Reads the uploaded NetCom CSV file in-memory, processes it to the WooCommerce format,
@@ -45,35 +148,6 @@ def process_woocommerce_data_in_memory(netcom_file):
     # 1. Read the uploaded CSV into a DataFrame
     netcom_df = pd.read_csv(netcom_file.name, encoding='latin1')
     netcom_df.columns = netcom_df.columns.str.strip()  # standardize column names
-
-    # Initialize OpenAI client
-    client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-
-    # Process descriptions in batches of 500
-    def process_text_with_ai(texts, instruction):
-        """Process text with GPT-4o-mini"""
-        if not texts:
-            return []
-
-        results = []
-        batch_size = 500
-
-        for i in range(0, len(texts), batch_size):
-            batch = texts[i:i+batch_size]
-            batch_prompts = [f"{instruction}\n\nText: {text}" for text in batch]
-
-            batch_results = []
-            for prompt in batch_prompts:
-                response = client.chat.completions.create(
-                    model="gpt-4o-mini",
-                    messages=[{"role": "user", "content": prompt}],
-                    temperature=0
-                )
-                batch_results.append(response.choices[0].message.content)
-
-            results.extend(batch_results)
-
-        return results
 
     # Prepare descriptions for AI processing
     descriptions = netcom_df['Decription'].fillna("").tolist()
@@ -81,43 +155,54 @@ def process_woocommerce_data_in_memory(netcom_file):
     prerequisites = netcom_df['RequiredPrerequisite'].fillna("").tolist()
     agendas = netcom_df['Outline'].fillna("").tolist()
 
-    # Process with AI
-    short_descriptions = process_text_with_ai(
-        descriptions,
-        "Create a concise 250-character summary of this course description:"
-    )
-
-    condensed_descriptions = process_text_with_ai(
-        descriptions,
-        "Condense this description to maximum 750 characters in paragraph format, with clean formatting:"
-    )
+    # Process with AI asynchronously
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
 
-    formatted_objectives = process_text_with_ai(
-        objectives,
-        "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':"
-    )
+    # Run all processing tasks concurrently
+    tasks = [
+        process_text_with_ai_async(
+            descriptions,
+            "Create a concise 250-character summary of this course description:"
+        ),
+        process_text_with_ai_async(
+            descriptions,
+            "Condense this description to maximum 750 characters in paragraph format, with clean formatting:"
+        ),
+        process_text_with_ai_async(
+            objectives,
+            "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':"
+        ),
+        process_text_with_ai_async(
+            agendas,
+            "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':"
+        )
+    ]
 
-    formatted_prerequisites = []
+    # Process prerequisites separately to handle default case
+    formatted_prerequisites_task = []
     for prereq in prerequisites:
         if not prereq or pd.isna(prereq) or prereq.strip() == "":
-            formatted_prerequisites.append(default_prerequisite)
+            formatted_prerequisites_task.append(default_prerequisite)
         else:
-            formatted_prereq = process_text_with_ai(
+            # For non-empty prerequisites, we'll process them with AI
+            prereq_result = loop.run_until_complete(process_text_with_ai_async(
                 [prereq],
                 "Format these prerequisites into a bullet list format with clean formatting. Start each bullet with '• ':"
-            )[0]
-            formatted_prerequisites.append(formatted_prereq)
+            ))
+            formatted_prerequisites_task.append(prereq_result[0])
 
-    formatted_agendas = process_text_with_ai(
-        agendas,
-        "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':"
-    )
+    # Run all tasks and get results
+    results = loop.run_until_complete(asyncio.gather(*tasks))
+    loop.close()
+
+    short_descriptions, condensed_descriptions, formatted_objectives, formatted_agendas = results
 
     # Add processed text to dataframe
     netcom_df['Short_Description'] = short_descriptions
     netcom_df['Condensed_Description'] = condensed_descriptions
     netcom_df['Formatted_Objectives'] = formatted_objectives
-    netcom_df['Formatted_Prerequisites'] = formatted_prerequisites
+    netcom_df['Formatted_Prerequisites'] = formatted_prerequisites_task
     netcom_df['Formatted_Agenda'] = formatted_agendas
 
     # 2. Create aggregated dates and times for each Course ID
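
The cache layer added here keys each response to a JSON file named after the MD5 hash of the full prompt string, so a given prompt is only sent to the API once; with temperature=0 in the request, replaying from the cache is a reasonable stand-in for re-querying. A minimal sketch of the round-trip, assuming the helpers can be imported from app.py without launching the Space (or are copied into a scratch script):

    # Sketch: exercise the prompt-keyed disk cache from this commit.
    # Creates ai_response_cache/ in the working directory, mirroring CACHE_DIR.
    from app import get_cache_path, cache_response, get_cached_response

    prompt = "Create a concise 250-character summary of this course description:\n\nText: Intro to Python"

    print(get_cache_path(prompt).name)               # "<md5-of-prompt>.json"
    cache_response(prompt, "A short Python course.")  # writes {'prompt': ..., 'response': ...}
    print(get_cached_response(prompt))                # "A short Python course.", served from disk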
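
The batch helper can also be driven on its own. This hypothetical driver uses asyncio.run for brevity, whereas app.py creates a loop explicitly with new_event_loop/run_until_complete because it runs inside a synchronous Gradio handler; it assumes OPENAI_API_KEY is set in the environment:

    # Sketch: standalone driver for the async batch helper (hypothetical usage).
    import asyncio
    from app import process_text_with_ai_async

    texts = ["Course A description ...", "Course B description ..."]
    # Results come back in the same order as the inputs; cached prompts skip the API.
    summaries = asyncio.run(process_text_with_ai_async(
        texts,
        "Create a concise 250-character summary of this course description:"
    ))
    for summary in summaries:
        print(summary)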