MrSimple01 committed
Commit 187f669 · verified
1 Parent(s): bec109d

Update quiz_processing.py

Files changed (1)
  1. quiz_processing.py +6 -12
quiz_processing.py CHANGED
@@ -31,7 +31,7 @@ def clean_text(text):
 
 def split_text_by_tokens(text, max_tokens=12000):
     text = clean_text(text)
-    tokens = tokenizer.encode(text)
+    tokens = TOKENIZER_MODEL.encode(text)
 
     if len(tokens) <= max_tokens:
         return [text]
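
Both copies of split_text_by_tokens now call a shared TOKENIZER_MODEL constant instead of the old tokenizer name. The commit does not show where that constant is defined; one plausible setup, assuming a tiktoken-style encoder (the library choice and encoding name here are illustrative, not taken from this repo):

import tiktoken

# Assumption: any object exposing .encode(str) -> list[int] fits the call sites in this diff.
TOKENIZER_MODEL = tiktoken.get_encoding("cl100k_base")

token_count = len(TOKENIZER_MODEL.encode("Short lesson transcript."))
print(token_count)  # compared against max_tokens in split_text_by_tokens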
@@ -45,7 +45,7 @@ def split_text_by_tokens(text, max_tokens=12000):
 
     current_tokens = 0
     for sentence in sentences:
-        sentence_tokens = len(tokenizer.encode(sentence))
+        sentence_tokens = len(TOKENIZER_MODEL.encode(sentence))
 
         if current_tokens + sentence_tokens <= split_point:
             first_half.append(sentence)
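
The two hunks above only expose fragments of split_text_by_tokens. For orientation, here is a self-contained sketch of the same sentence-accumulation pattern; the sentence regex, the split_point choice, the else branch, and the joining step are assumptions filled in for illustration, not code from this file:

import re

def split_in_two_by_tokens(text, encoder, max_tokens=12000):
    """Sketch of the visible pattern: return the text unchanged if it fits the budget,
    otherwise gather sentences into a first half up to roughly half the token count
    and put the remaining sentences into a second half."""
    tokens = encoder.encode(text)
    if len(tokens) <= max_tokens:
        return [text]

    sentences = re.split(r'(?<=[.!?])\s+', text)  # assumption: naive sentence split
    split_point = len(tokens) // 2                # assumption: aim for two equal halves

    first_half, second_half = [], []
    current_tokens = 0
    for sentence in sentences:
        sentence_tokens = len(encoder.encode(sentence))
        if current_tokens + sentence_tokens <= split_point:
            first_half.append(sentence)
            current_tokens += sentence_tokens
        else:
            second_half.append(sentence)

    return [" ".join(first_half), " ".join(second_half)]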
@@ -79,11 +79,6 @@ class TextSegmentAnalysis(BaseModel):
     course_info: CourseInfo = Field(description="Information about the course")
     segments: List[Segment] = Field(description="List of text segments with analysis")
 
-
-# System prompt
-system_prompt = """You are an expert educational content analyzer. Your task is to analyze text content,
-identify distinct segments, and create high-quality educational quiz questions for each segment."""
-
 def clean_text(text):
     text = re.sub(r'\[speaker_\d+\]', '', text)
     text = re.sub(r'\s+', ' ', text).strip()
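
This hunk removes the inline system_prompt definition; the Claude call later in the diff now references SYSTEM_PROMPT instead. Where that constant now lives is not shown here; one plausible arrangement keeps the same wording as a module-level constant or in a shared config module:

# Hypothetical placement; only the removal of the inline definition is visible in this commit.
SYSTEM_PROMPT = """You are an expert educational content analyzer. Your task is to analyze text content,
identify distinct segments, and create high-quality educational quiz questions for each segment."""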
@@ -91,7 +86,7 @@ def clean_text(text):
 
 def split_text_by_tokens(text, max_tokens=8000):
     text = clean_text(text)
-    tokens = tokenizer.encode(text)
+    tokens = TOKENIZER_MODEL.encode(text)
 
     if len(tokens) <= max_tokens:
         return [text]
@@ -105,7 +100,7 @@ def split_text_by_tokens(text, max_tokens=8000):
 
     current_tokens = 0
    for sentence in sentences:
-        sentence_tokens = len(tokenizer.encode(sentence))
+        sentence_tokens = len(TOKENIZER_MODEL.encode(sentence))
 
         if current_tokens + sentence_tokens <= split_point:
             first_half.append(sentence)
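
These two hunks apply the same TOKENIZER_MODEL rename to the second copy of split_text_by_tokens, which uses a smaller 8000-token budget. Reusing the sketches above, a quick check of how a long transcript gets split might look like this (counts depend on the encoder):

long_text = "One more sentence about the lesson material. " * 3000
chunks = split_in_two_by_tokens(long_text, TOKENIZER_MODEL, max_tokens=8000)
for i, chunk in enumerate(chunks):
    print(i, len(TOKENIZER_MODEL.encode(chunk)))  # token count of each half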
@@ -147,7 +142,7 @@ def generate_with_claude(text, api_key, course_name="", section_name="", lesson_
147
  model=CLAUDE_MODEL,
148
  max_tokens=8192,
149
  temperature=DEFAULT_TEMPERATURE,
150
- system=system_prompt,
151
  messages=[
152
  {
153
  "role": "user",
@@ -292,9 +287,8 @@ def analyze_document(text, gemini_api_key, claude_api_key, course_name, section_
 
     end_time = time.time()
     total_time = end_time - start_time
-    print(f"Total quiz processing time: {total_time}s")
+    print(f"Total quiz processing time: {total_time:.2f}s")
 
-    # Format the results for display
     formatted_text = format_quiz_for_display(all_results)
     # formatted_text = f"Total processing time: {total_time:.2f} seconds\n\n" + formatted_text
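
The final hunk drops a redundant comment and formats the elapsed time to two decimal places; the pattern is the usual time.time() bracket around the work:

import time

start_time = time.time()
# ... segment analysis and quiz generation ...
total_time = time.time() - start_time
print(f"Total quiz processing time: {total_time:.2f}s")  # e.g. "Total quiz processing time: 12.34s"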