MrSimple01 committed
Commit 187f669 · verified
1 Parent(s): bec109d

Update quiz_processing.py

Files changed (1)
  1. quiz_processing.py +6 -12
quiz_processing.py CHANGED
@@ -31,7 +31,7 @@ def clean_text(text):
 
 def split_text_by_tokens(text, max_tokens=12000):
     text = clean_text(text)
-    tokens = tokenizer.encode(text)
+    tokens = TOKENIZER_MODEL.encode(text)
 
     if len(tokens) <= max_tokens:
         return [text]
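
Both copies of split_text_by_tokens now call a shared TOKENIZER_MODEL constant instead of the old tokenizer name. The commit does not show where that constant is defined; one plausible setup, assuming a tiktoken-style encoder (the library choice and encoding name here are illustrative, not taken from this repo):

import tiktoken

# Assumption: any object exposing .encode(str) -> list[int] fits the call sites in this diff.
TOKENIZER_MODEL = tiktoken.get_encoding("cl100k_base")

token_count = len(TOKENIZER_MODEL.encode("Short lesson transcript."))
print(token_count)  # compared against max_tokens in split_text_by_tokens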
@@ -45,7 +45,7 @@ def split_text_by_tokens(text, max_tokens=12000):
 
     current_tokens = 0
     for sentence in sentences:
-        sentence_tokens = len(tokenizer.encode(sentence))
+        sentence_tokens = len(TOKENIZER_MODEL.encode(sentence))
 
         if current_tokens + sentence_tokens <= split_point:
             first_half.append(sentence)
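
The two hunks above only expose fragments of split_text_by_tokens. For orientation, here is a self-contained sketch of the same sentence-accumulation pattern; the sentence regex, the split_point choice, the else branch, and the joining step are assumptions filled in for illustration, not code from this file:

import re

def split_in_two_by_tokens(text, encoder, max_tokens=12000):
    """Sketch of the visible pattern: return the text unchanged if it fits the budget,
    otherwise gather sentences into a first half up to roughly half the token count
    and put the remaining sentences into a second half."""
    tokens = encoder.encode(text)
    if len(tokens) <= max_tokens:
        return [text]

    sentences = re.split(r'(?<=[.!?])\s+', text)  # assumption: naive sentence split
    split_point = len(tokens) // 2                # assumption: aim for two equal halves

    first_half, second_half = [], []
    current_tokens = 0
    for sentence in sentences:
        sentence_tokens = len(encoder.encode(sentence))
        if current_tokens + sentence_tokens <= split_point:
            first_half.append(sentence)
            current_tokens += sentence_tokens
        else:
            second_half.append(sentence)

    return [" ".join(first_half), " ".join(second_half)]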
@@ -79,11 +79,6 @@ class TextSegmentAnalysis(BaseModel):
     course_info: CourseInfo = Field(description="Information about the course")
     segments: List[Segment] = Field(description="List of text segments with analysis")
 
-
-# System prompt
-system_prompt = """You are an expert educational content analyzer. Your task is to analyze text content,
-identify distinct segments, and create high-quality educational quiz questions for each segment."""
-
 def clean_text(text):
     text = re.sub(r'\[speaker_\d+\]', '', text)
     text = re.sub(r'\s+', ' ', text).strip()
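
This hunk removes the inline system_prompt definition; the Claude call later in the diff now references SYSTEM_PROMPT instead. Where that constant now lives is not shown here; one plausible arrangement keeps the same wording as a module-level constant or in a shared config module:

# Hypothetical placement; only the removal of the inline definition is visible in this commit.
SYSTEM_PROMPT = """You are an expert educational content analyzer. Your task is to analyze text content,
identify distinct segments, and create high-quality educational quiz questions for each segment."""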
@@ -91,7 +86,7 @@ def clean_text(text):
 
 def split_text_by_tokens(text, max_tokens=8000):
     text = clean_text(text)
-    tokens = tokenizer.encode(text)
+    tokens = TOKENIZER_MODEL.encode(text)
 
     if len(tokens) <= max_tokens:
         return [text]
@@ -105,7 +100,7 @@ def split_text_by_tokens(text, max_tokens=8000):
 
     current_tokens = 0
    for sentence in sentences:
-        sentence_tokens = len(tokenizer.encode(sentence))
+        sentence_tokens = len(TOKENIZER_MODEL.encode(sentence))
 
         if current_tokens + sentence_tokens <= split_point:
             first_half.append(sentence)
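
These two hunks apply the same TOKENIZER_MODEL rename to the second copy of split_text_by_tokens, which uses a smaller 8000-token budget. Reusing the sketches above, a quick check of how a long transcript gets split might look like this (counts depend on the encoder):

long_text = "One more sentence about the lesson material. " * 3000
chunks = split_in_two_by_tokens(long_text, TOKENIZER_MODEL, max_tokens=8000)
for i, chunk in enumerate(chunks):
    print(i, len(TOKENIZER_MODEL.encode(chunk)))  # token count of each half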
@@ -147,7 +142,7 @@ def generate_with_claude(text, api_key, course_name="", section_name="", lesson_
147
  model=CLAUDE_MODEL,
148
  max_tokens=8192,
149
  temperature=DEFAULT_TEMPERATURE,
150
- system=system_prompt,
151
  messages=[
152
  {
153
  "role": "user",
@@ -292,9 +287,8 @@ def analyze_document(text, gemini_api_key, claude_api_key, course_name, section_
 
     end_time = time.time()
     total_time = end_time - start_time
-    print(f"Total quiz processing time: {total_time}s")
+    print(f"Total quiz processing time: {total_time:.2f}s")
 
-    # Format the results for display
     formatted_text = format_quiz_for_display(all_results)
     # formatted_text = f"Total processing time: {total_time:.2f} seconds\n\n" + formatted_text
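
The final hunk drops a redundant comment and formats the elapsed time to two decimal places; the pattern is the usual time.time() bracket around the work:

import time

start_time = time.time()
# ... segment analysis and quiz generation ...
total_time = time.time() - start_time
print(f"Total quiz processing time: {total_time:.2f}s")  # e.g. "Total quiz processing time: 12.34s"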