Spaces:
Sleeping
Sleeping
Update quiz_processing.py
Browse files- quiz_processing.py +6 -12
quiz_processing.py
CHANGED
@@ -31,7 +31,7 @@ def clean_text(text):
|
|
31 |
|
32 |
def split_text_by_tokens(text, max_tokens=12000):
|
33 |
text = clean_text(text)
|
34 |
-
tokens =
|
35 |
|
36 |
if len(tokens) <= max_tokens:
|
37 |
return [text]
|
@@ -45,7 +45,7 @@ def split_text_by_tokens(text, max_tokens=12000):
|
|
45 |
|
46 |
current_tokens = 0
|
47 |
for sentence in sentences:
|
48 |
-
sentence_tokens = len(
|
49 |
|
50 |
if current_tokens + sentence_tokens <= split_point:
|
51 |
first_half.append(sentence)
|
@@ -79,11 +79,6 @@ class TextSegmentAnalysis(BaseModel):
|
|
79 |
course_info: CourseInfo = Field(description="Information about the course")
|
80 |
segments: List[Segment] = Field(description="List of text segments with analysis")
|
81 |
|
82 |
-
|
83 |
-
# System prompt
|
84 |
-
system_prompt = """You are an expert educational content analyzer. Your task is to analyze text content,
|
85 |
-
identify distinct segments, and create high-quality educational quiz questions for each segment."""
|
86 |
-
|
87 |
def clean_text(text):
|
88 |
text = re.sub(r'\[speaker_\d+\]', '', text)
|
89 |
text = re.sub(r'\s+', ' ', text).strip()
|
@@ -91,7 +86,7 @@ def clean_text(text):
|
|
91 |
|
92 |
def split_text_by_tokens(text, max_tokens=8000):
|
93 |
text = clean_text(text)
|
94 |
-
tokens =
|
95 |
|
96 |
if len(tokens) <= max_tokens:
|
97 |
return [text]
|
@@ -105,7 +100,7 @@ def split_text_by_tokens(text, max_tokens=8000):
|
|
105 |
|
106 |
current_tokens = 0
|
107 |
for sentence in sentences:
|
108 |
-
sentence_tokens = len(
|
109 |
|
110 |
if current_tokens + sentence_tokens <= split_point:
|
111 |
first_half.append(sentence)
|
@@ -147,7 +142,7 @@ def generate_with_claude(text, api_key, course_name="", section_name="", lesson_
|
|
147 |
model=CLAUDE_MODEL,
|
148 |
max_tokens=8192,
|
149 |
temperature=DEFAULT_TEMPERATURE,
|
150 |
-
system=
|
151 |
messages=[
|
152 |
{
|
153 |
"role": "user",
|
@@ -292,9 +287,8 @@ def analyze_document(text, gemini_api_key, claude_api_key, course_name, section_
|
|
292 |
|
293 |
end_time = time.time()
|
294 |
total_time = end_time - start_time
|
295 |
-
print(f"Total quiz processing time: {total_time}s")
|
296 |
|
297 |
-
# Format the results for display
|
298 |
formatted_text = format_quiz_for_display(all_results)
|
299 |
# formatted_text = f"Total processing time: {total_time:.2f} seconds\n\n" + formatted_text
|
300 |
|
|
|
31 |
|
32 |
def split_text_by_tokens(text, max_tokens=12000):
|
33 |
text = clean_text(text)
|
34 |
+
tokens = TOKENIZER_MODEL.encode(text)
|
35 |
|
36 |
if len(tokens) <= max_tokens:
|
37 |
return [text]
|
|
|
45 |
|
46 |
current_tokens = 0
|
47 |
for sentence in sentences:
|
48 |
+
sentence_tokens = len(TOKENIZER_MODEL.encode(sentence))
|
49 |
|
50 |
if current_tokens + sentence_tokens <= split_point:
|
51 |
first_half.append(sentence)
|
|
|
79 |
course_info: CourseInfo = Field(description="Information about the course")
|
80 |
segments: List[Segment] = Field(description="List of text segments with analysis")
|
81 |
|
|
|
|
|
|
|
|
|
|
|
82 |
def clean_text(text):
|
83 |
text = re.sub(r'\[speaker_\d+\]', '', text)
|
84 |
text = re.sub(r'\s+', ' ', text).strip()
|
|
|
86 |
|
87 |
def split_text_by_tokens(text, max_tokens=8000):
|
88 |
text = clean_text(text)
|
89 |
+
tokens = TOKENIZER_MODEL.encode(text)
|
90 |
|
91 |
if len(tokens) <= max_tokens:
|
92 |
return [text]
|
|
|
100 |
|
101 |
current_tokens = 0
|
102 |
for sentence in sentences:
|
103 |
+
sentence_tokens = len(TOKENIZER_MODEL.encode(sentence))
|
104 |
|
105 |
if current_tokens + sentence_tokens <= split_point:
|
106 |
first_half.append(sentence)
|
|
|
142 |
model=CLAUDE_MODEL,
|
143 |
max_tokens=8192,
|
144 |
temperature=DEFAULT_TEMPERATURE,
|
145 |
+
system=SYSTEM_PROMPT,
|
146 |
messages=[
|
147 |
{
|
148 |
"role": "user",
|
|
|
287 |
|
288 |
end_time = time.time()
|
289 |
total_time = end_time - start_time
|
290 |
+
print(f"Total quiz processing time: {total_time:.2f}s")
|
291 |
|
|
|
292 |
formatted_text = format_quiz_for_display(all_results)
|
293 |
# formatted_text = f"Total processing time: {total_time:.2f} seconds\n\n" + formatted_text
|
294 |
|