Spaces:
Running
Running
File size: 3,175 Bytes
5798cfc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
from typing import List, Tuple
import math
def split_text_into_chunks(text: str, chunk_size: int) -> List[str]:
    """
    Splits the text into chunks of a specified maximum size.

    Whitespace is normalized first: leading/trailing whitespace is dropped and
    every run of whitespace becomes a single space. Words are then packed
    greedily into chunks whose joined length does not exceed ``chunk_size``
    characters. Words are never broken, so a single word longer than
    ``chunk_size`` becomes a chunk of its own (and exceeds the limit).

    Args:
        text: The text to split.
        chunk_size: Maximum length of each chunk, in characters.

    Returns:
        The chunks in their original order. Empty or whitespace-only text
        yields an empty list; no chunk is ever an empty string.
    """
    chunks: List[str] = []
    current_chunk: List[str] = []
    current_length = 0  # Always equals len(" ".join(current_chunk))
    # str.split() without arguments collapses whitespace runs and returns []
    # for blank input, so no empty "word" can reach the loop.
    for word in text.split():
        # A separating space is only needed when the chunk already holds a word.
        separator = 1 if current_chunk else 0
        candidate_length = current_length + separator + len(word)
        # Only flush a non-empty chunk; otherwise an oversized first word
        # would produce a spurious empty chunk.
        if current_chunk and candidate_length > chunk_size:
            chunks.append(" ".join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            current_chunk.append(word)
            current_length = candidate_length
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
def distribute_questions_across_chunks(n_chunks: int, n_questions: int) -> List[int]:
    """
    Distributes a specified number of questions across a specified number of chunks.

    Questions are spread as evenly as possible: every chunk receives
    ``n_questions // n_chunks`` questions and the first
    ``n_questions % n_chunks`` chunks receive one more, so earlier chunks are
    favoured and no question is dropped.

    Args:
        n_chunks: Number of chunks to distribute over.
        n_questions: Total number of questions to hand out.

    Returns:
        A list of length ``n_chunks`` whose entries sum to ``n_questions``.
        A non-positive ``n_chunks`` yields an empty list; a negative
        ``n_questions`` is treated as zero.
    """
    if n_chunks <= 0:
        # Nothing to distribute over; also avoids dividing by zero below.
        return []
    # Clamp so that a negative request gives all zeros rather than negative counts.
    base, extra = divmod(max(n_questions, 0), n_chunks)
    return [base + 1] * extra + [base] * (n_chunks - extra)
def generate_questions_for_text(text: str, chunk_size: int, n_questions: int) -> List[Tuple[str, int]]:
    """
    Breaks the text into chunks and pairs each chunk with its share of questions.

    Args:
        text: The text to split.
        chunk_size: Chunk size passed through to ``split_text_into_chunks``.
        n_questions: Total number of questions to spread over the chunks.

    Returns:
        A list of (chunk, number of questions) tuples, in chunk order.
    """
    pieces = split_text_into_chunks(text, chunk_size)
    # The allocation is computed for exactly as many slots as there are chunks.
    quotas = distribute_questions_across_chunks(len(pieces), n_questions)
    paired = zip(pieces, quotas)
    return list(paired)
# Example usage: chunk a sample paragraph and report how many questions each chunk gets.
chunk_size = 100  # Max length of each chunk in characters
n_questions = 5  # Total number of questions to be asked
text = (
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin hendrerit urna "
    "vel erat bibendum, eget condimentum ipsum interdum. Nulla facilisi. Quisque dictum "
    "eros eu velit varius, eget faucibus mauris euismod. Etiam placerat nisi at urna maximus "
    "viverra. Integer ut odio nec justo volutpat varius ut quis quam. Suspendisse potenti. "
    "Donec vulputate quam quis metus sagittis, sed commodo justo ultricies. Nam ut velit "
    "finibus, venenatis eros vel, consectetur arcu. Praesent vulputate at ligula non elementum. "
    "Nulla varius condimentum justo, non placerat nisl ullamcorper eu."
)
result = generate_questions_for_text(text, chunk_size, n_questions)
for i, (chunk, num_questions) in enumerate(result):
    # One report per chunk: header with word count, question count, the text, a divider.
    print(
        f"Chunk {i + 1} ({len(chunk.split())} words):",
        f"Questions: {num_questions}",
        chunk,
        "-" * 40,
        sep="\n",
    )
|