Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -224,15 +224,31 @@ from textstat import textstat
|
|
224 |
# return 0.4 * ((words / sentences_count) + 100 * (complex_words / words))
|
225 |
|
226 |
def pre_process_text(text):
    """Split text into sentence/line fragments and rejoin them with spaces.

    The text is first split into sentences, each sentence is split again on
    newlines, and blank fragments are dropped.

    Args:
        text: The raw text to clean up.

    Returns:
        The non-blank fragments joined by single spaces; an empty string when
        the input contains nothing but whitespace.
    """
    # Split at a whitespace character that follows "." or "?", unless the
    # preceding characters look like a dotted abbreviation ("e.g.") or a
    # capitalised two-letter title ("Mr.").
    sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)

    # A "sentence" may still span several lines; break those apart too and
    # discard fragments that are empty or whitespace-only.
    fragments = [
        piece
        for sentence in sentences
        for piece in re.split(r'\n+', sentence)
        if piece.strip()
    ]

    # Previously the result was computed and then discarded, so callers
    # received None. Hand the cleaned text back instead.
    return " ".join(fragments)
|
234 |
-
|
235 |
-
|
|
|
|
|
236 |
|
237 |
def flesch_kincaid_grade_level(text):
|
238 |
sentences = pre_process_text(text)
|
|
|
224 |
# return 0.4 * ((words / sentences_count) + 100 * (complex_words / words))
|
225 |
|
226 |
def pre_process_text(text):
    """Flatten multi-paragraph text into a single space-separated string.

    The input is stripped, whitespace-only gaps between lines are collapsed,
    and the result is split into sentences and then into lines. Blank
    fragments are discarded and the remaining ones are joined with a single
    space.

    Args:
        text: The raw text to clean up.

    Returns:
        The non-blank fragments joined by single spaces; an empty string when
        the input contains nothing but whitespace.
    """
    # A newline, any run of whitespace, and another newline becomes exactly
    # two newlines, so paragraph gaps are uniform before splitting.
    normalized = re.sub(r'\n\s*\n', '\n\n', text.strip())

    # Split at a whitespace character that follows "." or "?", unless the
    # preceding characters look like a dotted abbreviation ("e.g.") or a
    # capitalised two-letter title ("Mr.").
    sentences = re.split(
        r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', normalized
    )

    # A "sentence" may still span several lines; break those apart too and
    # discard fragments that are empty or whitespace-only. Fragments are
    # filtered but not stripped, so inner leading spaces are preserved.
    fragments = [
        piece
        for sentence in sentences
        for piece in re.split(r'\n+', sentence)
        if piece.strip()
    ]

    return " ".join(fragments)
|
252 |
|
253 |
def flesch_kincaid_grade_level(text):
|
254 |
sentences = pre_process_text(text)
|