Spaces:

rodrigomasini
/

recurrentGPT

Sleeping

rodrigomasini committed on May 24, 2024

Commit

21f2eeb

verified ·

1 Parent(s): 4044da4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -224,15 +224,31 @@ from textstat import textstat
 #    return 0.4 * ((words / sentences_count) + 100 * (complex_words / words))
 def pre_process_text(text):
     # Flatten `text` into a single string: split it into sentence-like
     # pieces, break those pieces on newlines, drop blank pieces, and join
     # what remains with single spaces.
     #
     # The split point is a whitespace character that follows "." or "?",
     # unless the characters just before it look like a dotted abbreviation
     # (word char, ".", word char, any char -- e.g. "e.g.") or a capital
     # letter plus a lowercase letter plus "." (e.g. "Mr.").
     sentences_list = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
     # Split the elements of the list by newline characters
     split_sentences = []
     for sentence in sentences_list:
         split_sentences.extend(re.split(r'\n+', sentence))
     # Remove empty elements
     # (pieces that are empty or whitespace-only are dropped; kept pieces are
     # not themselves stripped)
     cleaned_sentences = [sentence for sentence in split_sentences if sentence.strip()]
-    string_sentences = (" ".join(cleaned_sentences))
-    return string_sentences
 def flesch_kincaid_grade_level(text):
     sentences = pre_process_text(text)

 #    return 0.4 * ((words / sentences_count) + 100 * (complex_words / words))
 def pre_process_text(text):
     """Flatten multi-line text into one space-separated string.

     The text is trimmed, split into sentence-like pieces, those pieces are
     broken on newlines, blank pieces are discarded, and the remainder is
     joined with single spaces.

     Args:
         text: The raw input string; may contain blank lines and newlines.

     Returns:
         A single string with no newline characters. An empty or
         whitespace-only input yields "".
     """
     # Trim both ends and collapse any whitespace-only gap between two line
     # breaks down to exactly one blank line.
     text = re.sub(r'\n\s*\n', '\n\n', text.strip())
     # Split on whitespace that follows "." or "?", except after something
     # shaped like a dotted abbreviation ("e.g.") or a short title ("Mr.").
     sentences_list = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
     # Break every piece on runs of newlines and keep only non-blank parts.
     # Kept parts are deliberately not stripped, matching earlier output.
     cleaned_sentences = [
         piece
         for sentence in sentences_list
         for piece in re.split(r'\n+', sentence)
         if piece.strip()
     ]
     return " ".join(cleaned_sentences)
 def flesch_kincaid_grade_level(text):
     sentences = pre_process_text(text)