rodrigomasini committed on
Commit
1c44592
·
verified ·
1 Parent(s): 42cdc01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -35
app.py CHANGED
@@ -172,6 +172,56 @@ def on_select(instruction1, instruction2, instruction3, evt: gr.SelectData):
172
  #----------------#
173
  # Grammar metrics
174
  import re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
  def pre_process_text(text):
177
  sentences_list = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
@@ -181,47 +231,21 @@ def pre_process_text(text):
181
  split_sentences.extend(re.split(r'\n+', sentence))
182
  # Remove empty elements
183
  cleaned_sentences = [sentence for sentence in split_sentences if sentence.strip()]
184
- sentences_number = len(cleaned_sentences)
185
- return cleaned_sentences, sentences_number
186
-
187
- # Function to clean the sentences list and return words only
188
- def extract_words(sentences):
189
- words = []
190
- for sentence in sentences:
191
- # Extract words using regex, ignoring special characters
192
- words.extend(re.findall(r'\b\w+\b', sentence))
193
- return words
194
-
195
- def count_syllables(word):
196
- return len(re.findall(r'[aeiouyAEIOUY]', word))
197
 
198
  def flesch_kincaid_grade_level(text):
199
- sentences, sentences_count = pre_process_text(text)
200
- words = extract_words(sentences)
201
- syllables = sum([count_syllables(word) for word in text.split()])
202
-
203
- if sentences_count == 0 or words == 0:
204
- return float('nan') # Return NaN to indicate an error
205
- return 0.39 * (words / sentences_count) + 11.8 * (syllables / words) - 15.59
206
 
207
  def flesch_reading_ease(text):
208
- sentences, sentences_count = pre_process_text(text)
209
- words = extract_words(sentences)
210
- syllables = sum([count_syllables(word) for word in words])
211
-
212
- if sentences_count == 0 or words == 0:
213
- return float('nan') # Return NaN to indicate an error
214
- return 206.835 - 1.015 * (words / sentences_count) - 84.6 * (syllables / words)
215
 
216
  def gunning_fog_index(text):
217
- sentences, sentences_count = pre_process_text(text)
218
- words = extract_words(sentences)
219
- complex_words = len([word for word in words if count_syllables(word) >= 3])
220
-
221
- if sentences_count == 0 or words == 0:
222
- return float('nan') # Return NaN to indicate an error
223
- return 0.4 * ((words / sentences_count) + 100 * (complex_words / words))
224
-
225
  def calculate_readability_metrics(text):
226
  fk_grade_level = flesch_kincaid_grade_level(text)
227
  fk_reading_ease = flesch_reading_ease(text)
 
172
  #----------------#
173
  # Grammar metrics
174
  import re
175
+ from textstat import textstat
176
+
177
+ #def pre_process_text(text):
178
+ # sentences_list = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
179
+ # # Split the elements of the list by newline characters
180
+ # split_sentences = []
181
+ # for sentence in sentences_list:
182
+ # split_sentences.extend(re.split(r'\n+', sentence))
183
+ # # Remove empty elements
184
+ # cleaned_sentences = [sentence for sentence in split_sentences if sentence.strip()]
185
+ # sentences_number = len(cleaned_sentences)
186
+ # return cleaned_sentences, sentences_number
187
+
188
+ # Function to clean the sentences list and return words only
189
+ #def extract_words(sentences):
190
+ # words = []
191
+ # for sentence in sentences:
192
+ # # Extract words using regex, ignoring special characters
193
+ # words.extend(re.findall(r'\b\w+\b', sentence))
194
+ # return words
195
+
196
+ #def count_syllables(word):
197
+ # return len(re.findall(r'[aeiouyAEIOUY]', word))
198
+
199
+ #def flesch_kincaid_grade_level(text):
200
+ # sentences, sentences_count = pre_process_text(text)
201
+ # words = extract_words(sentences)
202
+ # syllables = sum([count_syllables(word) for word in text.split()])
203
+ #
204
+ # if sentences_count == 0 or words == 0:
205
+ # return float('nan') # Return NaN to indicate an error
206
+ # return 0.39 * (words / sentences_count) + 11.8 * (syllables / words) - 15.59
207
+
208
+ #def flesch_reading_ease(text):
209
+ # sentences, sentences_count = pre_process_text(text)
210
+ # words = extract_words(sentences)
211
+ # syllables = sum([count_syllables(word) for word in words])
212
+ #
213
+ # if sentences_count == 0 or words == 0:
214
+ # return float('nan') # Return NaN to indicate an error
215
+ # return 206.835 - 1.015 * (words / sentences_count) - 84.6 * (syllables / words)
216
+
217
+ #def gunning_fog_index(text):
218
+ # sentences, sentences_count = pre_process_text(text)
219
+ # words = extract_words(sentences)
220
+ # complex_words = len([word for word in words if count_syllables(word) >= 3])
221
+ #
222
+ # if sentences_count == 0 or words == 0:
223
+ # return float('nan') # Return NaN to indicate an error
224
+ # return 0.4 * ((words / sentences_count) + 100 * (complex_words / words))
225
 
226
  def pre_process_text(text):
227
  sentences_list = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
 
231
  split_sentences.extend(re.split(r'\n+', sentence))
232
  # Remove empty elements
233
  cleaned_sentences = [sentence for sentence in split_sentences if sentence.strip()]
234
+ string_sentences = (" ".join(cleaned_sentences))
235
+ return string_sentences
 
 
 
 
 
 
 
 
 
 
 
236
 
237
def flesch_kincaid_grade_level(text):
    """Return the Flesch-Kincaid grade level of *text*.

    The text is first normalized by ``pre_process_text`` (sentence
    splitting and whitespace cleanup joined back into one string),
    then scored with ``textstat.flesch_kincaid_grade``.
    """
    cleaned = pre_process_text(text)
    return textstat.flesch_kincaid_grade(cleaned)
 
 
 
 
 
240
 
241
def flesch_reading_ease(text):
    """Return the Flesch reading-ease score of *text*.

    Delegates the scoring to ``textstat.flesch_reading_ease`` after
    running the input through ``pre_process_text`` for cleanup.
    """
    cleaned = pre_process_text(text)
    return textstat.flesch_reading_ease(cleaned)
 
 
 
 
 
244
 
245
def gunning_fog_index(text):
    """Return the Gunning fog index of *text*.

    Normalizes the input with ``pre_process_text`` and computes the
    index with ``textstat.gunning_fog``.
    """
    cleaned = pre_process_text(text)
    return textstat.gunning_fog(cleaned)
248
+
 
 
 
 
 
249
  def calculate_readability_metrics(text):
250
  fk_grade_level = flesch_kincaid_grade_level(text)
251
  fk_reading_ease = flesch_reading_ease(text)