conversantech committed
Commit 5a4c20a · 1 Parent(s): 9d7abc9
Files changed (2)
  1. app.py +62 -126
  2. humanization_utils.py +0 -317
app.py CHANGED
@@ -6,148 +6,62 @@ import nltk
 from nltk.tokenize import sent_tokenize, word_tokenize
 from textstat import flesch_reading_ease, flesch_kincaid_grade
 
-# Setup NLTK download path
+# Setup NLTK download path for Hugging Face Spaces
 os.environ['NLTK_DATA'] = '/tmp/nltk_data'
 
-# Download required NLTK data
-try:
-    nltk.download('punkt', download_dir='/tmp/nltk_data')
-    nltk.download('averaged_perceptron_tagger', download_dir='/tmp/nltk_data')
-    nltk.download('stopwords', download_dir='/tmp/nltk_data')
-    nltk.data.path.append('/tmp/nltk_data')
-    print("NLTK data downloaded successfully")
-except Exception as e:
-    print(f"NLTK download error: {e}")
+def download_nltk_data():
+    """Download required NLTK data with proper error handling"""
+    try:
+        # Create the directory if it doesn't exist
+        os.makedirs('/tmp/nltk_data', exist_ok=True)
+
+        # Add the path to NLTK's data path
+        nltk.data.path.append('/tmp/nltk_data')
+
+        # Download required NLTK data - use punkt_tab for newer NLTK versions
+        required_data = [
+            'punkt_tab',  # For newer NLTK versions (3.9+)
+            'punkt',      # Fallback for older versions
+            'averaged_perceptron_tagger',
+            'stopwords'
+        ]
+
+        for data in required_data:
+            try:
+                nltk.download(data, download_dir='/tmp/nltk_data', quiet=True)
+                print(f"Successfully downloaded {data}")
+            except Exception as e:
+                print(f"Failed to download {data}: {e}")
+
+        print("NLTK data download completed")
+        print(f"NLTK data paths: {nltk.data.path}")
+
+    except Exception as e:
+        print(f"NLTK setup error: {e}")
+
+# Download NLTK data at startup
+download_nltk_data()
 
 class AIContentHumanizer:
     def __init__(self):
         self.setup_humanization_patterns()
 
     def setup_humanization_patterns(self):
+        # Your existing patterns code here...
         self.ai_replacements = {
             r'\bit is important to note that\b': ["worth mentioning that", "keep in mind that", "note that"],
-            r'\bit should be noted that\b': ["remember that", "worth noting that", "keep in mind"],
-            r'\bin conclusion\b': ["to wrap up", "all in all", "bottom line"],
-            r'\bto conclude\b': ["to wrap up", "all in all", "in the end"],
-            r'\bfurthermore\b': ["also", "plus", "what's more"],
-            r'\bmoreover\b': ["also", "plus", "and"],
-            r'\bhowever\b': ["but", "though", "yet"],
-            r'\btherefore\b': ["so", "that's why", "which means"],
-            r'\bconsequently\b': ["so", "as a result", "that's why"],
-            r'\bsignificant(?:ly)?\b': ["big", "major", "important"],
-            r'\bnumerous\b': ["many", "lots of", "plenty of"],
-            r'\butilize\b': ["use", "make use of", "work with"],
-            r'\bdemonstrate\b': ["show", "prove", "make clear"],
-            r'\bfacilitate\b': ["help", "make easier", "enable"],
-            r'\bimplement\b': ["put in place", "set up", "start using"],
-            r'\bvarious\b': ["different", "several", "many"],
-            r'\bsubstantial\b': ["big", "major", "significant"]
+            # ... rest of your patterns
         }
-        self.contractions = {
-            r'\bit is\b': "it's",
-            r'\bthat is\b': "that's",
-            r'\bwe are\b': "we're",
-            r'\bthey are\b': "they're",
-            r'\byou are\b': "you're",
-            r'\bi am\b': "I'm",
-            r'\bhe is\b': "he's",
-            r'\bshe is\b': "she's",
-            r'\bwill not\b': "won't",
-            r'\bcannot\b': "can't",
-            r'\bdo not\b': "don't",
-            r'\bdoes not\b': "doesn't",
-            r'\bdid not\b': "didn't",
-            r'\bhave not\b': "haven't",
-            r'\bhas not\b': "hasn't",
-            r'\bhad not\b': "hadn't",
-            r'\bwould not\b': "wouldn't",
-            r'\bshould not\b': "shouldn't",
-            r'\bcould not\b': "couldn't",
-            r'\bis not\b': "isn't",
-            r'\bare not\b': "aren't",
-            r'\bwas not\b': "wasn't",
-            r'\bwere not\b': "weren't"
-        }
-        self.human_fillers = ['actually', 'basically', 'really', 'pretty much']
-        self.opinion_markers = ["I think", "I believe", "In my opinion"]
-        self.casual_starters = ["Look,", "Listen,", "Here's the thing:"]
-
-    def replace_ai_phrases(self, text):
-        for pattern, replacements in self.ai_replacements.items():
-            matches = re.finditer(pattern, text, re.IGNORECASE)
-            for match in reversed(list(matches)):
-                replacement = random.choice(replacements)
-                start, end = match.span()
-                if text[start].isupper():
-                    replacement = replacement.capitalize()
-                text = text[:start] + replacement + text[end:]
-        return text
-
-    def add_contractions(self, text):
-        for pattern, contraction in self.contractions.items():
-            text = re.sub(pattern, contraction, text, flags=re.IGNORECASE)
-        return text
-
-    def add_personal_touches(self, text):
-        sentences = sent_tokenize(text)
-        modified = []
-        for i, s in enumerate(sentences):
-            if random.random() < 0.3:
-                s = random.choice(self.opinion_markers) + " " + s.lower()
-            elif i == 0 and random.random() < 0.2:
-                s = random.choice(self.casual_starters) + " " + s.lower()
-            modified.append(s)
-        return ' '.join(modified)
-
-    def add_natural_fillers(self, text):
-        sentences = sent_tokenize(text)
-        modified = []
-        for s in sentences:
-            words = s.split()
-            if len(words) > 6 and random.random() < 0.3:
-                words.insert(random.randint(1, min(4, len(words)-1)), random.choice(self.human_fillers))
-            modified.append(' '.join(words))
-        return ' '.join(modified)
-
-    def vary_sentence_structure(self, text):
-        sentences = sent_tokenize(text)
-        modified, skip = [], False
-        for i in range(len(sentences)):
-            if skip:
-                skip = False
-                continue
-            if i < len(sentences)-1 and len(sentences[i].split()) < 8 and len(sentences[i+1].split()) < 8 and random.random() < 0.4:
-                combined = sentences[i].rstrip('.!?') + ', ' + sentences[i+1].lower()
-                modified.append(combined)
-                skip = True
-            else:
-                modified.append(sentences[i])
-        return ' '.join(modified)
-
-    def add_casual_punctuation(self, text):
-        sentences = sent_tokenize(text)
-        modified = []
-        for i, s in enumerate(sentences):
-            if random.random() < 0.1 and i == len(sentences) - 1:
-                s = s.rstrip('.!?') + '...'
-            elif random.random() < 0.15 and any(word in s.lower() for word in ['amazing', 'incredible']):
-                s = s.rstrip('.') + '!'
-            modified.append(s)
-        return ' '.join(modified)
-
-    def clean_text(self, text):
-        text = re.sub(r'\s+', ' ', text)
-        text = re.sub(r'\s+([.!?])', r'\1', text)
-        text = re.sub(r'([.!?])\s*([A-Z])', r'\1 \2', text)
-        def cap(match): return match.group(1) + ' ' + match.group(2).upper()
-        text = re.sub(r'([.!?])\s+([a-z])', cap, text)
-        return text.strip()
+        # ... rest of your existing code
 
     def get_readability_score(self, text):
         try:
             score = flesch_reading_ease(text)
             grade = flesch_kincaid_grade(text)
-            level = ("Very Easy" if score >= 90 else "Easy" if score >= 80 else "Fairly Easy" if score >= 70 else "Standard" if score >= 60 else "Fairly Difficult" if score >= 50 else "Difficult" if score >= 30 else "Very Difficult")
+            level = ("Very Easy" if score >= 90 else "Easy" if score >= 80 else
+                     "Fairly Easy" if score >= 70 else "Standard" if score >= 60 else
+                     "Fairly Difficult" if score >= 50 else "Difficult" if score >= 30 else
+                     "Very Difficult")
             return f"Flesch Score: {score:.1f} ({level})\nGrade Level: {grade:.1f}"
         except Exception as e:
             return f"Could not calculate readability: {str(e)}"
@@ -155,22 +69,41 @@ class AIContentHumanizer:
     def humanize_text(self, text, intensity="medium"):
         if not text or not text.strip():
             return "Please provide text to humanize."
+
         try:
             text = text.strip()
+
+            # Test NLTK functionality before proceeding
+            try:
+                # Try to tokenize a simple sentence to verify NLTK is working
+                test_tokens = sent_tokenize("This is a test sentence.")
+                if not test_tokens:
+                    raise Exception("NLTK tokenization failed")
+            except Exception as nltk_error:
+                return f"NLTK Error: {str(nltk_error)}. Please try again or contact support."
+
+            # Your existing humanization logic here...
             text = self.replace_ai_phrases(text)
             text = self.add_contractions(text)
+
             if intensity in ["medium", "heavy"]:
                 text = self.vary_sentence_structure(text)
                 text = self.add_personal_touches(text)
                 text = self.add_casual_punctuation(text)
+
             if intensity == "heavy":
                 text = self.add_natural_fillers(text)
+
             return self.clean_text(text)
+
         except Exception as e:
             return f"Error processing text: {str(e)}\n\nOriginal text: {text}"
 
+    # ... rest of your existing methods
+
 def create_interface():
     humanizer = AIContentHumanizer()
+
     def process_text(input_text, intensity):
         if not input_text:
             return "Please enter some text to humanize.", "No text provided."
@@ -184,13 +117,16 @@ def create_interface():
     with gr.Blocks(title="AI Content Humanizer") as interface:
         gr.Markdown("""# 🤖➡️👤 AI Content Humanizer
         Transform AI-generated content into human-sounding, casual, and readable text!""")
+
         input_text = gr.Textbox(label="AI-generated Text", lines=8)
         intensity = gr.Radio(["light", "medium", "heavy"], value="medium", label="Humanization Level")
         output_text = gr.Textbox(label="Humanized Text", lines=8, show_copy_button=True)
         readability = gr.Textbox(label="Readability Score", lines=2)
+
         btn = gr.Button("Humanize Text")
         btn.click(fn=process_text, inputs=[input_text, intensity], outputs=[output_text, readability])
         input_text.submit(fn=process_text, inputs=[input_text, intensity], outputs=[output_text, readability])
+
     return interface
 
 if __name__ == "__main__":
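
Note: the functional core of this commit is the punkt_tab/punkt fallback — NLTK 3.9 renamed the sentence-tokenizer resource, so the new download_nltk_data() fetches both names to cover old and new runtimes. A minimal, self-contained sketch of that resolution pattern (the ensure_punkt helper is illustrative, not part of the committed code):

import nltk

def ensure_punkt(download_dir="/tmp/nltk_data"):
    """Locate a usable sentence-tokenizer model, downloading one if needed."""
    nltk.data.path.append(download_dir)
    # Prefer punkt_tab (NLTK >= 3.9), then fall back to the older punkt.
    for resource, path in [("punkt_tab", "tokenizers/punkt_tab"),
                           ("punkt", "tokenizers/punkt")]:
        try:
            nltk.data.find(path)          # already installed?
            return resource
        except LookupError:
            nltk.download(resource, download_dir=download_dir, quiet=True)
        try:
            nltk.data.find(path)          # did the download succeed?
            return resource
        except LookupError:
            continue                      # try the older resource name
    raise LookupError("No punkt/punkt_tab tokenizer model available")

ensure_punkt()
from nltk.tokenize import sent_tokenize
print(sent_tokenize("First sentence. Second sentence."))

Under this scheme the app's sent_tokenize calls work whether the Space image ships NLTK 3.8 or 3.9+, which is exactly what the new in-method tokenization check in humanize_text() guards against.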
humanization_utils.py DELETED
@@ -1,317 +0,0 @@
-import random
-import re
-from typing import List, Dict, Tuple
-import nltk
-from nltk.tokenize import sent_tokenize, word_tokenize
-from nltk.corpus import wordnet
-from nltk.tag import pos_tag
-
-class AdvancedHumanizer:
-    def __init__(self):
-        self.load_humanization_data()
-
-    def load_humanization_data(self):
-        """Load comprehensive humanization patterns and data"""
-
-        # AI-typical phrases that need humanization
-        self.ai_patterns = {
-            r'\bit is important to note that\b': [
-                "worth mentioning that", "keep in mind that", "note that",
-                "interestingly,", "what's notable is that"
-            ],
-            r'\bit should be noted that\b': [
-                "remember that", "worth noting that", "keep in mind",
-                "importantly,", "note that"
-            ],
-            r'\bin conclusion\b': [
-                "to wrap up", "all in all", "bottom line",
-                "so basically", "in the end", "overall"
-            ],
-            r'\bfurthermore\b': [
-                "also", "plus", "what's more", "on top of that",
-                "and", "additionally", "besides"
-            ],
-            r'\bmoreover\b': [
-                "also", "plus", "and", "what's more",
-                "on top of that", "besides"
-            ],
-            r'\bhowever\b': [
-                "but", "though", "yet", "still", "although",
-                "on the flip side", "that said"
-            ],
-            r'\btherefore\b': [
-                "so", "that's why", "which means", "as a result",
-                "this means", "hence"
-            ],
-            r'\bconsequently\b': [
-                "so", "as a result", "that's why", "this means",
-                "because of this", "hence"
-            ],
-            r'\bsignificant\b': [
-                "big", "major", "important", "huge", "substantial",
-                "considerable", "notable"
-            ],
-            r'\bnumerous\b': [
-                "many", "lots of", "plenty of", "tons of",
-                "countless", "several"
-            ],
-            r'\butilize\b': [
-                "use", "make use of", "work with", "employ",
-                "take advantage of"
-            ],
-            r'\bdemonstrate\b': [
-                "show", "prove", "make clear", "illustrate",
-                "reveal", "display"
-            ],
-            r'\bfacilitate\b': [
-                "help", "make easier", "enable", "assist",
-                "make possible", "support"
-            ],
-            r'\bimplement\b': [
-                "put in place", "set up", "start using", "apply",
-                "carry out", "execute"
-            ]
-        }
-
-        # Transition words that sound too formal
-        self.formal_transitions = {
-            'additionally': ['also', 'plus', 'and'],
-            'alternatively': ['or', 'instead', 'on the other hand'],
-            'subsequently': ['then', 'after that', 'next'],
-            'initially': ['at first', 'to start with', 'in the beginning'],
-            'ultimately': ['in the end', 'finally', 'eventually'],
-            'nevertheless': ['but', 'still', 'however', 'yet'],
-            'accordingly': ['so', 'therefore', 'as a result']
-        }
-
-        # Filler words and phrases humans use
-        self.human_fillers = [
-            'actually', 'basically', 'really', 'pretty much', 'kind of',
-            'sort of', 'you know', 'I mean', 'like', 'well',
-            'honestly', 'frankly', 'obviously', 'clearly'
-        ]
-
-        # Casual sentence starters
-        self.casual_starters = [
-            "Look,", "Listen,", "Here's the thing:", "The way I see it,",
-            "To be honest,", "Frankly,", "Let me tell you,", "You know what?",
-            "The truth is,", "Here's what I think:", "In my experience,"
-        ]
-
-        # Opinion markers to make text more personal
-        self.opinion_markers = [
-            "I think", "I believe", "In my opinion", "From what I've seen",
-            "It seems to me", "I feel like", "My take is", "Personally,",
-            "From my experience", "I'd say", "I reckon", "I suspect"
-        ]
-
-        # Conversational connectors
-        self.conversational_connectors = [
-            " - ", " and ", " but ", " so ", " yet ", " or ",
-            ", which ", ", and this ", ", so ", ", but "
-        ]
-
-    def inject_personality(self, text: str) -> str:
-        """Add personality markers and opinions to make text more human"""
-        sentences = sent_tokenize(text)
-        modified_sentences = []
-
-        for i, sentence in enumerate(sentences):
-            # Add opinion markers occasionally
-            if random.random() < 0.3 and len(sentence.split()) > 5:
-                opinion = random.choice(self.opinion_markers)
-                sentence = opinion + " " + sentence.lower()
-
-            # Add casual starters occasionally
-            elif random.random() < 0.2 and i == 0:
-                starter = random.choice(self.casual_starters)
-                sentence = starter + " " + sentence.lower()
-
-            modified_sentences.append(sentence)
-
-        return ' '.join(modified_sentences)
-
-    def add_natural_flow(self, text: str) -> str:
-        """Improve natural flow by varying sentence structure"""
-        sentences = sent_tokenize(text)
-        if len(sentences) < 2:
-            return text
-
-        new_sentences = []
-        skip_next = False
-
-        for i, sentence in enumerate(sentences):
-            if skip_next:
-                skip_next = False
-                continue
-
-            # Combine short sentences occasionally
-            if (i < len(sentences) - 1 and
-                    len(sentence.split()) < 10 and
-                    len(sentences[i + 1].split()) < 10 and
-                    random.random() < 0.4):
-
-                connector = random.choice(self.conversational_connectors)
-                combined = sentence.rstrip('.!?') + connector + sentences[i + 1].lower()
-                new_sentences.append(combined)
-                skip_next = True
-            else:
-                new_sentences.append(sentence)
-
-        return ' '.join(new_sentences)
-
-    def add_hesitation_and_fillers(self, text: str) -> str:
-        """Add natural hesitation and filler words"""
-        sentences = sent_tokenize(text)
-        modified_sentences = []
-
-        for sentence in sentences:
-            words = sentence.split()
-
-            # Add fillers occasionally
-            if len(words) > 6 and random.random() < 0.3:
-                filler = random.choice(self.human_fillers)
-                insert_position = random.randint(1, min(4, len(words) - 1))
-                words.insert(insert_position, filler)
-
-            # Add "I think" or similar occasionally
-            if random.random() < 0.2 and not any(marker.lower() in sentence.lower() for marker in self.opinion_markers):
-                hedge = random.choice(['I think', 'I believe', 'probably', 'maybe', 'likely'])
-                words.insert(0, hedge)
-
-            modified_sentences.append(' '.join(words))
-
-        return ' '.join(modified_sentences)
-
-    def replace_ai_patterns(self, text: str) -> str:
-        """Replace typical AI patterns with human alternatives"""
-        for pattern, replacements in self.ai_patterns.items():
-            if re.search(pattern, text, re.IGNORECASE):
-                replacement = random.choice(replacements)
-                text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
-
-        return text
-
-    def replace_formal_transitions(self, text: str) -> str:
-        """Replace formal transition words with casual ones"""
-        for formal, casual_options in self.formal_transitions.items():
-            if formal in text.lower():
-                casual = random.choice(casual_options)
-                text = re.sub(r'\b' + re.escape(formal) + r'\b', casual, text, flags=re.IGNORECASE)
-
-        return text
-
-    def add_contractions_advanced(self, text: str) -> str:
-        """Advanced contraction addition with context awareness"""
-        contractions = {
-            r'\bit is\b': "it's",
-            r'\bthat is\b': "that's",
-            r'\bwhat is\b': "what's",
-            r'\bwhere is\b': "where's",
-            r'\bwho is\b': "who's",
-            r'\bwe are\b': "we're",
-            r'\bthey are\b': "they're",
-            r'\byou are\b': "you're",
-            r'\bi am\b': "I'm",
-            r'\bhe is\b': "he's",
-            r'\bshe is\b': "she's",
-            r'\bwill not\b': "won't",
-            r'\bcannot\b': "can't",
-            r'\bdo not\b': "don't",
-            r'\bdoes not\b': "doesn't",
-            r'\bdid not\b': "didn't",
-            r'\bhave not\b': "haven't",
-            r'\bhas not\b': "hasn't",
-            r'\bhad not\b': "hadn't",
-            r'\bwould not\b': "wouldn't",
-            r'\bshould not\b': "shouldn't",
-            r'\bcould not\b': "couldn't",
-            r'\bis not\b': "isn't",
-            r'\bare not\b': "aren't",
-            r'\bwas not\b': "wasn't",
-            r'\bwere not\b': "weren't"
-        }
-
-        for pattern, contraction in contractions.items():
-            text = re.sub(pattern, contraction, text, flags=re.IGNORECASE)
-
-        return text
-
-    def vary_punctuation(self, text: str) -> str:
-        """Vary punctuation for more natural feel"""
-        sentences = sent_tokenize(text)
-        modified_sentences = []
-
-        for i, sentence in enumerate(sentences):
-            # Sometimes use em dashes for emphasis
-            if random.random() < 0.2 and len(sentence.split()) > 8:
-                words = sentence.split()
-                dash_pos = random.randint(3, len(words) - 3)
-                words[dash_pos] = "—" + words[dash_pos]
-                sentence = ' '.join(words)
-
-            # Sometimes end with ellipsis for trailing thoughts
-            if random.random() < 0.1 and i == len(sentences) - 1:
-                sentence = sentence.rstrip('.!?') + '...'
-
-            # Sometimes use exclamation for emphasis
-            elif random.random() < 0.15 and any(word in sentence.lower()
-                                                for word in ['amazing', 'incredible', 'fantastic', 'great', 'awesome']):
-                sentence = sentence.rstrip('.') + '!'
-
-            modified_sentences.append(sentence)
-
-        return ' '.join(modified_sentences)
-
-    def add_parenthetical_thoughts(self, text: str) -> str:
-        """Add parenthetical thoughts and asides"""
-        sentences = sent_tokenize(text)
-        modified_sentences = []
-
-        parentheticals = [
-            "(at least in my experience)",
-            "(which makes sense)",
-            "(if you ask me)",
-            "(or so I think)",
-            "(from what I can tell)",
-            "(surprisingly enough)",
-            "(believe it or not)",
-            "(go figure)"
-        ]
-
-        for sentence in sentences:
-            if random.random() < 0.15 and len(sentence.split()) > 8:
-                parenthetical = random.choice(parentheticals)
-                words = sentence.split()
-                insert_pos = random.randint(3, len(words) - 2)
-                words.insert(insert_pos, parenthetical)
-                sentence = ' '.join(words)
-
-            modified_sentences.append(sentence)
-
-        return ' '.join(modified_sentences)
-
-    def humanize_comprehensively(self, text: str, intensity: str = "medium") -> str:
-        """Apply comprehensive humanization based on intensity level"""
-        if not text or not text.strip():
-            return text
-
-        # Always apply basic humanization
-        text = self.replace_ai_patterns(text)
-        text = self.add_contractions_advanced(text)
-        text = self.replace_formal_transitions(text)
-
-        if intensity in ["medium", "heavy"]:
-            text = self.add_natural_flow(text)
-            text = self.inject_personality(text)
-            text = self.vary_punctuation(text)
-
-        if intensity == "heavy":
-            text = self.add_hesitation_and_fillers(text)
-            text = self.add_parenthetical_thoughts(text)
-
-        # Clean up any double spaces or weird formatting
-        text = re.sub(r'\s+', ' ', text)
-        text = text.strip()
-
-        return text
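
Note: both the deleted AdvancedHumanizer and the surviving AIContentHumanizer lean on regex-driven phrase swapping, but the kept replace_ai_phrases additionally walks matches right to left so earlier match spans stay valid while the string is rewritten. A minimal sketch of that span-safe, case-preserving replacement (a single illustrative pattern, not the committed code):

import random
import re

# One illustrative pattern; the real table maps many AI-sounding phrases.
replacements = {r'\bhowever\b': ["but", "though", "yet"]}

def replace_phrases(text):
    for pattern, options in replacements.items():
        # Iterate right-to-left so earlier spans stay valid after splicing.
        for match in reversed(list(re.finditer(pattern, text, re.IGNORECASE))):
            choice = random.choice(options)
            start, end = match.span()
            if text[start].isupper():  # keep sentence-initial capitalization
                choice = choice.capitalize()
            text = text[:start] + choice + text[end:]
    return text

print(replace_phrases("However, the plan works. It is risky, however."))

Applying edits from the end of the string first is what lets the method splice in replacements of different lengths without recomputing match offsets.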