conversantech committed
Commit 2c89e89 · 1 Parent(s): 7826103
Files changed (1)
  1. app.py +142 -22
app.py CHANGED
@@ -12,19 +12,10 @@ os.environ['NLTK_DATA'] = '/tmp/nltk_data'
 def download_nltk_data():
     """Download required NLTK data with proper error handling"""
     try:
-        # Create the directory if it doesn't exist
         os.makedirs('/tmp/nltk_data', exist_ok=True)
-
-        # Add the path to NLTK's data path
         nltk.data.path.append('/tmp/nltk_data')
 
-        # Download required NLTK data - use punkt_tab for newer NLTK versions
-        required_data = [
-            'punkt_tab',  # For newer NLTK versions (3.9+)
-            'punkt',  # Fallback for older versions
-            'averaged_perceptron_tagger',
-            'stopwords'
-        ]
+        required_data = ['punkt_tab', 'punkt', 'averaged_perceptron_tagger', 'stopwords']
 
         for data in required_data:
             try:
@@ -34,12 +25,10 @@ def download_nltk_data():
                 print(f"Failed to download {data}: {e}")
 
         print("NLTK data download completed")
-        print(f"NLTK data paths: {nltk.data.path}")
 
     except Exception as e:
         print(f"NLTK setup error: {e}")
 
-# Download NLTK data at startup
 download_nltk_data()
 
 class AIContentHumanizer:
@@ -47,14 +36,146 @@ class AIContentHumanizer:
         self.setup_humanization_patterns()
 
     def setup_humanization_patterns(self):
-        # Your existing patterns code here...
+        """Setup patterns for AI phrase replacement"""
         self.ai_replacements = {
             r'\bit is important to note that\b': ["worth mentioning that", "keep in mind that", "note that"],
-            # ... rest of your patterns
+            r'\bit is worth noting that\b': ["interestingly", "notably", "it's worth mentioning"],
+            r'\bin conclusion\b': ["to wrap up", "all in all", "bottom line"],
+            r'\bfurthermore\b': ["plus", "also", "on top of that"],
+            r'\bmoreover\b': ["what's more", "besides", "additionally"],
+            r'\bhowever\b': ["but", "though", "on the flip side"],
+            r'\bnevertheless\b': ["still", "even so", "that said"],
+            r'\btherefore\b': ["so", "thus", "as a result"],
+            r'\bconsequently\b': ["as a result", "so", "because of this"],
+            r'\bin order to\b': ["to", "so we can", "for"],
+            r'\bdue to the fact that\b': ["because", "since", "given that"],
+            r'\bwith regard to\b': ["about", "regarding", "when it comes to"],
+            r'\bit should be noted that\b': ["note that", "remember", "keep in mind"],
+            r'\bit is essential to\b': ["you need to", "it's crucial to", "make sure to"],
+            r'\bsubsequently\b': ["then", "next", "after that"],
+            r'\bultimately\b': ["in the end", "finally", "when all is said and done"]
+        }
+
+        self.contractions = {
+            r'\bdo not\b': "don't",
+            r'\bdoes not\b': "doesn't",
+            r'\bdid not\b': "didn't",
+            r'\bwill not\b': "won't",
+            r'\bwould not\b': "wouldn't",
+            r'\bcould not\b': "couldn't",
+            r'\bshould not\b': "shouldn't",
+            r'\bcannot\b': "can't",
+            r'\bis not\b': "isn't",
+            r'\bare not\b': "aren't",
+            r'\bwas not\b': "wasn't",
+            r'\bwere not\b': "weren't",
+            r'\bhave not\b': "haven't",
+            r'\bhas not\b': "hasn't",
+            r'\bhad not\b': "hadn't",
+            r'\bI will\b': "I'll",
+            r'\byou will\b': "you'll",
+            r'\bhe will\b': "he'll",
+            r'\bshe will\b': "she'll",
+            r'\bwe will\b': "we'll",
+            r'\bthey will\b': "they'll",
+            r'\bI would\b': "I'd",
+            r'\byou would\b': "you'd",
+            r'\bI have\b': "I've",
+            r'\byou have\b': "you've",
+            r'\bwe have\b': "we've",
+            r'\bthey have\b': "they've"
         }
-        # ... rest of your existing code
+
+        self.casual_fillers = [
+            "you know", "I mean", "like", "actually", "basically",
+            "honestly", "literally", "obviously", "clearly", "definitely"
+        ]
+
+        self.personal_touches = [
+            "I think", "in my opinion", "from what I've seen", "personally",
+            "if you ask me", "the way I see it", "from my experience"
+        ]
+
+    def replace_ai_phrases(self, text):
+        """Replace formal AI phrases with more casual alternatives"""
+        for pattern, replacements in self.ai_replacements.items():
+            if re.search(pattern, text, re.IGNORECASE):
+                replacement = random.choice(replacements)
+                text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
+        return text
+
+    def add_contractions(self, text):
+        """Add contractions to make text more casual"""
+        for pattern, contraction in self.contractions.items():
+            text = re.sub(pattern, contraction, text, flags=re.IGNORECASE)
+        return text
+
+    def vary_sentence_structure(self, text):
+        """Add variety to sentence structure"""
+        try:
+            sentences = sent_tokenize(text)
+            varied_sentences = []
+
+            for sentence in sentences:
+                # Randomly add sentence starters
+                if random.random() < 0.3 and len(sentence.split()) > 8:
+                    starters = ["Well,", "So,", "Now,", "Look,", "Here's the thing -"]
+                    sentence = f"{random.choice(starters)} {sentence.lower()}"
+
+                varied_sentences.append(sentence)
+
+            return " ".join(varied_sentences)
+        except Exception:
+            return text
+
+    def add_personal_touches(self, text):
+        """Add personal opinions and touches"""
+        sentences = sent_tokenize(text)
+        if len(sentences) > 2 and random.random() < 0.4:
+            insert_pos = random.randint(1, len(sentences) - 1)
+            personal_touch = random.choice(self.personal_touches)
+            sentences[insert_pos] = f"{personal_touch}, {sentences[insert_pos].lower()}"
+
+        return " ".join(sentences)
+
+    def add_casual_punctuation(self, text):
+        """Add casual punctuation like dashes and ellipses"""
+        # Replace some periods with dashes for emphasis
+        text = re.sub(r'(\w+)\. ([A-Z])', r'\1 - \2', text)
+
+        # Add occasional ellipses
+        if random.random() < 0.3:
+            text = re.sub(r'(\w+)\.', r'\1...', text, count=1)
+
+        return text
+
+    def add_natural_fillers(self, text):
+        """Add natural conversation fillers"""
+        sentences = sent_tokenize(text)
+        if len(sentences) > 1 and random.random() < 0.5:
+            filler_pos = random.randint(0, len(sentences) - 1)
+            filler = random.choice(self.casual_fillers)
+            sentences[filler_pos] = f"{filler}, {sentences[filler_pos].lower()}"
+
+        return " ".join(sentences)
+
+    def clean_text(self, text):
+        """Clean up the text formatting"""
+        # Fix spacing issues
+        text = re.sub(r'\s+', ' ', text)
+        text = re.sub(r'\s+([,.!?])', r'\1', text)
+
+        # Fix capitalization after sentence starters
+        text = re.sub(r'([.!?]\s+)([a-z])', lambda m: m.group(1) + m.group(2).upper(), text)
+
+        # Ensure first letter is capitalized
+        if text and text[0].islower():
+            text = text[0].upper() + text[1:]
+
+        return text.strip()
 
     def get_readability_score(self, text):
+        """Calculate readability score"""
         try:
             score = flesch_reading_ease(text)
             grade = flesch_kincaid_grade(text)
@@ -67,22 +188,22 @@ class AIContentHumanizer:
             return f"Could not calculate readability: {str(e)}"
 
     def humanize_text(self, text, intensity="medium"):
+        """Main method to humanize AI-generated text"""
         if not text or not text.strip():
             return "Please provide text to humanize."
 
         try:
             text = text.strip()
 
-            # Test NLTK functionality before proceeding
+            # Test NLTK functionality
            try:
-                # Try to tokenize a simple sentence to verify NLTK is working
                 test_tokens = sent_tokenize("This is a test sentence.")
                 if not test_tokens:
                     raise Exception("NLTK tokenization failed")
             except Exception as nltk_error:
                 return f"NLTK Error: {str(nltk_error)}. Please try again or contact support."
 
-            # Your existing humanization logic here...
+            # Apply humanization techniques based on intensity
             text = self.replace_ai_phrases(text)
             text = self.add_contractions(text)
 
@@ -99,9 +220,8 @@ class AIContentHumanizer:
         except Exception as e:
             return f"Error processing text: {str(e)}\n\nOriginal text: {text}"
 
-    # ... rest of your existing methods
-
 def create_interface():
+    """Create the Gradio interface"""
    humanizer = AIContentHumanizer()
 
    def process_text(input_text, intensity):
@@ -118,12 +238,12 @@ def create_interface():
        gr.Markdown("""# 🤖➡️👤 AI Content Humanizer
        Transform AI-generated content into human-sounding, casual, and readable text!""")
 
-        input_text = gr.Textbox(label="AI-generated Text", lines=8)
+        input_text = gr.Textbox(label="AI-generated Text", lines=8, placeholder="Paste your AI-generated text here...")
        intensity = gr.Radio(["light", "medium", "heavy"], value="medium", label="Humanization Level")
        output_text = gr.Textbox(label="Humanized Text", lines=8, show_copy_button=True)
        readability = gr.Textbox(label="Readability Score", lines=2)
 
-        btn = gr.Button("Humanize Text")
+        btn = gr.Button("Humanize Text", variant="primary")
        btn.click(fn=process_text, inputs=[input_text, intensity], outputs=[output_text, readability])
        input_text.submit(fn=process_text, inputs=[input_text, intensity], outputs=[output_text, readability])
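
Editor's note on the simplified NLTK bootstrap: each resource now downloads inside its own try/except, so a single failed download no longer aborts the rest. The following standalone sketch (not part of the commit; it just mirrors the committed setup) is one way to confirm the resources actually resolve from /tmp/nltk_data before the app depends on them:

import os
import nltk

# Mirror the app's environment: NLTK data lives under /tmp, which is
# writable on Hugging Face Spaces.
os.environ['NLTK_DATA'] = '/tmp/nltk_data'
os.makedirs('/tmp/nltk_data', exist_ok=True)
nltk.data.path.append('/tmp/nltk_data')

# Same list as the commit: punkt_tab for NLTK 3.9+, punkt as the
# fallback for older releases.
for resource in ['punkt_tab', 'punkt', 'averaged_perceptron_tagger', 'stopwords']:
    try:
        nltk.download(resource, download_dir='/tmp/nltk_data', quiet=True)
    except Exception as e:
        print(f"Failed to download {resource}: {e}")

# If the tokenizer resolves from the configured path, sentence
# splitting works end to end.
from nltk.tokenize import sent_tokenize
print(sent_tokenize("First sentence. Second sentence."))
# Expected: ['First sentence.', 'Second sentence.']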
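
The phrase replacement added in setup_humanization_patterns relies on word-boundary regexes plus re.IGNORECASE, so "However," at a sentence start matches just like "however" mid-sentence. A tiny standalone illustration of the mechanism, using one pattern from the table above (snippet is editor-added, not from the commit):

import re
import random

replacements = {r'\bhowever\b': ["but", "though", "on the flip side"]}

text = "However, the model performs well. It is, however, expensive."
for pattern, options in replacements.items():
    if re.search(pattern, text, re.IGNORECASE):
        # One random choice is reused for every occurrence, matching
        # the commit's replace_ai_phrases behavior.
        text = re.sub(pattern, random.choice(options), text, flags=re.IGNORECASE)
print(text)
# e.g. "but, the model performs well. It is, but, expensive."

Note the lowercase replacement at the sentence start: the commit's clean_text pass compensates by re-capitalizing the first character of the text and any letter following sentence punctuation.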
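
To exercise the humanizer outside the Gradio UI, a minimal smoke test could look like the sketch below. This assumes app.py can be imported without auto-launching the interface (the launch wiring is outside this diff) and that the elided intensity-handling block in humanize_text dispatches to the helper methods shown above:

# Importing app also runs its module-level download_nltk_data() call.
from app import AIContentHumanizer

humanizer = AIContentHumanizer()

sample = ("It is important to note that the results are preliminary. "
          "Furthermore, we cannot draw firm conclusions. "
          "Therefore, additional experiments are required.")

# Output is randomized (random.choice / random.random), so repeated
# runs produce different phrasings.
humanized = humanizer.humanize_text(sample, intensity="medium")
print(humanized)

# Compare readability before and after; get_readability_score wraps
# flesch_reading_ease / flesch_kincaid_grade.
print(humanizer.get_readability_score(sample))
print(humanizer.get_readability_score(humanized))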