# humanizer-ai/app.py: Professional AI Content Humanizer (Gradio app for Hugging Face Spaces)
import os
import gradio as gr
import random
import re
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import wordnet
from textstat import flesch_reading_ease, flesch_kincaid_grade
from collections import defaultdict
# Setup NLTK download path for Hugging Face Spaces
os.environ['NLTK_DATA'] = '/tmp/nltk_data'
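# Note: nltk was imported above, so setting NLTK_DATA here is not enough on
# its own; download_nltk_data() also appends the directory to nltk.data.path.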
def download_nltk_data():
"""Download required NLTK data with proper error handling"""
try:
os.makedirs('/tmp/nltk_data', exist_ok=True)
nltk.data.path.append('/tmp/nltk_data')
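        # punkt_tab holds the tokenizer tables used by newer NLTK releases;
        # punkt is kept alongside it for compatibility with older versions.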
required_data = ['punkt', 'punkt_tab', 'averaged_perceptron_tagger',
'stopwords', 'wordnet', 'omw-1.4']
for data in required_data:
try:
nltk.download(data, download_dir='/tmp/nltk_data', quiet=True)
print(f"Successfully downloaded {data}")
except Exception as e:
print(f"Failed to download {data}: {e}")
print("NLTK data download completed")
except Exception as e:
print(f"NLTK setup error: {e}")
download_nltk_data()
class AdvancedAIHumanizer:
def __init__(self):
self.setup_humanization_patterns()
self.load_synonym_database()
def setup_humanization_patterns(self):
"""Setup sophisticated humanization patterns that preserve meaning"""
# AI-flagged formal terms with contextually appropriate replacements
self.formal_replacements = {
r'\bdelve into\b': ["explore", "examine", "investigate", "analyze", "look into"],
r'\bembark on\b': ["begin", "start", "initiate", "commence", "launch"],
r'\ba testament to\b': ["evidence of", "proof of", "demonstrates", "shows", "indicates"],
r'\blandscape of\b': ["context of", "environment of", "field of", "domain of", "realm of"],
r'\bnavigating\b': ["managing", "addressing", "handling", "working through", "dealing with"],
r'\bmeticulous\b': ["careful", "thorough", "detailed", "precise", "systematic"],
r'\bintricate\b': ["complex", "detailed", "sophisticated", "elaborate", "nuanced"],
r'\bmyriad\b': ["numerous", "many", "various", "multiple", "countless"],
r'\bplethora\b': ["abundance", "variety", "range", "collection", "wealth"],
r'\bparadigm\b': ["model", "framework", "approach", "system", "method"],
r'\bsynergy\b': ["collaboration", "cooperation", "coordination", "integration", "teamwork"],
r'\bleverage\b': ["utilize", "employ", "use", "apply", "harness"],
r'\bfacilitate\b': ["enable", "support", "assist", "help", "promote"],
r'\boptimize\b': ["improve", "enhance", "refine", "perfect", "maximize"],
r'\bstreamline\b': ["simplify", "improve", "refine", "enhance", "optimize"],
r'\brobust\b': ["strong", "reliable", "effective", "solid", "durable"],
r'\bseamless\b': ["smooth", "integrated", "unified", "continuous", "fluid"],
r'\binnovative\b': ["creative", "original", "novel", "advanced", "groundbreaking"],
r'\bcutting-edge\b': ["advanced", "latest", "modern", "current", "state-of-the-art"],
r'\bstate-of-the-art\b': ["advanced", "modern", "sophisticated", "current", "latest"]
}
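        # Illustrative effect (the actual choice is random):
        #   "We delve into the landscape of AI" -> "We explore the field of AI"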
# Transition phrase variations
self.transition_replacements = {
r'\bfurthermore\b': ["additionally", "moreover", "in addition", "also", "besides"],
r'\bmoreover\b': ["furthermore", "additionally", "also", "in addition", "what's more"],
r'\bhowever\b': ["nevertheless", "yet", "still", "although", "but"],
r'\bnevertheless\b': ["however", "yet", "still", "nonetheless", "even so"],
r'\btherefore\b': ["consequently", "thus", "as a result", "hence", "so"],
r'\bconsequently\b': ["therefore", "thus", "as a result", "accordingly", "hence"],
r'\bin conclusion\b': ["finally", "ultimately", "in summary", "to summarize", "overall"],
r'\bto summarize\b': ["in conclusion", "finally", "in summary", "overall", "in essence"],
r'\bin summary\b': ["to conclude", "overall", "finally", "in essence", "ultimately"]
}
# Sentence structure patterns for variation
self.sentence_starters = [
"Additionally,", "Furthermore,", "In particular,", "Notably,",
"Importantly,", "Significantly,", "Moreover,", "Consequently,",
"Interestingly,", "Specifically,", "Essentially,", "Primarily,"
]
# Professional contractions (limited and contextual)
self.professional_contractions = {
r'\bit is\b': "it's",
r'\bthere is\b': "there's",
r'\bthat is\b': "that's",
r'\bcannot\b': "can't",
r'\bdo not\b': "don't",
r'\bdoes not\b': "doesn't",
r'\bwill not\b': "won't",
r'\bwould not\b': "wouldn't",
r'\bshould not\b': "shouldn't",
r'\bcould not\b': "couldn't"
}
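        # Illustrative effect: "It is clear that we cannot proceed"
        #   -> "it's clear that we can't proceed"
        # (sentence-initial letters are re-capitalized later by final_coherence_check)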
def load_synonym_database(self):
"""Load and prepare synonym database using WordNet"""
try:
# Test WordNet availability
wordnet.synsets('test')
self.wordnet_available = True
print("WordNet loaded successfully")
        except Exception as e:
            self.wordnet_available = False
            print(f"WordNet not available ({e}); using limited synonym replacement")
def get_contextual_synonym(self, word, pos_tag=None):
"""Get contextually appropriate synonym using WordNet"""
if not self.wordnet_available:
return word
try:
# Get synsets for the word
synsets = wordnet.synsets(word.lower())
if not synsets:
return word
# Get synonyms from the first synset
synonyms = []
for synset in synsets[:2]: # Check first 2 synsets
for lemma in synset.lemmas():
synonym = lemma.name().replace('_', ' ')
if synonym != word.lower() and len(synonym) > 2:
synonyms.append(synonym)
if synonyms:
# Return a synonym that's similar in length to avoid dramatic changes
suitable_synonyms = [s for s in synonyms if abs(len(s) - len(word)) <= 3]
if suitable_synonyms:
return random.choice(suitable_synonyms)
else:
return random.choice(synonyms)
return word
        except Exception:
            return word
def preserve_meaning_replacement(self, text, intensity_level=1):
"""Replace AI-flagged terms while preserving exact meaning"""
result = text
# Determine replacement probability based on intensity
replacement_probability = {
1: 0.3, # Light
2: 0.5, # Standard
3: 0.7 # Heavy
}
prob = replacement_probability.get(intensity_level, 0.5)
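        # Note: when a pattern fires, re.sub swaps every occurrence in the
        # text for the same randomly chosen replacement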
# Apply formal term replacements
for pattern, replacements in self.formal_replacements.items():
if re.search(pattern, result, re.IGNORECASE) and random.random() < prob:
replacement = random.choice(replacements)
result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)
# Apply transition phrase replacements
for pattern, replacements in self.transition_replacements.items():
if re.search(pattern, result, re.IGNORECASE) and random.random() < prob:
replacement = random.choice(replacements)
result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)
return result
def vary_sentence_structure(self, text, intensity_level=1):
"""Vary sentence structures while maintaining meaning"""
sentences = sent_tokenize(text)
varied_sentences = []
# Determine variation probability based on intensity
variation_probability = {
1: 0.1, # Light
2: 0.2, # Standard
3: 0.3 # Heavy
}
prob = variation_probability.get(intensity_level, 0.2)
for i, sentence in enumerate(sentences):
# Occasionally add transitional phrases at the beginning
            if i > 0 and len(sentence.split()) > 6 and random.random() < prob:
                starter = random.choice(self.sentence_starters)
                # Lowercase the old first letter only when it is not part of
                # an acronym (e.g. keep "NASA ..." intact)
                if len(sentence) > 1 and sentence[1].islower():
                    sentence = sentence[0].lower() + sentence[1:]
                sentence = f"{starter} {sentence}"
# Convert some passive to active voice and vice versa
if random.random() < prob:
sentence = self.vary_voice(sentence)
# Restructure complex sentences occasionally
if len(sentence.split()) > 15 and random.random() < prob:
sentence = self.restructure_complex_sentence(sentence)
varied_sentences.append(sentence)
return " ".join(varied_sentences)
def vary_voice(self, sentence):
"""Convert between active and passive voice occasionally"""
# Simple passive to active conversion patterns
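        # Caveat: these regexes are naive heuristics. They match only a
        # one-word subject, drop the auxiliary verb, and can produce
        # ungrammatical output (e.g. "was seen by X" -> "X seen ...");
        # the 0.3 gate below keeps such rewrites rare.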
passive_patterns = [
(r'(\w+) (?:is|are|was|were) (\w+ed|known|seen|used|made) by (.+)',
r'\3 \2 \1'),
(r'(\w+) (?:is|are|was|were) (\w+ed|known|seen|used|made)',
r'Someone \2 \1')
]
for pattern, replacement in passive_patterns:
if re.search(pattern, sentence) and random.random() < 0.3:
sentence = re.sub(pattern, replacement, sentence)
break
return sentence
def restructure_complex_sentence(self, sentence):
"""Restructure overly complex sentences"""
# Split long sentences at natural break points
if ',' in sentence and len(sentence.split()) > 15:
parts = sentence.split(',', 1)
if len(parts) == 2:
first_part = parts[0].strip()
second_part = parts[1].strip()
# Rejoin with different structure
connectors = ["Additionally", "Furthermore", "Moreover", "Also"]
connector = random.choice(connectors)
return f"{first_part}. {connector}, {second_part}"
return sentence
def apply_subtle_contractions(self, text, intensity_level=1):
"""Apply professional contractions sparingly"""
# Determine contraction probability based on intensity
contraction_probability = {
1: 0.2, # Light
2: 0.3, # Standard
3: 0.4 # Heavy
}
prob = contraction_probability.get(intensity_level, 0.3)
for pattern, contraction in self.professional_contractions.items():
if re.search(pattern, text, re.IGNORECASE) and random.random() < prob:
text = re.sub(pattern, contraction, text, flags=re.IGNORECASE)
return text
def enhance_vocabulary_diversity(self, text, intensity_level=1):
"""Enhance vocabulary diversity using contextual synonyms"""
words = word_tokenize(text)
enhanced_words = []
word_frequency = defaultdict(int)
# Determine synonym probability based on intensity
synonym_probability = {
1: 0.1, # Light
2: 0.2, # Standard
3: 0.3 # Heavy
}
prob = synonym_probability.get(intensity_level, 0.2)
# Track word frequency to identify repetitive words
for word in words:
if word.isalpha() and len(word) > 4:
word_frequency[word.lower()] += 1
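        # Only repeated words longer than four letters are candidates, which
        # limits how far the vocabulary can drift from the original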
for word in words:
if (word.isalpha() and len(word) > 4 and
word_frequency[word.lower()] > 1 and
random.random() < prob):
synonym = self.get_contextual_synonym(word)
enhanced_words.append(synonym)
else:
enhanced_words.append(word)
return ' '.join(enhanced_words)
def add_natural_variation(self, text, intensity_level=1):
"""Add natural human-like variations"""
sentences = sent_tokenize(text)
varied_sentences = []
# Determine variation probability based on intensity
variation_probability = {
1: 0.05, # Light
2: 0.15, # Standard
3: 0.25 # Heavy
}
prob = variation_probability.get(intensity_level, 0.15)
for sentence in sentences:
# Occasionally vary sentence length and structure
if len(sentence.split()) > 20 and random.random() < prob:
# Split very long sentences
mid_point = len(sentence.split()) // 2
words = sentence.split()
                # Find a natural break point near the middle. split() keeps
                # punctuation attached to words, so a bare ',' token never
                # appears here; match coordinating conjunctions instead.
                for i in range(mid_point - 2, mid_point + 3):
                    if 0 < i < len(words) - 1 and words[i] in ('and', 'but', 'or', 'because'):
                        first_part = ' '.join(words[:i])
                        second_part = ' '.join(words[i+1:])
                        # Uppercase only the first letter; str.capitalize()
                        # would lowercase the rest of the clause
                        sentence = f"{first_part}. {second_part[0].upper()}{second_part[1:]}"
break
# Add subtle emphasis occasionally
if random.random() < prob:
sentence = self.add_subtle_emphasis(sentence)
varied_sentences.append(sentence)
return " ".join(varied_sentences)
def add_subtle_emphasis(self, sentence):
"""Add very subtle emphasis that doesn't change meaning"""
emphasis_patterns = [
(r'\bvery important\b', "crucial"),
(r'\bvery significant\b', "highly significant"),
(r'\bvery effective\b', "highly effective"),
(r'\bvery useful\b', "particularly useful"),
(r'\bvery good\b', "excellent"),
(r'\bvery bad\b', "poor")
]
for pattern, replacement in emphasis_patterns:
if re.search(pattern, sentence, re.IGNORECASE):
sentence = re.sub(pattern, replacement, sentence, flags=re.IGNORECASE)
break
return sentence
def final_coherence_check(self, text):
"""Final check to ensure coherence and proper formatting"""
        # Fix spacing issues
        text = re.sub(r'\s+', ' ', text)
        text = re.sub(r'\s+([,.!?;:])', r'\1', text)
        # Ensure a space after punctuation (note: this can also split dotted
        # acronyms such as "U.S.A" into "U. S. A")
        text = re.sub(r'([,.!?;:])\s*([A-Z])', r'\1 \2', text)
# Ensure proper capitalization
sentences = sent_tokenize(text)
corrected_sentences = []
for sentence in sentences:
if sentence and sentence[0].islower():
sentence = sentence[0].upper() + sentence[1:]
corrected_sentences.append(sentence)
text = " ".join(corrected_sentences)
        # Collapse runs of periods (note: this also flattens "..." ellipses)
        text = re.sub(r'\.{2,}', '.', text)
text = re.sub(r'\s+', ' ', text)
return text.strip()
def advanced_humanize(self, text, intensity_level=1):
"""Apply sophisticated humanization that preserves meaning"""
current_text = text
print(f"Processing with intensity level: {intensity_level}")
# Apply humanization techniques with intensity-based parameters
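        # Order matters: the regex-based rewrites run first so they see the
        # original phrasing, before token-level synonym swaps alter it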
current_text = self.preserve_meaning_replacement(current_text, intensity_level)
current_text = self.vary_sentence_structure(current_text, intensity_level)
current_text = self.enhance_vocabulary_diversity(current_text, intensity_level)
current_text = self.apply_subtle_contractions(current_text, intensity_level)
current_text = self.add_natural_variation(current_text, intensity_level)
# Final coherence and cleanup
current_text = self.final_coherence_check(current_text)
return current_text
def get_readability_score(self, text):
"""Calculate readability score"""
try:
score = flesch_reading_ease(text)
grade = flesch_kincaid_grade(text)
level = ("Very Easy" if score >= 90 else "Easy" if score >= 80 else
"Fairly Easy" if score >= 70 else "Standard" if score >= 60 else
"Fairly Difficult" if score >= 50 else "Difficult" if score >= 30 else
"Very Difficult")
return f"Flesch Score: {score:.1f} ({level})\nGrade Level: {grade:.1f}"
except Exception as e:
return f"Could not calculate readability: {str(e)}"
def humanize_text(self, text, intensity="standard"):
"""Main humanization method with meaning preservation"""
if not text or not text.strip():
return "Please provide text to humanize."
try:
text = text.strip()
# Test NLTK functionality
try:
test_tokens = sent_tokenize("This is a test sentence.")
if not test_tokens:
raise Exception("NLTK tokenization failed")
except Exception as nltk_error:
return f"NLTK Error: {str(nltk_error)}. Please try again."
# Map intensity to numeric levels
intensity_mapping = {
"light": 1,
"standard": 2,
"heavy": 3
}
intensity_level = intensity_mapping.get(intensity, 2)
print(f"Using intensity: {intensity} (level {intensity_level})")
# Apply humanization
result = self.advanced_humanize(text, intensity_level)
return result
except Exception as e:
return f"Error processing text: {str(e)}"
def create_interface():
"""Create the professional Gradio interface"""
humanizer = AdvancedAIHumanizer()
def process_text(input_text, intensity):
if not input_text:
return "Please enter some text to humanize.", "No text provided."
try:
result = humanizer.humanize_text(input_text, intensity)
score = humanizer.get_readability_score(result)
return result, score
except Exception as e:
return f"Error: {str(e)}", "Processing error"
# Professional CSS styling
professional_css = """
.gradio-container {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.main-header {
text-align: center;
color: #2c3e50;
font-size: 2.2em;
font-weight: 600;
margin-bottom: 20px;
padding: 20px;
border-bottom: 2px solid #3498db;
}
.feature-box {
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
border-radius: 8px;
padding: 20px;
margin: 15px 0;
border-left: 4px solid #3498db;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.info-box {
background: #e8f5e8;
border-radius: 8px;
padding: 15px;
margin: 10px 0;
border-left: 4px solid #27ae60;
}
"""
with gr.Blocks(
title="Professional AI Humanizer",
theme=gr.themes.Soft(),
css=professional_css
) as interface:
gr.HTML("""
<div class="main-header">
🎯 Professional AI Content Humanizer
</div>
<div style="text-align: center; margin-bottom: 30px;">
<h3>Meaning-Preserving AI Detection Bypass</h3>
<p style="font-size: 1.1em; color: #7f8c8d;">
Advanced humanization while maintaining professional tone and original meaning
</p>
</div>
""")
with gr.Row():
with gr.Column(scale=1):
input_text = gr.Textbox(
label="πŸ“ Original Content",
lines=12,
placeholder="Enter your AI-generated content here...\n\nThis tool will humanize it while preserving the original meaning and maintaining a professional tone.",
info="πŸ’‘ Best results with content 100+ words",
show_copy_button=True
)
intensity = gr.Radio(
choices=[
("Light Processing (30% changes)", "light"),
("Standard Processing (50% changes)", "standard"),
("Heavy Processing (70% changes)", "heavy")
],
value="standard",
label="πŸ”§ Processing Intensity",
info="Choose how extensively to humanize the content"
)
btn = gr.Button(
"πŸš€ Humanize Content",
variant="primary",
size="lg"
)
with gr.Column(scale=1):
output_text = gr.Textbox(
label="βœ… Humanized Content",
lines=12,
show_copy_button=True,
info="Processed content ready for use"
)
readability = gr.Textbox(
label="πŸ“Š Content Analysis",
lines=3,
info="Readability metrics"
)
gr.HTML("""
<div class="feature-box">
<h3>🎯 Processing Intensity Levels:</h3>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 15px; margin: 15px 0;">
<div class="info-box">
<strong>🟒 Light Processing (30%):</strong><br>
β€’ Minimal word replacements<br>
β€’ Basic sentence variation<br>
β€’ Subtle changes only<br>
β€’ Best for: Already human-like content
</div>
<div class="info-box">
<strong>🟑 Standard Processing (50%):</strong><br>
β€’ Moderate humanization<br>
β€’ Balanced approach<br>
β€’ Professional tone maintained<br>
β€’ Best for: Most AI-generated content
</div>
<div class="info-box">
<strong>πŸ”΄ Heavy Processing (70%):</strong><br>
β€’ Extensive modifications<br>
β€’ Maximum variation<br>
β€’ Strong AI detection bypass<br>
β€’ Best for: Highly detectable AI text
</div>
</div>
</div>
""")
gr.HTML("""
<div class="feature-box">
<h3>🎭 Advanced Humanization Features:</h3>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 15px; margin: 15px 0;">
<div class="info-box">
<strong>πŸ”„ Meaning Preservation:</strong><br>
Aims to preserve the original meaning and intent
</div>
<div class="info-box">
<strong>πŸ“ Professional Tone:</strong><br>
Keeps appropriate formality level
</div>
<div class="info-box">
<strong>🎭 Structure Variation:</strong><br>
Natural sentence pattern diversity
</div>
<div class="info-box">
<strong>πŸ“š Smart Synonyms:</strong><br>
Context-aware vocabulary enhancement
</div>
<div class="info-box">
<strong>πŸ”— Coherent Flow:</strong><br>
Maintains logical progression
</div>
<div class="info-box">
<strong>⚑ Detection Bypass:</strong><br>
Designed to reduce flags from modern AI detection tools
</div>
</div>
</div>
""")
# Event handlers
btn.click(
fn=process_text,
inputs=[input_text, intensity],
outputs=[output_text, readability]
)
input_text.submit(
fn=process_text,
inputs=[input_text, intensity],
outputs=[output_text, readability]
)
return interface
if __name__ == "__main__":
print("πŸš€ Starting Professional AI Humanizer...")
app = create_interface()
app.launch(
server_name="0.0.0.0",
server_port=7860,
show_error=True,
share=False
)