import os
import gradio as gr
import random
import re
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import wordnet
from textstat import flesch_reading_ease, flesch_kincaid_grade
from collections import defaultdict

# Set up the NLTK download path for Hugging Face Spaces
os.environ['NLTK_DATA'] = '/tmp/nltk_data'

def download_nltk_data():
    """Download required NLTK data with proper error handling."""
    try:
        os.makedirs('/tmp/nltk_data', exist_ok=True)
        nltk.data.path.append('/tmp/nltk_data')
        required_data = ['punkt', 'punkt_tab', 'averaged_perceptron_tagger',
                         'stopwords', 'wordnet', 'omw-1.4']
        for data in required_data:
            try:
                nltk.download(data, download_dir='/tmp/nltk_data', quiet=True)
                print(f"Successfully downloaded {data}")
            except Exception as e:
                print(f"Failed to download {data}: {e}")
        print("NLTK data download completed")
    except Exception as e:
        print(f"NLTK setup error: {e}")

download_nltk_data()

class AdvancedAIHumanizer:
    def __init__(self):
        self.setup_humanization_patterns()
        self.load_synonym_database()

    def setup_humanization_patterns(self):
        """Set up sophisticated humanization patterns that preserve meaning."""
        # AI-flagged formal terms with contextually appropriate replacements
        self.formal_replacements = {
            r'\bdelve into\b': ["explore", "examine", "investigate", "analyze", "look into"],
            r'\bembark on\b': ["begin", "start", "initiate", "commence", "launch"],
            r'\ba testament to\b': ["evidence of", "proof of", "demonstrates", "shows", "indicates"],
            r'\blandscape of\b': ["context of", "environment of", "field of", "domain of", "realm of"],
            r'\bnavigating\b': ["managing", "addressing", "handling", "working through", "dealing with"],
            r'\bmeticulous\b': ["careful", "thorough", "detailed", "precise", "systematic"],
            r'\bintricate\b': ["complex", "detailed", "sophisticated", "elaborate", "nuanced"],
            r'\bmyriad\b': ["numerous", "many", "various", "multiple", "countless"],
            r'\bplethora\b': ["abundance", "variety", "range", "collection", "wealth"],
            r'\bparadigm\b': ["model", "framework", "approach", "system", "method"],
            r'\bsynergy\b': ["collaboration", "cooperation", "coordination", "integration", "teamwork"],
            r'\bleverage\b': ["utilize", "employ", "use", "apply", "harness"],
            r'\bfacilitate\b': ["enable", "support", "assist", "help", "promote"],
            r'\boptimize\b': ["improve", "enhance", "refine", "perfect", "maximize"],
            r'\bstreamline\b': ["simplify", "improve", "refine", "enhance", "optimize"],
            r'\brobust\b': ["strong", "reliable", "effective", "solid", "durable"],
            r'\bseamless\b': ["smooth", "integrated", "unified", "continuous", "fluid"],
            r'\binnovative\b': ["creative", "original", "novel", "advanced", "groundbreaking"],
            r'\bcutting-edge\b': ["advanced", "latest", "modern", "current", "state-of-the-art"],
            r'\bstate-of-the-art\b': ["advanced", "modern", "sophisticated", "current", "latest"]
        }
        # Transition phrase variations
        self.transition_replacements = {
            r'\bfurthermore\b': ["additionally", "moreover", "in addition", "also", "besides"],
            r'\bmoreover\b': ["furthermore", "additionally", "also", "in addition", "what's more"],
            r'\bhowever\b': ["nevertheless", "yet", "still", "although", "but"],
            r'\bnevertheless\b': ["however", "yet", "still", "nonetheless", "even so"],
            r'\btherefore\b': ["consequently", "thus", "as a result", "hence", "so"],
            r'\bconsequently\b': ["therefore", "thus", "as a result", "accordingly", "hence"],
            r'\bin conclusion\b': ["finally", "ultimately", "in summary", "to summarize", "overall"],
            r'\bto summarize\b': ["in conclusion", "finally", "in summary", "overall", "in essence"],
            r'\bin summary\b': ["to conclude", "overall", "finally", "in essence", "ultimately"]
        }
        # Sentence structure patterns for variation
        self.sentence_starters = [
            "Additionally,", "Furthermore,", "In particular,", "Notably,",
            "Importantly,", "Significantly,", "Moreover,", "Consequently,",
            "Interestingly,", "Specifically,", "Essentially,", "Primarily,"
        ]
        # Professional contractions (limited and contextual)
        self.professional_contractions = {
            r'\bit is\b': "it's",
            r'\bthere is\b': "there's",
            r'\bthat is\b': "that's",
            r'\bcannot\b': "can't",
            r'\bdo not\b': "don't",
            r'\bdoes not\b': "doesn't",
            r'\bwill not\b': "won't",
            r'\bwould not\b': "wouldn't",
            r'\bshould not\b': "shouldn't",
            r'\bcould not\b': "couldn't"
        }

    def load_synonym_database(self):
        """Load and prepare the synonym database using WordNet."""
        try:
            # Test WordNet availability
            wordnet.synsets('test')
            self.wordnet_available = True
            print("WordNet loaded successfully")
        except LookupError:
            # The WordNet corpus is missing (download may have failed)
            self.wordnet_available = False
            print("WordNet not available, using limited synonym replacement")

    def get_contextual_synonym(self, word, pos_tag=None):
        """Get a contextually appropriate synonym using WordNet."""
        if not self.wordnet_available:
            return word
        try:
            # Get synsets for the word
            synsets = wordnet.synsets(word.lower())
            if not synsets:
                return word
            # Collect synonyms from the first two synsets
            synonyms = []
            for synset in synsets[:2]:
                for lemma in synset.lemmas():
                    synonym = lemma.name().replace('_', ' ')
                    if synonym != word.lower() and len(synonym) > 2:
                        synonyms.append(synonym)
            if synonyms:
                # Prefer a synonym of similar length to avoid dramatic changes
                suitable_synonyms = [s for s in synonyms if abs(len(s) - len(word)) <= 3]
                if suitable_synonyms:
                    return random.choice(suitable_synonyms)
                return random.choice(synonyms)
            return word
        except Exception:
            return word

    def preserve_meaning_replacement(self, text, intensity_level=1):
        """Replace AI-flagged terms while preserving the exact meaning."""
        result = text
        # Determine replacement probability based on intensity
        replacement_probability = {
            1: 0.3,  # Light
            2: 0.5,  # Standard
            3: 0.7   # Heavy
        }
        prob = replacement_probability.get(intensity_level, 0.5)
        # Apply formal term replacements
        for pattern, replacements in self.formal_replacements.items():
            if re.search(pattern, result, re.IGNORECASE) and random.random() < prob:
                replacement = random.choice(replacements)
                result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)
        # Apply transition phrase replacements
        for pattern, replacements in self.transition_replacements.items():
            if re.search(pattern, result, re.IGNORECASE) and random.random() < prob:
                replacement = random.choice(replacements)
                result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)
        return result

    def vary_sentence_structure(self, text, intensity_level=1):
        """Vary sentence structures while maintaining meaning."""
        sentences = sent_tokenize(text)
        varied_sentences = []
        # Determine variation probability based on intensity
        variation_probability = {
            1: 0.1,  # Light
            2: 0.2,  # Standard
            3: 0.3   # Heavy
        }
        prob = variation_probability.get(intensity_level, 0.2)
        for i, sentence in enumerate(sentences):
            # Occasionally add a transitional phrase at the beginning
            if i > 0 and len(sentence.split()) > 6 and random.random() < prob:
                starter = random.choice(self.sentence_starters)
                # Only lowercase the first letter when the second character is
                # lowercase, so acronyms like "NASA" survive (proper nouns may
                # still be affected)
                if len(sentence) > 1 and sentence[1].islower():
                    sentence = sentence[0].lower() + sentence[1:]
                sentence = f"{starter} {sentence}"
            # Occasionally convert between passive and active voice
            if random.random() < prob:
                sentence = self.vary_voice(sentence)
            # Restructure complex sentences occasionally
            if len(sentence.split()) > 15 and random.random() < prob:
                sentence = self.restructure_complex_sentence(sentence)
            varied_sentences.append(sentence)
        return " ".join(varied_sentences)

    def vary_voice(self, sentence):
        """Convert simple passive constructions to active voice occasionally."""
        # Only handle regular "-ed" verbs, whose past participle matches the
        # simple past, with a single-word agent after "by". Irregular forms
        # like "seen" or "known" would need real conjugation ("seen" -> "saw")
        # and would produce ungrammatical output.
        passive_pattern = r'\b(\w+) (?:is|are|was|were) (\w+ed) by (\w+)\b'
        if re.search(passive_pattern, sentence) and random.random() < 0.3:
            sentence = re.sub(passive_pattern, r'\3 \2 \1', sentence)
        return sentence

    def restructure_complex_sentence(self, sentence):
        """Restructure overly complex sentences."""
        # Split long sentences at a natural break point
        if ',' in sentence and len(sentence.split()) > 15:
            parts = sentence.split(',', 1)
            if len(parts) == 2:
                first_part = parts[0].strip()
                second_part = parts[1].strip()
                # Rejoin with a different structure
                connectors = ["Additionally", "Furthermore", "Moreover", "Also"]
                connector = random.choice(connectors)
                return f"{first_part}. {connector}, {second_part}"
        return sentence

    def apply_subtle_contractions(self, text, intensity_level=1):
        """Apply professional contractions sparingly."""
        # Determine contraction probability based on intensity
        contraction_probability = {
            1: 0.2,  # Light
            2: 0.3,  # Standard
            3: 0.4   # Heavy
        }
        prob = contraction_probability.get(intensity_level, 0.3)
        for pattern, contraction in self.professional_contractions.items():
            if re.search(pattern, text, re.IGNORECASE) and random.random() < prob:
                text = re.sub(pattern, contraction, text, flags=re.IGNORECASE)
        return text

    def enhance_vocabulary_diversity(self, text, intensity_level=1):
        """Enhance vocabulary diversity using contextual synonyms."""
        words = word_tokenize(text)
        enhanced_words = []
        word_frequency = defaultdict(int)
        # Determine synonym probability based on intensity
        synonym_probability = {
            1: 0.1,  # Light
            2: 0.2,  # Standard
            3: 0.3   # Heavy
        }
        prob = synonym_probability.get(intensity_level, 0.2)
        # Track word frequency to identify repetitive words
        for word in words:
            if word.isalpha() and len(word) > 4:
                word_frequency[word.lower()] += 1
        # Only swap words that occur more than once
        for word in words:
            if (word.isalpha() and len(word) > 4 and
                    word_frequency[word.lower()] > 1 and
                    random.random() < prob):
                enhanced_words.append(self.get_contextual_synonym(word))
            else:
                enhanced_words.append(word)
        return ' '.join(enhanced_words)

    def add_natural_variation(self, text, intensity_level=1):
        """Add natural, human-like variations."""
        sentences = sent_tokenize(text)
        varied_sentences = []
        # Determine variation probability based on intensity
        variation_probability = {
            1: 0.05,  # Light
            2: 0.15,  # Standard
            3: 0.25   # Heavy
        }
        prob = variation_probability.get(intensity_level, 0.15)
        for sentence in sentences:
            # Occasionally split very long sentences
            if len(sentence.split()) > 20 and random.random() < prob:
                words = sentence.split()
                mid_point = len(words) // 2
                # Find a natural break point near the middle; str.split() never
                # yields a bare ',' token, so check for trailing commas on
                # tokens as well as bare conjunctions
                for i in range(mid_point - 2, mid_point + 3):
                    if i <= 0 or i >= len(words) - 1:
                        continue
                    token = words[i]
                    core = token.rstrip(',')
                    if core in ('and', 'but', 'or', 'because'):
                        first_words = words[:i]           # drop the conjunction
                    elif token.endswith(','):
                        first_words = words[:i] + [core]  # keep the word, drop the comma
                    else:
                        continue
                    first_part = ' '.join(first_words)
                    second_part = ' '.join(words[i + 1:])
                    if first_part and second_part:
                        # Uppercase only the first letter; str.capitalize()
                        # would lowercase the rest of the fragment
                        sentence = f"{first_part}. {second_part[0].upper()}{second_part[1:]}"
                    break
            # Add subtle emphasis occasionally
            if random.random() < prob:
                sentence = self.add_subtle_emphasis(sentence)
            varied_sentences.append(sentence)
        return " ".join(varied_sentences)

    def add_subtle_emphasis(self, sentence):
        """Add very subtle emphasis that doesn't change meaning."""
        emphasis_patterns = [
            (r'\bvery important\b', "crucial"),
            (r'\bvery significant\b', "highly significant"),
            (r'\bvery effective\b', "highly effective"),
            (r'\bvery useful\b', "particularly useful"),
            (r'\bvery good\b', "excellent"),
            (r'\bvery bad\b', "poor")
        ]
        for pattern, replacement in emphasis_patterns:
            if re.search(pattern, sentence, re.IGNORECASE):
                sentence = re.sub(pattern, replacement, sentence, flags=re.IGNORECASE)
                break
        return sentence

    def final_coherence_check(self, text):
        """Final pass to ensure coherence and proper formatting."""
        # Fix spacing issues
        text = re.sub(r'\s+', ' ', text)
        text = re.sub(r'\s+([,.!?;:])', r'\1', text)
        text = re.sub(r'([,.!?;:])\s*([A-Z])', r'\1 \2', text)
        # Ensure proper capitalization at sentence starts
        sentences = sent_tokenize(text)
        corrected_sentences = []
        for sentence in sentences:
            if sentence and sentence[0].islower():
                sentence = sentence[0].upper() + sentence[1:]
            corrected_sentences.append(sentence)
        text = " ".join(corrected_sentences)
        # Collapse repeated periods and spaces
        text = re.sub(r'\.+', '.', text)
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    def advanced_humanize(self, text, intensity_level=1):
        """Apply sophisticated humanization that preserves meaning."""
        current_text = text
        print(f"Processing with intensity level: {intensity_level}")
        # Apply humanization techniques with intensity-based parameters
        current_text = self.preserve_meaning_replacement(current_text, intensity_level)
        current_text = self.vary_sentence_structure(current_text, intensity_level)
        current_text = self.enhance_vocabulary_diversity(current_text, intensity_level)
        current_text = self.apply_subtle_contractions(current_text, intensity_level)
        current_text = self.add_natural_variation(current_text, intensity_level)
        # Final coherence check and cleanup
        return self.final_coherence_check(current_text)

    def get_readability_score(self, text):
        """Calculate readability scores for the text."""
        try:
            score = flesch_reading_ease(text)
            grade = flesch_kincaid_grade(text)
            level = ("Very Easy" if score >= 90 else "Easy" if score >= 80 else
                     "Fairly Easy" if score >= 70 else "Standard" if score >= 60 else
                     "Fairly Difficult" if score >= 50 else "Difficult" if score >= 30 else
                     "Very Difficult")
            return f"Flesch Score: {score:.1f} ({level})\nGrade Level: {grade:.1f}"
        except Exception as e:
            return f"Could not calculate readability: {str(e)}"

    def humanize_text(self, text, intensity="standard"):
        """Main humanization method with meaning preservation."""
        if not text or not text.strip():
            return "Please provide text to humanize."
        try:
            text = text.strip()
            # Verify that NLTK tokenization works before processing
            try:
                if not sent_tokenize("This is a test sentence."):
                    raise Exception("NLTK tokenization failed")
            except Exception as nltk_error:
                return f"NLTK Error: {str(nltk_error)}. Please try again."
            # Map intensity names to numeric levels
            intensity_mapping = {
                "light": 1,
                "standard": 2,
                "heavy": 3
            }
            intensity_level = intensity_mapping.get(intensity, 2)
            print(f"Using intensity: {intensity} (level {intensity_level})")
            # Apply humanization
            return self.advanced_humanize(text, intensity_level)
        except Exception as e:
            return f"Error processing text: {str(e)}"

def create_interface():
    """Create the professional Gradio interface."""
    humanizer = AdvancedAIHumanizer()

    def process_text(input_text, intensity):
        if not input_text:
            return "Please enter some text to humanize.", "No text provided."
        try:
            result = humanizer.humanize_text(input_text, intensity)
            score = humanizer.get_readability_score(result)
            return result, score
        except Exception as e:
            return f"Error: {str(e)}", "Processing error"

    # Professional CSS styling
    professional_css = """
    .gradio-container {
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }
    .main-header {
        text-align: center;
        color: #2c3e50;
        font-size: 2.2em;
        font-weight: 600;
        margin-bottom: 20px;
        padding: 20px;
        border-bottom: 2px solid #3498db;
    }
    .feature-box {
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        border-radius: 8px;
        padding: 20px;
        margin: 15px 0;
        border-left: 4px solid #3498db;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    .info-box {
        background: #e8f5e8;
        border-radius: 8px;
        padding: 15px;
        margin: 10px 0;
        border-left: 4px solid #27ae60;
    }
    """

    with gr.Blocks(
        title="Professional AI Humanizer",
        theme=gr.themes.Soft(),
        css=professional_css
    ) as interface:
        gr.HTML("""
        <div class="main-header">
            🎯 Professional AI Content Humanizer
        </div>
        <div style="text-align: center; margin-bottom: 30px;">
            <h3>Meaning-Preserving AI Detection Bypass</h3>
            <p style="font-size: 1.1em; color: #7f8c8d;">
                Advanced humanization while maintaining professional tone and original meaning
            </p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=1):
                input_text = gr.Textbox(
                    label="📝 Original Content",
                    lines=12,
                    placeholder="Enter your AI-generated content here...\n\nThis tool will humanize it while preserving the original meaning and maintaining a professional tone.",
                    info="💡 Best results with content 100+ words",
                    show_copy_button=True
                )
                intensity = gr.Radio(
                    choices=[
                        ("Light Processing (30% changes)", "light"),
                        ("Standard Processing (50% changes)", "standard"),
                        ("Heavy Processing (70% changes)", "heavy")
                    ],
                    value="standard",
                    label="🔧 Processing Intensity",
                    info="Choose how extensively to humanize the content"
                )
                btn = gr.Button(
                    "🚀 Humanize Content",
                    variant="primary",
                    size="lg"
                )

            with gr.Column(scale=1):
                output_text = gr.Textbox(
                    label="✅ Humanized Content",
                    lines=12,
                    show_copy_button=True,
                    info="Processed content ready for use"
                )
                readability = gr.Textbox(
                    label="📊 Content Analysis",
                    lines=3,
                    info="Readability metrics"
                )

        gr.HTML("""
        <div class="feature-box">
            <h3>🎯 Processing Intensity Levels:</h3>
            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 15px; margin: 15px 0;">
                <div class="info-box">
                    <strong>🟢 Light Processing (30%):</strong><br>
                    • Minimal word replacements<br>
                    • Basic sentence variation<br>
                    • Subtle changes only<br>
                    • Best for: Already human-like content
                </div>
                <div class="info-box">
                    <strong>🟡 Standard Processing (50%):</strong><br>
                    • Moderate humanization<br>
                    • Balanced approach<br>
                    • Professional tone maintained<br>
                    • Best for: Most AI-generated content
                </div>
                <div class="info-box">
                    <strong>🔴 Heavy Processing (70%):</strong><br>
                    • Extensive modifications<br>
                    • Maximum variation<br>
                    • Strong AI detection bypass<br>
                    • Best for: Highly detectable AI text
                </div>
            </div>
        </div>
        """)

        gr.HTML("""
        <div class="feature-box">
            <h3>🚀 Advanced Humanization Features:</h3>
            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 15px; margin: 15px 0;">
                <div class="info-box">
                    <strong>🔒 Meaning Preservation:</strong><br>
                    Maintains exact original meaning and intent
                </div>
                <div class="info-box">
                    <strong>👔 Professional Tone:</strong><br>
                    Keeps appropriate formality level
                </div>
                <div class="info-box">
                    <strong>🔄 Structure Variation:</strong><br>
                    Natural sentence pattern diversity
                </div>
                <div class="info-box">
                    <strong>📚 Smart Synonyms:</strong><br>
                    Context-aware vocabulary enhancement
                </div>
                <div class="info-box">
                    <strong>🌊 Coherent Flow:</strong><br>
                    Maintains logical progression
                </div>
                <div class="info-box">
                    <strong>⚡ Detection Bypass:</strong><br>
                    Passes modern AI detection tools
                </div>
            </div>
        </div>
        """)

        # Event handlers
        btn.click(
            fn=process_text,
            inputs=[input_text, intensity],
            outputs=[output_text, readability]
        )
        input_text.submit(
            fn=process_text,
            inputs=[input_text, intensity],
            outputs=[output_text, readability]
        )

    return interface

if __name__ == "__main__":
    print("🚀 Starting Professional AI Humanizer...")
    app = create_interface()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        share=False
    )
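
# A minimal usage sketch (an assumption, not part of the deployed app) for
# calling the humanizer outside the Gradio UI. Output varies between runs
# because the transformations are randomized:
#
#     humanizer = AdvancedAIHumanizer()
#     print(humanizer.humanize_text(
#         "Let's delve into the intricate landscape of modern technology.",
#         intensity="light",
#     ))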