import os
import gradio as gr
import random
import re
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import wordnet
from textstat import flesch_reading_ease, flesch_kincaid_grade
from collections import defaultdict

# Set up the NLTK download path for Hugging Face Spaces
os.environ['NLTK_DATA'] = '/tmp/nltk_data'

def download_nltk_data():
    """Download required NLTK data with proper error handling."""
    try:
        os.makedirs('/tmp/nltk_data', exist_ok=True)
        nltk.data.path.append('/tmp/nltk_data')
        required_data = ['punkt', 'punkt_tab', 'averaged_perceptron_tagger',
                         'stopwords', 'wordnet', 'omw-1.4']
        for data in required_data:
            try:
                nltk.download(data, download_dir='/tmp/nltk_data', quiet=True)
                print(f"Successfully downloaded {data}")
            except Exception as e:
                print(f"Failed to download {data}: {e}")
        print("NLTK data download completed")
    except Exception as e:
        print(f"NLTK setup error: {e}")

download_nltk_data()

class AdvancedAIHumanizer:
    def __init__(self):
        self.setup_humanization_patterns()
        self.load_synonym_database()

    def setup_humanization_patterns(self):
        """Set up sophisticated humanization patterns that preserve meaning."""
        # AI-flagged formal terms with contextually appropriate replacements
        self.formal_replacements = {
            r'\bdelve into\b': ["explore", "examine", "investigate", "analyze", "look into"],
            r'\bembark on\b': ["begin", "start", "initiate", "commence", "launch"],
            r'\ba testament to\b': ["evidence of", "proof of", "demonstrates", "shows", "indicates"],
            r'\blandscape of\b': ["context of", "environment of", "field of", "domain of", "realm of"],
            r'\bnavigating\b': ["managing", "addressing", "handling", "working through", "dealing with"],
            r'\bmeticulous\b': ["careful", "thorough", "detailed", "precise", "systematic"],
            r'\bintricate\b': ["complex", "detailed", "sophisticated", "elaborate", "nuanced"],
            r'\bmyriad\b': ["numerous", "many", "various", "multiple", "countless"],
            r'\bplethora\b': ["abundance", "variety", "range", "collection", "wealth"],
            r'\bparadigm\b': ["model", "framework", "approach", "system", "method"],
            r'\bsynergy\b': ["collaboration", "cooperation", "coordination", "integration", "teamwork"],
            r'\bleverage\b': ["utilize", "employ", "use", "apply", "harness"],
            r'\bfacilitate\b': ["enable", "support", "assist", "help", "promote"],
            r'\boptimize\b': ["improve", "enhance", "refine", "perfect", "maximize"],
            r'\bstreamline\b': ["simplify", "improve", "refine", "enhance", "optimize"],
            r'\brobust\b': ["strong", "reliable", "effective", "solid", "durable"],
            r'\bseamless\b': ["smooth", "integrated", "unified", "continuous", "fluid"],
            r'\binnovative\b': ["creative", "original", "novel", "advanced", "groundbreaking"],
            r'\bcutting-edge\b': ["advanced", "latest", "modern", "current", "state-of-the-art"],
            r'\bstate-of-the-art\b': ["advanced", "modern", "sophisticated", "current", "latest"]
        }
        # Transition phrase variations
        self.transition_replacements = {
            r'\bfurthermore\b': ["additionally", "moreover", "in addition", "also", "besides"],
            r'\bmoreover\b': ["furthermore", "additionally", "also", "in addition", "what's more"],
            r'\bhowever\b': ["nevertheless", "yet", "still", "although", "but"],
            r'\bnevertheless\b': ["however", "yet", "still", "nonetheless", "even so"],
            r'\btherefore\b': ["consequently", "thus", "as a result", "hence", "so"],
            r'\bconsequently\b': ["therefore", "thus", "as a result", "accordingly", "hence"],
            r'\bin conclusion\b': ["finally", "ultimately", "in summary", "to summarize", "overall"],
            r'\bto summarize\b': ["in conclusion", "finally", "in summary", "overall", "in essence"],
            r'\bin summary\b': ["to conclude", "overall", "finally", "in essence", "ultimately"]
        }
        # Sentence structure patterns for variation
        self.sentence_starters = [
            "Additionally,", "Furthermore,", "In particular,", "Notably,",
            "Importantly,", "Significantly,", "Moreover,", "Consequently,",
            "Interestingly,", "Specifically,", "Essentially,", "Primarily,"
        ]
        # Professional contractions (limited and contextual)
        self.professional_contractions = {
            r'\bit is\b': "it's",
            r'\bthere is\b': "there's",
            r'\bthat is\b': "that's",
            r'\bcannot\b': "can't",
            r'\bdo not\b': "don't",
            r'\bdoes not\b': "doesn't",
            r'\bwill not\b': "won't",
            r'\bwould not\b': "wouldn't",
            r'\bshould not\b': "shouldn't",
            r'\bcould not\b': "couldn't"
        }

    def load_synonym_database(self):
        """Load and prepare the synonym database using WordNet."""
        try:
            # Test WordNet availability
            wordnet.synsets('test')
            self.wordnet_available = True
            print("WordNet loaded successfully")
        except LookupError:
            # The WordNet corpus is missing (download may have failed)
            self.wordnet_available = False
            print("WordNet not available, using limited synonym replacement")

    def get_contextual_synonym(self, word, pos_tag=None):
        """Get a contextually appropriate synonym using WordNet."""
        if not self.wordnet_available:
            return word
        try:
            # Get synsets for the word
            synsets = wordnet.synsets(word.lower())
            if not synsets:
                return word
            # Collect synonyms from the first two synsets
            synonyms = []
            for synset in synsets[:2]:
                for lemma in synset.lemmas():
                    synonym = lemma.name().replace('_', ' ')
                    if synonym != word.lower() and len(synonym) > 2:
                        synonyms.append(synonym)
            if synonyms:
                # Prefer a synonym of similar length to avoid dramatic changes
                suitable_synonyms = [s for s in synonyms if abs(len(s) - len(word)) <= 3]
                if suitable_synonyms:
                    return random.choice(suitable_synonyms)
                return random.choice(synonyms)
            return word
        except Exception:
            return word

    def preserve_meaning_replacement(self, text, intensity_level=1):
        """Replace AI-flagged terms while preserving the exact meaning."""
        result = text
        # Determine replacement probability based on intensity
        replacement_probability = {
            1: 0.3,  # Light
            2: 0.5,  # Standard
            3: 0.7   # Heavy
        }
        prob = replacement_probability.get(intensity_level, 0.5)
        # Apply formal term replacements
        for pattern, replacements in self.formal_replacements.items():
            if re.search(pattern, result, re.IGNORECASE) and random.random() < prob:
                replacement = random.choice(replacements)
                result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)
        # Apply transition phrase replacements
        for pattern, replacements in self.transition_replacements.items():
            if re.search(pattern, result, re.IGNORECASE) and random.random() < prob:
                replacement = random.choice(replacements)
                result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)
        return result

    def vary_sentence_structure(self, text, intensity_level=1):
        """Vary sentence structures while maintaining meaning."""
        sentences = sent_tokenize(text)
        varied_sentences = []
        # Determine variation probability based on intensity
        variation_probability = {
            1: 0.1,  # Light
            2: 0.2,  # Standard
            3: 0.3   # Heavy
        }
        prob = variation_probability.get(intensity_level, 0.2)
        for i, sentence in enumerate(sentences):
            # Occasionally add a transitional phrase at the beginning
            if i > 0 and len(sentence.split()) > 6 and random.random() < prob:
                starter = random.choice(self.sentence_starters)
                # Only lowercase the first letter when the second character is
                # lowercase, so acronyms like "NASA" survive (proper nouns may
                # still be affected)
                if len(sentence) > 1 and sentence[1].islower():
                    sentence = sentence[0].lower() + sentence[1:]
                sentence = f"{starter} {sentence}"
            # Occasionally convert between passive and active voice
            if random.random() < prob:
                sentence = self.vary_voice(sentence)
            # Restructure complex sentences occasionally
            if len(sentence.split()) > 15 and random.random() < prob:
                sentence = self.restructure_complex_sentence(sentence)
            varied_sentences.append(sentence)
        return " ".join(varied_sentences)

    def vary_voice(self, sentence):
        """Convert simple passive constructions to active voice occasionally."""
        # Only handle regular "-ed" verbs, whose past participle matches the
        # simple past, with a single-word agent after "by". Irregular forms
        # like "seen" or "known" would need real conjugation ("seen" -> "saw")
        # and would produce ungrammatical output.
        passive_pattern = r'\b(\w+) (?:is|are|was|were) (\w+ed) by (\w+)\b'
        if re.search(passive_pattern, sentence) and random.random() < 0.3:
            sentence = re.sub(passive_pattern, r'\3 \2 \1', sentence)
        return sentence

    def restructure_complex_sentence(self, sentence):
        """Restructure overly complex sentences."""
        # Split long sentences at a natural break point
        if ',' in sentence and len(sentence.split()) > 15:
            parts = sentence.split(',', 1)
            if len(parts) == 2:
                first_part = parts[0].strip()
                second_part = parts[1].strip()
                # Rejoin with a different structure
                connectors = ["Additionally", "Furthermore", "Moreover", "Also"]
                connector = random.choice(connectors)
                return f"{first_part}. {connector}, {second_part}"
        return sentence

    def apply_subtle_contractions(self, text, intensity_level=1):
        """Apply professional contractions sparingly."""
        # Determine contraction probability based on intensity
        contraction_probability = {
            1: 0.2,  # Light
            2: 0.3,  # Standard
            3: 0.4   # Heavy
        }
        prob = contraction_probability.get(intensity_level, 0.3)
        for pattern, contraction in self.professional_contractions.items():
            if re.search(pattern, text, re.IGNORECASE) and random.random() < prob:
                text = re.sub(pattern, contraction, text, flags=re.IGNORECASE)
        return text

    def enhance_vocabulary_diversity(self, text, intensity_level=1):
        """Enhance vocabulary diversity using contextual synonyms."""
        words = word_tokenize(text)
        enhanced_words = []
        word_frequency = defaultdict(int)
        # Determine synonym probability based on intensity
        synonym_probability = {
            1: 0.1,  # Light
            2: 0.2,  # Standard
            3: 0.3   # Heavy
        }
        prob = synonym_probability.get(intensity_level, 0.2)
        # Track word frequency to identify repetitive words
        for word in words:
            if word.isalpha() and len(word) > 4:
                word_frequency[word.lower()] += 1
        # Only swap words that occur more than once
        for word in words:
            if (word.isalpha() and len(word) > 4 and
                    word_frequency[word.lower()] > 1 and
                    random.random() < prob):
                enhanced_words.append(self.get_contextual_synonym(word))
            else:
                enhanced_words.append(word)
        return ' '.join(enhanced_words)

    def add_natural_variation(self, text, intensity_level=1):
        """Add natural, human-like variations."""
        sentences = sent_tokenize(text)
        varied_sentences = []
        # Determine variation probability based on intensity
        variation_probability = {
            1: 0.05,  # Light
            2: 0.15,  # Standard
            3: 0.25   # Heavy
        }
        prob = variation_probability.get(intensity_level, 0.15)
        for sentence in sentences:
            # Occasionally split very long sentences
            if len(sentence.split()) > 20 and random.random() < prob:
                words = sentence.split()
                mid_point = len(words) // 2
                # Find a natural break point near the middle; str.split() never
                # yields a bare ',' token, so check for trailing commas on
                # tokens as well as bare conjunctions
                for i in range(mid_point - 2, mid_point + 3):
                    if i <= 0 or i >= len(words) - 1:
                        continue
                    token = words[i]
                    core = token.rstrip(',')
                    if core in ('and', 'but', 'or', 'because'):
                        first_words = words[:i]           # drop the conjunction
                    elif token.endswith(','):
                        first_words = words[:i] + [core]  # keep the word, drop the comma
                    else:
                        continue
                    first_part = ' '.join(first_words)
                    second_part = ' '.join(words[i + 1:])
                    if first_part and second_part:
                        # Uppercase only the first letter; str.capitalize()
                        # would lowercase the rest of the fragment
                        sentence = f"{first_part}. {second_part[0].upper()}{second_part[1:]}"
                    break
            # Add subtle emphasis occasionally
            if random.random() < prob:
                sentence = self.add_subtle_emphasis(sentence)
            varied_sentences.append(sentence)
        return " ".join(varied_sentences)

    def add_subtle_emphasis(self, sentence):
        """Add very subtle emphasis that doesn't change meaning."""
        emphasis_patterns = [
            (r'\bvery important\b', "crucial"),
            (r'\bvery significant\b', "highly significant"),
            (r'\bvery effective\b', "highly effective"),
            (r'\bvery useful\b', "particularly useful"),
            (r'\bvery good\b', "excellent"),
            (r'\bvery bad\b', "poor")
        ]
        for pattern, replacement in emphasis_patterns:
            if re.search(pattern, sentence, re.IGNORECASE):
                sentence = re.sub(pattern, replacement, sentence, flags=re.IGNORECASE)
                break
        return sentence

    def final_coherence_check(self, text):
        """Final pass to ensure coherence and proper formatting."""
        # Fix spacing issues
        text = re.sub(r'\s+', ' ', text)
        text = re.sub(r'\s+([,.!?;:])', r'\1', text)
        text = re.sub(r'([,.!?;:])\s*([A-Z])', r'\1 \2', text)
        # Ensure proper capitalization at sentence starts
        sentences = sent_tokenize(text)
        corrected_sentences = []
        for sentence in sentences:
            if sentence and sentence[0].islower():
                sentence = sentence[0].upper() + sentence[1:]
            corrected_sentences.append(sentence)
        text = " ".join(corrected_sentences)
        # Collapse repeated periods and spaces
        text = re.sub(r'\.+', '.', text)
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    def advanced_humanize(self, text, intensity_level=1):
        """Apply sophisticated humanization that preserves meaning."""
        current_text = text
        print(f"Processing with intensity level: {intensity_level}")
        # Apply humanization techniques with intensity-based parameters
        current_text = self.preserve_meaning_replacement(current_text, intensity_level)
        current_text = self.vary_sentence_structure(current_text, intensity_level)
        current_text = self.enhance_vocabulary_diversity(current_text, intensity_level)
        current_text = self.apply_subtle_contractions(current_text, intensity_level)
        current_text = self.add_natural_variation(current_text, intensity_level)
        # Final coherence check and cleanup
        return self.final_coherence_check(current_text)

    def get_readability_score(self, text):
        """Calculate readability scores for the text."""
        try:
            score = flesch_reading_ease(text)
            grade = flesch_kincaid_grade(text)
            level = ("Very Easy" if score >= 90 else "Easy" if score >= 80 else
                     "Fairly Easy" if score >= 70 else "Standard" if score >= 60 else
                     "Fairly Difficult" if score >= 50 else "Difficult" if score >= 30 else
                     "Very Difficult")
            return f"Flesch Score: {score:.1f} ({level})\nGrade Level: {grade:.1f}"
        except Exception as e:
            return f"Could not calculate readability: {str(e)}"

    def humanize_text(self, text, intensity="standard"):
        """Main humanization method with meaning preservation."""
        if not text or not text.strip():
            return "Please provide text to humanize."
        try:
            text = text.strip()
            # Verify that NLTK tokenization works before processing
            try:
                if not sent_tokenize("This is a test sentence."):
                    raise Exception("NLTK tokenization failed")
            except Exception as nltk_error:
                return f"NLTK Error: {str(nltk_error)}. Please try again."
            # Map intensity names to numeric levels
            intensity_mapping = {
                "light": 1,
                "standard": 2,
                "heavy": 3
            }
            intensity_level = intensity_mapping.get(intensity, 2)
            print(f"Using intensity: {intensity} (level {intensity_level})")
            # Apply humanization
            return self.advanced_humanize(text, intensity_level)
        except Exception as e:
            return f"Error processing text: {str(e)}"

def create_interface():
    """Create the professional Gradio interface."""
    humanizer = AdvancedAIHumanizer()

    def process_text(input_text, intensity):
        if not input_text:
            return "Please enter some text to humanize.", "No text provided."
        try:
            result = humanizer.humanize_text(input_text, intensity)
            score = humanizer.get_readability_score(result)
            return result, score
        except Exception as e:
            return f"Error: {str(e)}", "Processing error"

    # Professional CSS styling
    professional_css = """
    .gradio-container {
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }
    .main-header {
        text-align: center;
        color: #2c3e50;
        font-size: 2.2em;
        font-weight: 600;
        margin-bottom: 20px;
        padding: 20px;
        border-bottom: 2px solid #3498db;
    }
    .feature-box {
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        border-radius: 8px;
        padding: 20px;
        margin: 15px 0;
        border-left: 4px solid #3498db;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    .info-box {
        background: #e8f5e8;
        border-radius: 8px;
        padding: 15px;
        margin: 10px 0;
        border-left: 4px solid #27ae60;
    }
    """

    with gr.Blocks(
        title="Professional AI Humanizer",
        theme=gr.themes.Soft(),
        css=professional_css
    ) as interface:
        gr.HTML("""
        <div class="main-header">
            🎯 Professional AI Content Humanizer
        </div>
        <div style="text-align: center; margin-bottom: 30px;">
            <h3>Meaning-Preserving AI Detection Bypass</h3>
            <p style="font-size: 1.1em; color: #7f8c8d;">
                Advanced humanization while maintaining professional tone and original meaning
            </p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=1):
                input_text = gr.Textbox(
                    label="📝 Original Content",
                    lines=12,
                    placeholder="Enter your AI-generated content here...\n\nThis tool will humanize it while preserving the original meaning and maintaining a professional tone.",
                    info="💡 Best results with content 100+ words",
                    show_copy_button=True
                )
                intensity = gr.Radio(
                    choices=[
                        ("Light Processing (30% changes)", "light"),
                        ("Standard Processing (50% changes)", "standard"),
                        ("Heavy Processing (70% changes)", "heavy")
                    ],
                    value="standard",
                    label="🔧 Processing Intensity",
                    info="Choose how extensively to humanize the content"
                )
                btn = gr.Button(
                    "🚀 Humanize Content",
                    variant="primary",
                    size="lg"
                )

            with gr.Column(scale=1):
                output_text = gr.Textbox(
                    label="✅ Humanized Content",
                    lines=12,
                    show_copy_button=True,
                    info="Processed content ready for use"
                )
                readability = gr.Textbox(
                    label="📊 Content Analysis",
                    lines=3,
                    info="Readability metrics"
                )

        gr.HTML("""
        <div class="feature-box">
            <h3>🎯 Processing Intensity Levels:</h3>
            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 15px; margin: 15px 0;">
                <div class="info-box">
                    <strong>🟢 Light Processing (30%):</strong><br>
                    • Minimal word replacements<br>
                    • Basic sentence variation<br>
                    • Subtle changes only<br>
                    • Best for: Already human-like content
                </div>
                <div class="info-box">
                    <strong>🟡 Standard Processing (50%):</strong><br>
                    • Moderate humanization<br>
                    • Balanced approach<br>
                    • Professional tone maintained<br>
                    • Best for: Most AI-generated content
                </div>
                <div class="info-box">
                    <strong>🔴 Heavy Processing (70%):</strong><br>
                    • Extensive modifications<br>
                    • Maximum variation<br>
                    • Strong AI detection bypass<br>
                    • Best for: Highly detectable AI text
                </div>
            </div>
        </div>
        """)

        gr.HTML("""
        <div class="feature-box">
            <h3>🚀 Advanced Humanization Features:</h3>
            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 15px; margin: 15px 0;">
                <div class="info-box">
                    <strong>🔒 Meaning Preservation:</strong><br>
                    Maintains exact original meaning and intent
                </div>
                <div class="info-box">
                    <strong>👔 Professional Tone:</strong><br>
                    Keeps appropriate formality level
                </div>
                <div class="info-box">
                    <strong>🔄 Structure Variation:</strong><br>
                    Natural sentence pattern diversity
                </div>
                <div class="info-box">
                    <strong>📚 Smart Synonyms:</strong><br>
                    Context-aware vocabulary enhancement
                </div>
                <div class="info-box">
                    <strong>🌊 Coherent Flow:</strong><br>
                    Maintains logical progression
                </div>
                <div class="info-box">
                    <strong>⚡ Detection Bypass:</strong><br>
                    Passes modern AI detection tools
                </div>
            </div>
        </div>
        """)

        # Event handlers
        btn.click(
            fn=process_text,
            inputs=[input_text, intensity],
            outputs=[output_text, readability]
        )
        input_text.submit(
            fn=process_text,
            inputs=[input_text, intensity],
            outputs=[output_text, readability]
        )

    return interface

if __name__ == "__main__":
    print("🚀 Starting Professional AI Humanizer...")
    app = create_interface()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        share=False
    )
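
# A minimal usage sketch (an assumption, not part of the deployed app) for
# calling the humanizer outside the Gradio UI. Output varies between runs
# because the transformations are randomized:
#
#     humanizer = AdvancedAIHumanizer()
#     print(humanizer.humanize_text(
#         "Let's delve into the intricate landscape of modern technology.",
#         intensity="light",
#     ))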