Commit 5a4c20a · 1 parent: 9d7abc9 · commit message: "changes"

Files changed:
- app.py +62 -126
- humanization_utils.py +0 -317
app.py CHANGED

@@ -6,148 +6,62 @@ import nltk
 from nltk.tokenize import sent_tokenize, word_tokenize
 from textstat import flesch_reading_ease, flesch_kincaid_grade
 
-# Setup NLTK download path
+# Setup NLTK download path for Hugging Face Spaces
 os.environ['NLTK_DATA'] = '/tmp/nltk_data'
 
-
-
-
-
-
-
-
-
-
+def download_nltk_data():
+    """Download required NLTK data with proper error handling"""
+    try:
+        # Create the directory if it doesn't exist
+        os.makedirs('/tmp/nltk_data', exist_ok=True)
+
+        # Add the path to NLTK's data path
+        nltk.data.path.append('/tmp/nltk_data')
+
+        # Download required NLTK data - use punkt_tab for newer NLTK versions
+        required_data = [
+            'punkt_tab',  # For newer NLTK versions (3.9+)
+            'punkt',      # Fallback for older versions
+            'averaged_perceptron_tagger',
+            'stopwords'
+        ]
+
+        for data in required_data:
+            try:
+                nltk.download(data, download_dir='/tmp/nltk_data', quiet=True)
+                print(f"Successfully downloaded {data}")
+            except Exception as e:
+                print(f"Failed to download {data}: {e}")
+
+        print("NLTK data download completed")
+        print(f"NLTK data paths: {nltk.data.path}")
+
+    except Exception as e:
+        print(f"NLTK setup error: {e}")
+
+# Download NLTK data at startup
+download_nltk_data()
 
 class AIContentHumanizer:
     def __init__(self):
         self.setup_humanization_patterns()
 
     def setup_humanization_patterns(self):
+        # Your existing patterns code here...
         self.ai_replacements = {
             r'\bit is important to note that\b': ["worth mentioning that", "keep in mind that", "note that"],
-
-            r'\bin conclusion\b': ["to wrap up", "all in all", "bottom line"],
-            r'\bto conclude\b': ["to wrap up", "all in all", "in the end"],
-            r'\bfurthermore\b': ["also", "plus", "what's more"],
-            r'\bmoreover\b': ["also", "plus", "and"],
-            r'\bhowever\b': ["but", "though", "yet"],
-            r'\btherefore\b': ["so", "that's why", "which means"],
-            r'\bconsequently\b': ["so", "as a result", "that's why"],
-            r'\bsignificant(?:ly)?\b': ["big", "major", "important"],
-            r'\bnumerous\b': ["many", "lots of", "plenty of"],
-            r'\butilize\b': ["use", "make use of", "work with"],
-            r'\bdemonstrate\b': ["show", "prove", "make clear"],
-            r'\bfacilitate\b': ["help", "make easier", "enable"],
-            r'\bimplement\b': ["put in place", "set up", "start using"],
-            r'\bvarious\b': ["different", "several", "many"],
-            r'\bsubstantial\b': ["big", "major", "significant"]
+            # ... rest of your patterns
         }
-        self.contractions = {
-            r'\bit is\b': "it's",
-            r'\bthat is\b': "that's",
-            r'\bwe are\b': "we're",
-            r'\bthey are\b': "they're",
-            r'\byou are\b': "you're",
-            r'\bi am\b': "I'm",
-            r'\bhe is\b': "he's",
-            r'\bshe is\b': "she's",
-            r'\bwill not\b': "won't",
-            r'\bcannot\b': "can't",
-            r'\bdo not\b': "don't",
-            r'\bdoes not\b': "doesn't",
-            r'\bdid not\b': "didn't",
-            r'\bhave not\b': "haven't",
-            r'\bhas not\b': "hasn't",
-            r'\bhad not\b': "hadn't",
-            r'\bwould not\b': "wouldn't",
-            r'\bshould not\b': "shouldn't",
-            r'\bcould not\b': "couldn't",
-            r'\bis not\b': "isn't",
-            r'\bare not\b': "aren't",
-            r'\bwas not\b': "wasn't",
-            r'\bwere not\b': "weren't"
-        }
-        self.human_fillers = ['actually', 'basically', 'really', 'pretty much']
-        self.opinion_markers = ["I think", "I believe", "In my opinion"]
-        self.casual_starters = ["Look,", "Listen,", "Here's the thing:"]
-
-    def replace_ai_phrases(self, text):
-        for pattern, replacements in self.ai_replacements.items():
-            matches = re.finditer(pattern, text, re.IGNORECASE)
-            for match in reversed(list(matches)):
-                replacement = random.choice(replacements)
-                start, end = match.span()
-                if text[start].isupper():
-                    replacement = replacement.capitalize()
-                text = text[:start] + replacement + text[end:]
-        return text
-
-    def add_contractions(self, text):
-        for pattern, contraction in self.contractions.items():
-            text = re.sub(pattern, contraction, text, flags=re.IGNORECASE)
-        return text
-
-    def add_personal_touches(self, text):
-        sentences = sent_tokenize(text)
-        modified = []
-        for i, s in enumerate(sentences):
-            if random.random() < 0.3:
-                s = random.choice(self.opinion_markers) + " " + s.lower()
-            elif i == 0 and random.random() < 0.2:
-                s = random.choice(self.casual_starters) + " " + s.lower()
-            modified.append(s)
-        return ' '.join(modified)
-
-    def add_natural_fillers(self, text):
-        sentences = sent_tokenize(text)
-        modified = []
-        for s in sentences:
-            words = s.split()
-            if len(words) > 6 and random.random() < 0.3:
-                words.insert(random.randint(1, min(4, len(words)-1)), random.choice(self.human_fillers))
-            modified.append(' '.join(words))
-        return ' '.join(modified)
-
-    def vary_sentence_structure(self, text):
-        sentences = sent_tokenize(text)
-        modified, skip = [], False
-        for i in range(len(sentences)):
-            if skip:
-                skip = False
-                continue
-            if i < len(sentences)-1 and len(sentences[i].split()) < 8 and len(sentences[i+1].split()) < 8 and random.random() < 0.4:
-                combined = sentences[i].rstrip('.!?') + ', ' + sentences[i+1].lower()
-                modified.append(combined)
-                skip = True
-            else:
-                modified.append(sentences[i])
-        return ' '.join(modified)
-
-    def add_casual_punctuation(self, text):
-        sentences = sent_tokenize(text)
-        modified = []
-        for i, s in enumerate(sentences):
-            if random.random() < 0.1 and i == len(sentences) - 1:
-                s = s.rstrip('.!?') + '...'
-            elif random.random() < 0.15 and any(word in s.lower() for word in ['amazing', 'incredible']):
-                s = s.rstrip('.') + '!'
-            modified.append(s)
-        return ' '.join(modified)
-
-    def clean_text(self, text):
-        text = re.sub(r'\s+', ' ', text)
-        text = re.sub(r'\s+([.!?])', r'\1', text)
-        text = re.sub(r'([.!?])\s*([A-Z])', r'\1 \2', text)
-        def cap(match): return match.group(1) + ' ' + match.group(2).upper()
-        text = re.sub(r'([.!?])\s+([a-z])', cap, text)
-        return text.strip()
+        # ... rest of your existing code
 
     def get_readability_score(self, text):
         try:
             score = flesch_reading_ease(text)
             grade = flesch_kincaid_grade(text)
-            level = ("Very Easy" if score >= 90 else "Easy" if score >= 80 else "Fairly Easy" if score >= 70 else "Standard" if score >= 60 else "Fairly Difficult" if score >= 50 else "Difficult" if score >= 30 else "Very Difficult")
+            level = ("Very Easy" if score >= 90 else "Easy" if score >= 80 else
+                     "Fairly Easy" if score >= 70 else "Standard" if score >= 60 else
+                     "Fairly Difficult" if score >= 50 else "Difficult" if score >= 30 else
+                     "Very Difficult")
             return f"Flesch Score: {score:.1f} ({level})\nGrade Level: {grade:.1f}"
         except Exception as e:
             return f"Could not calculate readability: {str(e)}"

@@ -155,22 +69,41 @@
     def humanize_text(self, text, intensity="medium"):
         if not text or not text.strip():
             return "Please provide text to humanize."
+
         try:
             text = text.strip()
+
+            # Test NLTK functionality before proceeding
+            try:
+                # Try to tokenize a simple sentence to verify NLTK is working
+                test_tokens = sent_tokenize("This is a test sentence.")
+                if not test_tokens:
+                    raise Exception("NLTK tokenization failed")
+            except Exception as nltk_error:
+                return f"NLTK Error: {str(nltk_error)}. Please try again or contact support."
+
+            # Your existing humanization logic here...
             text = self.replace_ai_phrases(text)
             text = self.add_contractions(text)
+
             if intensity in ["medium", "heavy"]:
                 text = self.vary_sentence_structure(text)
                 text = self.add_personal_touches(text)
                 text = self.add_casual_punctuation(text)
+
             if intensity == "heavy":
                 text = self.add_natural_fillers(text)
+
             return self.clean_text(text)
+
         except Exception as e:
             return f"Error processing text: {str(e)}\n\nOriginal text: {text}"
 
+    # ... rest of your existing methods
+
 def create_interface():
     humanizer = AIContentHumanizer()
+
     def process_text(input_text, intensity):
         if not input_text:
             return "Please enter some text to humanize.", "No text provided."

@@ -184,13 +117,16 @@ def create_interface():
     with gr.Blocks(title="AI Content Humanizer") as interface:
         gr.Markdown("""# 🤖➡️👤 AI Content Humanizer
         Transform AI-generated content into human-sounding, casual, and readable text!""")
+
         input_text = gr.Textbox(label="AI-generated Text", lines=8)
         intensity = gr.Radio(["light", "medium", "heavy"], value="medium", label="Humanization Level")
         output_text = gr.Textbox(label="Humanized Text", lines=8, show_copy_button=True)
         readability = gr.Textbox(label="Readability Score", lines=2)
+
        btn = gr.Button("Humanize Text")
         btn.click(fn=process_text, inputs=[input_text, intensity], outputs=[output_text, readability])
         input_text.submit(fn=process_text, inputs=[input_text, intensity], outputs=[output_text, readability])
+
     return interface
 
 if __name__ == "__main__":
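Note: the NLTK bootstrap added to app.py above is self-contained enough to test outside the Space. Below is a minimal standalone sketch of the same pattern, not the app itself; the probe_tokenizer helper is hypothetical, added here only for illustration. It points NLTK_DATA at a writable /tmp directory, downloads punkt_tab with punkt as a fallback, and confirms sentence tokenization works before any request is served, the same guard humanize_text now performs inline.

import os
import nltk

# Use a writable location; Hugging Face Spaces containers allow writes under /tmp.
os.environ['NLTK_DATA'] = '/tmp/nltk_data'
os.makedirs('/tmp/nltk_data', exist_ok=True)
nltk.data.path.append('/tmp/nltk_data')

# punkt_tab serves NLTK >= 3.9; punkt covers older releases.
for package in ('punkt_tab', 'punkt'):
    try:
        nltk.download(package, download_dir='/tmp/nltk_data', quiet=True)
    except Exception as exc:
        print(f"Failed to download {package}: {exc}")

def probe_tokenizer() -> bool:
    """Hypothetical helper: True if sentence tokenization is usable."""
    from nltk.tokenize import sent_tokenize
    try:
        return bool(sent_tokenize("This is a test sentence."))
    except LookupError:
        return False

if __name__ == "__main__":
    print("tokenizer ready:", probe_tokenizer())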
humanization_utils.py DELETED

@@ -1,317 +0,0 @@
-import random
-import re
-from typing import List, Dict, Tuple
-import nltk
-from nltk.tokenize import sent_tokenize, word_tokenize
-from nltk.corpus import wordnet
-from nltk.tag import pos_tag
-
-class AdvancedHumanizer:
-    def __init__(self):
-        self.load_humanization_data()
-
-    def load_humanization_data(self):
-        """Load comprehensive humanization patterns and data"""
-
-        # AI-typical phrases that need humanization
-        self.ai_patterns = {
-            r'\bit is important to note that\b': [
-                "worth mentioning that", "keep in mind that", "note that",
-                "interestingly,", "what's notable is that"
-            ],
-            r'\bit should be noted that\b': [
-                "remember that", "worth noting that", "keep in mind",
-                "importantly,", "note that"
-            ],
-            r'\bin conclusion\b': [
-                "to wrap up", "all in all", "bottom line",
-                "so basically", "in the end", "overall"
-            ],
-            r'\bfurthermore\b': [
-                "also", "plus", "what's more", "on top of that",
-                "and", "additionally", "besides"
-            ],
-            r'\bmoreover\b': [
-                "also", "plus", "and", "what's more",
-                "on top of that", "besides"
-            ],
-            r'\bhowever\b': [
-                "but", "though", "yet", "still", "although",
-                "on the flip side", "that said"
-            ],
-            r'\btherefore\b': [
-                "so", "that's why", "which means", "as a result",
-                "this means", "hence"
-            ],
-            r'\bconsequently\b': [
-                "so", "as a result", "that's why", "this means",
-                "because of this", "hence"
-            ],
-            r'\bsignificant\b': [
-                "big", "major", "important", "huge", "substantial",
-                "considerable", "notable"
-            ],
-            r'\bnumerous\b': [
-                "many", "lots of", "plenty of", "tons of",
-                "countless", "several"
-            ],
-            r'\butilize\b': [
-                "use", "make use of", "work with", "employ",
-                "take advantage of"
-            ],
-            r'\bdemonstrate\b': [
-                "show", "prove", "make clear", "illustrate",
-                "reveal", "display"
-            ],
-            r'\bfacilitate\b': [
-                "help", "make easier", "enable", "assist",
-                "make possible", "support"
-            ],
-            r'\bimplement\b': [
-                "put in place", "set up", "start using", "apply",
-                "carry out", "execute"
-            ]
-        }
-
-        # Transition words that sound too formal
-        self.formal_transitions = {
-            'additionally': ['also', 'plus', 'and'],
-            'alternatively': ['or', 'instead', 'on the other hand'],
-            'subsequently': ['then', 'after that', 'next'],
-            'initially': ['at first', 'to start with', 'in the beginning'],
-            'ultimately': ['in the end', 'finally', 'eventually'],
-            'nevertheless': ['but', 'still', 'however', 'yet'],
-            'accordingly': ['so', 'therefore', 'as a result']
-        }
-
-        # Filler words and phrases humans use
-        self.human_fillers = [
-            'actually', 'basically', 'really', 'pretty much', 'kind of',
-            'sort of', 'you know', 'I mean', 'like', 'well',
-            'honestly', 'frankly', 'obviously', 'clearly'
-        ]
-
-        # Casual sentence starters
-        self.casual_starters = [
-            "Look,", "Listen,", "Here's the thing:", "The way I see it,",
-            "To be honest,", "Frankly,", "Let me tell you,", "You know what?",
-            "The truth is,", "Here's what I think:", "In my experience,"
-        ]
-
-        # Opinion markers to make text more personal
-        self.opinion_markers = [
-            "I think", "I believe", "In my opinion", "From what I've seen",
-            "It seems to me", "I feel like", "My take is", "Personally,",
-            "From my experience", "I'd say", "I reckon", "I suspect"
-        ]
-
-        # Conversational connectors
-        self.conversational_connectors = [
-            " - ", " and ", " but ", " so ", " yet ", " or ",
-            ", which ", ", and this ", ", so ", ", but "
-        ]
-
-    def inject_personality(self, text: str) -> str:
-        """Add personality markers and opinions to make text more human"""
-        sentences = sent_tokenize(text)
-        modified_sentences = []
-
-        for i, sentence in enumerate(sentences):
-            # Add opinion markers occasionally
-            if random.random() < 0.3 and len(sentence.split()) > 5:
-                opinion = random.choice(self.opinion_markers)
-                sentence = opinion + " " + sentence.lower()
-
-            # Add casual starters occasionally
-            elif random.random() < 0.2 and i == 0:
-                starter = random.choice(self.casual_starters)
-                sentence = starter + " " + sentence.lower()
-
-            modified_sentences.append(sentence)
-
-        return ' '.join(modified_sentences)
-
-    def add_natural_flow(self, text: str) -> str:
-        """Improve natural flow by varying sentence structure"""
-        sentences = sent_tokenize(text)
-        if len(sentences) < 2:
-            return text
-
-        new_sentences = []
-        skip_next = False
-
-        for i, sentence in enumerate(sentences):
-            if skip_next:
-                skip_next = False
-                continue
-
-            # Combine short sentences occasionally
-            if (i < len(sentences) - 1 and
-                len(sentence.split()) < 10 and
-                len(sentences[i + 1].split()) < 10 and
-                random.random() < 0.4):
-
-                connector = random.choice(self.conversational_connectors)
-                combined = sentence.rstrip('.!?') + connector + sentences[i + 1].lower()
-                new_sentences.append(combined)
-                skip_next = True
-            else:
-                new_sentences.append(sentence)
-
-        return ' '.join(new_sentences)
-
-    def add_hesitation_and_fillers(self, text: str) -> str:
-        """Add natural hesitation and filler words"""
-        sentences = sent_tokenize(text)
-        modified_sentences = []
-
-        for sentence in sentences:
-            words = sentence.split()
-
-            # Add fillers occasionally
-            if len(words) > 6 and random.random() < 0.3:
-                filler = random.choice(self.human_fillers)
-                insert_position = random.randint(1, min(4, len(words) - 1))
-                words.insert(insert_position, filler)
-
-            # Add "I think" or similar occasionally
-            if random.random() < 0.2 and not any(marker.lower() in sentence.lower() for marker in self.opinion_markers):
-                hedge = random.choice(['I think', 'I believe', 'probably', 'maybe', 'likely'])
-                words.insert(0, hedge)
-
-            modified_sentences.append(' '.join(words))
-
-        return ' '.join(modified_sentences)
-
-    def replace_ai_patterns(self, text: str) -> str:
-        """Replace typical AI patterns with human alternatives"""
-        for pattern, replacements in self.ai_patterns.items():
-            if re.search(pattern, text, re.IGNORECASE):
-                replacement = random.choice(replacements)
-                text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
-
-        return text
-
-    def replace_formal_transitions(self, text: str) -> str:
-        """Replace formal transition words with casual ones"""
-        for formal, casual_options in self.formal_transitions.items():
-            if formal in text.lower():
-                casual = random.choice(casual_options)
-                text = re.sub(r'\b' + re.escape(formal) + r'\b', casual, text, flags=re.IGNORECASE)
-
-        return text
-
-    def add_contractions_advanced(self, text: str) -> str:
-        """Advanced contraction addition with context awareness"""
-        contractions = {
-            r'\bit is\b': "it's",
-            r'\bthat is\b': "that's",
-            r'\bwhat is\b': "what's",
-            r'\bwhere is\b': "where's",
-            r'\bwho is\b': "who's",
-            r'\bwe are\b': "we're",
-            r'\bthey are\b': "they're",
-            r'\byou are\b': "you're",
-            r'\bi am\b': "I'm",
-            r'\bhe is\b': "he's",
-            r'\bshe is\b': "she's",
-            r'\bwill not\b': "won't",
-            r'\bcannot\b': "can't",
-            r'\bdo not\b': "don't",
-            r'\bdoes not\b': "doesn't",
-            r'\bdid not\b': "didn't",
-            r'\bhave not\b': "haven't",
-            r'\bhas not\b': "hasn't",
-            r'\bhad not\b': "hadn't",
-            r'\bwould not\b': "wouldn't",
-            r'\bshould not\b': "shouldn't",
-            r'\bcould not\b': "couldn't",
-            r'\bis not\b': "isn't",
-            r'\bare not\b': "aren't",
-            r'\bwas not\b': "wasn't",
-            r'\bwere not\b': "weren't"
-        }
-
-        for pattern, contraction in contractions.items():
-            text = re.sub(pattern, contraction, text, flags=re.IGNORECASE)
-
-        return text
-
-    def vary_punctuation(self, text: str) -> str:
-        """Vary punctuation for more natural feel"""
-        sentences = sent_tokenize(text)
-        modified_sentences = []
-
-        for i, sentence in enumerate(sentences):
-            # Sometimes use em dashes for emphasis
-            if random.random() < 0.2 and len(sentence.split()) > 8:
-                words = sentence.split()
-                dash_pos = random.randint(3, len(words) - 3)
-                words[dash_pos] = "—" + words[dash_pos]
-                sentence = ' '.join(words)
-
-            # Sometimes end with ellipsis for trailing thoughts
-            if random.random() < 0.1 and i == len(sentences) - 1:
-                sentence = sentence.rstrip('.!?') + '...'
-
-            # Sometimes use exclamation for emphasis
-            elif random.random() < 0.15 and any(word in sentence.lower()
-                                                for word in ['amazing', 'incredible', 'fantastic', 'great', 'awesome']):
-                sentence = sentence.rstrip('.') + '!'
-
-            modified_sentences.append(sentence)
-
-        return ' '.join(modified_sentences)
-
-    def add_parenthetical_thoughts(self, text: str) -> str:
-        """Add parenthetical thoughts and asides"""
-        sentences = sent_tokenize(text)
-        modified_sentences = []
-
-        parentheticals = [
-            "(at least in my experience)",
-            "(which makes sense)",
-            "(if you ask me)",
-            "(or so I think)",
-            "(from what I can tell)",
-            "(surprisingly enough)",
-            "(believe it or not)",
-            "(go figure)"
-        ]
-
-        for sentence in sentences:
-            if random.random() < 0.15 and len(sentence.split()) > 8:
-                parenthetical = random.choice(parentheticals)
-                words = sentence.split()
-                insert_pos = random.randint(3, len(words) - 2)
-                words.insert(insert_pos, parenthetical)
-                sentence = ' '.join(words)
-
-            modified_sentences.append(sentence)
-
-        return ' '.join(modified_sentences)
-
-    def humanize_comprehensively(self, text: str, intensity: str = "medium") -> str:
-        """Apply comprehensive humanization based on intensity level"""
-        if not text or not text.strip():
-            return text
-
-        # Always apply basic humanization
-        text = self.replace_ai_patterns(text)
-        text = self.add_contractions_advanced(text)
-        text = self.replace_formal_transitions(text)
-
-        if intensity in ["medium", "heavy"]:
-            text = self.add_natural_flow(text)
-            text = self.inject_personality(text)
-            text = self.vary_punctuation(text)
-
-        if intensity == "heavy":
-            text = self.add_hesitation_and_fillers(text)
-            text = self.add_parenthetical_thoughts(text)
-
-        # Clean up any double spaces or weird formatting
-        text = re.sub(r'\s+', ' ', text)
-        text = text.strip()
-
-        return text
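Note: with humanization_utils.py removed, the one piece of its approach that survives is the pattern-replacement loop kept in app.py's replace_ai_phrases. The sketch below isolates that technique under stated assumptions: the PATTERNS table is a two-entry stand-in for self.ai_replacements, not the app's full list. Matches are edited in reverse order so earlier spans stay valid while the string is rewritten, and a leading capital is preserved across the swap.

import random
import re

# Two-entry stand-in for the ai_replacements table in app.py (trimmed for brevity).
PATTERNS = {
    r'\bhowever\b': ["but", "though", "yet"],
    r'\butilize\b': ["use", "make use of", "work with"],
}

def replace_ai_phrases(text: str) -> str:
    """Swap AI-sounding phrases for casual ones, keeping capitalization."""
    for pattern, replacements in PATTERNS.items():
        # Iterate matches in reverse so earlier spans stay valid while editing.
        for match in reversed(list(re.finditer(pattern, text, re.IGNORECASE))):
            replacement = random.choice(replacements)
            start, end = match.span()
            if text[start].isupper():
                replacement = replacement.capitalize()
            text = text[:start] + replacement + text[end:]
    return text

print(replace_ai_phrases("However, we utilize regex. However it works."))
# Possible output: "But, we use regex. Though it works."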