from detoxify import Detoxify
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    pipeline,
    AutoModelForSequenceClassification,
    RobertaTokenizer,
    RobertaForSequenceClassification,
)
import re
import json
import google.generativeai as genai
import os
import spacy
import torch
import torch.nn.functional as F

# Configure Google Gemini API
genai.configure(api_key=os.getenv("GENIE_API_KEY"))

# Load custom toxic phrases and modifiers from JSON
with open("toxic_words.json") as f:
    custom_data = json.load(f)
custom_words = custom_data["custom_toxic_phrases"]
modifiers = custom_data["modifiers"]

# Load pre-trained models
# Detoxify model for general toxicity scoring
tox_model = Detoxify("original")

# HuggingFace sentiment model
sentiment_model = pipeline("sentiment-analysis")

# Local paraphrasing model (negative -> positive)
model_name = "ggallipoli/bart-base_neg2pos"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# HateBERT-style (RoBERTa-based) toxicity classifier
tokenizer2 = RobertaTokenizer.from_pretrained("s-nlp/roberta_toxicity_classifier")
model2 = RobertaForSequenceClassification.from_pretrained("s-nlp/roberta_toxicity_classifier")


def check_toxicity(text):
    """
    Predicts the general toxicity of the given text using Detoxify.

    Args:
        text (str): Input text to evaluate.

    Returns:
        float: Toxicity score.
    """
    result = tox_model.predict(text)
    toxicity_score = result.get("toxicity", 0)
    return toxicity_score


def is_custom_toxic(text):
    """
    Checks if text contains any custom-defined toxic phrases,
    optionally combined with modifiers.

    Args:
        text (str): Input text to check.

    Returns:
        bool: True if a custom toxic phrase is found, else False.
    """
    text = text.lower()
    for word in custom_words:
        base = word.lower()
        patterns = [base]
        for m in modifiers:
            patterns.append(f"{m} {base}")
        for p in patterns:
            if re.search(r'\b' + re.escape(p) + r'\b', text):
                return True
    return False


def check_sentiment(text):
    """
    Uses sentiment analysis to score negative sentiment in text.

    Args:
        text (str): Input text.

    Returns:
        float: Negative sentiment score (0 if the text is not negative).
    """
    result = sentiment_model(text)[0]
    if result["label"] == "NEGATIVE":
        return result["score"]
    return 0


def paraphrase_text_local(text):
    """
    Locally paraphrases text using a BART model trained to convert
    negative text to a positive/neutral tone.

    Args:
        text (str): Input text.

    Returns:
        str: Paraphrased text.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    outputs = model.generate(inputs["input_ids"], max_length=100, num_beams=5, early_stopping=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def paraphrase_text(text):
    """
    Paraphrases text using the Google Gemini API, falling back to the
    local model if the API call fails.

    Args:
        text (str): Input text.

    Returns:
        str: Paraphrased text.
    """
    prompt = (
        f"Rewrite the following social media comment to keep the meaning very close to the original without adding or removing key ideas. "
        f"The rewritten version should be concise, not exceeding twice the length of the original. "
        f"It should sound friendly, respectful, and either neutral or slightly positive—something teenagers might say casually to each other online. "
        f"Avoid any form of bullying, blaming, sarcasm, mockery, offensive language, or any harsh or judgmental tone. "
        f"Replace all harmful, negative, or offensive expressions with more neutral, respectful language while preserving the original intent of the message. "
        f"Ensure the response is just the rewritten comment with no additional explanation or reasoning. "
        f"The rewritten version should be appropriate for all audiences and sound conversational, like something a teen would write to a friend. "
        f"Reply with the new sentence only: {text}"
    )
    try:
        # Use a separate name so the global BART `model` is not shadowed
        gemini_model = genai.GenerativeModel("gemini-2.0-flash")
        response = gemini_model.generate_content(prompt)
        return response.text.strip()
    except Exception as e:
        print("Gemini error:", e)
        return paraphrase_text_local(text)


def check_cyberbullying_with_hatebert(text):
    """
    Uses a HateBERT-style toxicity classifier to score text.

    Args:
        text (str): Input text.

    Returns:
        float: Toxicity probability score.
    """
    inputs = tokenizer2(text, return_tensors="pt")
    with torch.no_grad():
        outputs = model2(**inputs)
    probs = F.softmax(outputs.logits, dim=1)
    toxicity_score = probs[0][1].item()
    return toxicity_score


# Load the spaCy English model, downloading it first only if it is missing
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

first_person_pronouns = ["I", "me", "my", "mine", "we", "our", "ours"]


def is_person_or_pronoun(text):
    """
    Checks if text mentions another person (not first-person) or contains
    a pronoun referring to someone else.

    Args:
        text (str): Input text.

    Returns:
        bool: True if another person is mentioned, else False.
    """
    doc = nlp(text)
    for token in doc:
        if token.pos_ == "PRON" and token.text.lower() not in [pron.lower() for pron in first_person_pronouns]:
            return True
        if token.ent_type_ == "PERSON" or re.match(r"^@", token.text):  # @username mention
            return True
    return False