Commit 5a2da96 · Deploying backend

Files added:
- .dockerignore +21 -0
- .gitignore +36 -0
- Dockerfile +23 -0
- README.md +0 -0
- app/__init__.py +0 -0
- app/api/__init__.py +0 -0
- app/api/core/config.py +16 -0
- app/api/detect.py +15 -0
- app/api/grammar.py +10 -0
- app/api/rephrase.py +16 -0
- app/app.py +33 -0
- app/models/schemas.py +35 -0
- app/services/ai_detector.py +56 -0
- app/services/drift.py +13 -0
- app/services/grammar.py +8 -0
- app/services/rephraser.py +81 -0
- requirements.txt +8 -0
- start.sh +2 -0
.dockerignore
ADDED
@@ -0,0 +1,21 @@
+# Ignore model cache, Python cache, and other unnecessary files
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+*.log
+*.sqlite
+*.db
+
+# Env and build artifacts
+venv/
+.env
+.env.*
+*.egg-info/
+dist/
+build/
+.idea/
+.vscode/
+*.bak
+.DS_Store
+node_modules/
.gitignore
ADDED
@@ -0,0 +1,36 @@
+# Python
+__pycache__/
+*.py[cod]
+*.egg
+*.egg-info/
+*.pyo
+*.pyd
+
+# Virtual Environment
+venv/
+.env
+.venv/
+
+# Model caches
+.cache/
+*.bin
+*.pt
+*.ckpt
+
+# VSCode
+.vscode/
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Logs
+*.log
+
+# FastAPI/uvicorn logs
+*.log
+*.sqlite3
+
+# Ignore uploaded files or generated outputs
+outputs/
+hf_cache/
Dockerfile
ADDED
@@ -0,0 +1,23 @@
+# Base image
+FROM python:3.11-slim
+
+# Set working directory
+WORKDIR /app
+
+# Environment variables for better logging and HF cache
+ENV TRANSFORMERS_CACHE=/tmp/hf_cache \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1
+
+# Install dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy app code
+COPY app/ ./app/
+
+# Expose port for FastAPI
+EXPOSE 8000
+
+# Start the FastAPI server (the app object lives in app/app.py, so the module path is app.app:app)
+CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "8000"]
README.md
ADDED
File without changes
app/__init__.py
ADDED
File without changes
app/api/__init__.py
ADDED
File without changes
app/api/core/config.py
ADDED
@@ -0,0 +1,16 @@
+# app/api/core/config.py
+
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi import FastAPI
+
+def setup_cors(app: FastAPI):
+    """
+    Configure CORS settings for development and frontend integration.
+    """
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],  # Change this in production to a specific domain like ["https://verbo-ai.vercel.app"]
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
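Note that app/app.py below configures CORS inline and never imports this helper. If it were wired up, a minimal sketch (hypothetical, not part of this commit) would look like:

# Hypothetical wiring sketch: apply the CORS helper instead of the
# inline middleware block in app/app.py. Not part of this commit.
from fastapi import FastAPI
from app.api.core.config import setup_cors

app = FastAPI(title="Verbo Backend")
setup_cors(app)  # wide-open CORS; tighten allow_origins for production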
app/api/detect.py
ADDED
@@ -0,0 +1,15 @@
+
+from fastapi import APIRouter
+from app.models.schemas import DetectInput, DetectResponse
+from app.services.ai_detector import detect_ai_text
+
+router = APIRouter()
+
+@router.post("/detect", response_model=DetectResponse)
+async def detect_ai(input_data: DetectInput):
+    """
+    Detect AI-generated content in the input text.
+    Returns overall likelihood and flagged sentences.
+    """
+    print(f"Input received: {input_data.text}")
+    return detect_ai_text(input_data.text)
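A quick smoke test of this route, as a sketch: it assumes the server is running locally on port 8000 and that the requests package is installed (it is not in requirements.txt).

# Sketch: exercise /api/detect against a locally running server.
import requests

resp = requests.post(
    "http://localhost:8000/api/detect",
    json={"text": "In today's world, AI has become very important."},
)
resp.raise_for_status()
data = resp.json()  # shaped by DetectResponse
print(data["overall_ai_score"], len(data["flagged_sentences"]))

One detail worth knowing: detect_ai_text also returns a highlighted_html key, which response_model=DetectResponse silently strips from the HTTP response.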
app/api/grammar.py
ADDED
@@ -0,0 +1,10 @@
+from fastapi import APIRouter
+from app.services.grammar import correct_grammar
+from app.models.schemas import GrammarInput, GrammarResponse
+
+router = APIRouter()
+
+@router.post("/grammar", response_model=GrammarResponse)
+async def grammar_check(input: GrammarInput):
+    result = correct_grammar(input.text)
+    return GrammarResponse(corrected_text=result)
app/api/rephrase.py
ADDED
@@ -0,0 +1,16 @@
+from fastapi import APIRouter
+from app.models.schemas import RephraseInput, RephraseResponse
+from app.services.rephraser import rephrase_text
+from app.services.drift import slight_semantic_drift
+
+router = APIRouter()
+
+@router.post("/rephrase", response_model=RephraseResponse)
+async def rephrase(input_data: RephraseInput):
+    raw_rephrased = rephrase_text(input_data.text, input_data.tone)
+    final_output = slight_semantic_drift(raw_rephrased)
+    return {
+        "original_text": input_data.text,
+        "rephrased_text": final_output,
+        "tone_used": input_data.tone
+    }
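A request to this route must pick one of the five tones declared in RephraseInput; a client-call sketch (same assumptions as the /api/detect example above):

# Sketch: /api/rephrase requires one of the five Literal tones.
import requests

payload = {"text": "We cannot attend the meeting.", "tone": "polite"}
r = requests.post("http://localhost:8000/api/rephrase", json=payload)
print(r.json()["rephrased_text"])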
app/app.py
ADDED
@@ -0,0 +1,33 @@
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from app.api import detect, rephrase, grammar
+
+
+app = FastAPI(
+    title="Verbo Backend",
+    description="Backend for Verbo Chrome Extension to detect and rephrase AI-generated text",
+    version="1.0.0"
+)
+
+# CORS Setup
+origins = [
+    "http://localhost:8080",        # for local Vite frontend
+    "https://verbo-ai.vercel.app",  # production
+]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Register routes
+app.include_router(detect.router, prefix="/api", tags=["AI Detection"])
+app.include_router(rephrase.router, prefix="/api", tags=["Rephrasing"])
+app.include_router(grammar.router, prefix="/api")
+
+@app.get("/", tags=["Root"])
+async def read_root():
+    return {"message": "Welcome to the Verbo Backend API."}
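Uvicorn serves this module as app.app:app. An in-process smoke test of the route wiring, as a sketch (FastAPI's TestClient needs httpx, which is not in requirements.txt):

# Sketch: in-process check that the app and root route are wired up.
from fastapi.testclient import TestClient
from app.app import app

client = TestClient(app)
assert client.get("/").status_code == 200
# The /api routes also respond, but hitting them loads the ML models
# and, for /api/grammar, requires network access.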
app/models/schemas.py
ADDED
@@ -0,0 +1,35 @@
+
+
+from pydantic import BaseModel
+from typing import List, Optional
+from pydantic import Field
+from typing import Literal
+
+
+class DetectInput(BaseModel):
+    text: str
+
+class SentenceAnalysis(BaseModel):
+    sentence: str
+    ai_likelihood: float
+
+class DetectResponse(BaseModel):
+    overall_ai_score: float
+    flagged_sentences: List[SentenceAnalysis]
+
+
+
+class RephraseInput(BaseModel):
+    text: str
+    tone: Literal["general", "professional", "casual", "polite", "witty"]
+
+class RephraseResponse(BaseModel):
+    original_text: str
+    rephrased_text: str
+    tone_used: str
+
+class GrammarInput(BaseModel):
+    text: str
+
+class GrammarResponse(BaseModel):
+    corrected_text: str
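Because tone is a Literal, Pydantic rejects any value outside the five options before the route handler runs; a small sketch:

# Sketch: the Literal annotation makes tone validation automatic.
from pydantic import ValidationError
from app.models.schemas import RephraseInput

RephraseInput(text="Hi", tone="witty")  # accepted
try:
    RephraseInput(text="Hi", tone="sarcastic")  # not one of the five tones
except ValidationError as err:
    print(err.errors()[0]["loc"])  # ('tone',)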
app/services/ai_detector.py
ADDED
@@ -0,0 +1,56 @@
+from transformers import pipeline
+
+detector = pipeline("text-classification", model="roberta-base-openai-detector")
+
+def clean_text(text: str) -> list:
+    """
+    Splits the input text into clean sentences/lines.
+    """
+    lines = text.strip().split(".")
+    return [line.strip() for line in lines if line.strip()]
+
+def detect_ai_text(text: str) -> dict:
+    """
+    Detects AI-generated content in the input text.
+    Returns overall_ai_score, flagged_sentences, and highlighted_html.
+    """
+    lines = clean_text(text)
+    flagged_sentences = []
+    total_ai_score = 0.0
+    highlighted_html = ""
+
+    for line in lines:
+        try:
+            result = detector(line)[0]
+            label = result["label"]
+            score = result["score"]
+
+            sentence_html = line
+            if label == "LABEL_1" or label.lower() == "fake":
+                flagged_sentences.append({
+                    "sentence": line,
+                    "ai_likelihood": round(score, 2)
+                })
+                total_ai_score += score
+
+                # Highlight based on score
+                if score > 0.7:
+                    sentence_html = f'<span class="bg-red-500/20 border-b-2 border-red-500 rounded px-1">{line}</span>'
+                elif score > 0.4:
+                    sentence_html = f'<span class="bg-yellow-500/20 border-b-2 border-yellow-500 rounded px-1">{line}</span>'
+            else:
+                total_ai_score += score  # optional: to keep normalization balanced
+
+            highlighted_html += sentence_html + ". "
+
+        except Exception as e:
+            print(f"Error analyzing line: {line[:30]}... | Error: {e}")
+            highlighted_html += line + ". "
+
+    overall_ai_score = round(total_ai_score / len(lines), 2) if lines else 0.0
+
+    return {
+        "overall_ai_score": overall_ai_score,
+        "flagged_sentences": flagged_sentences,
+        "highlighted_html": highlighted_html.strip()
+    }
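The period-based splitter in clean_text treats abbreviations and decimals as sentence boundaries, so inputs with "e.g." or "3.14" get fragmented. A direct-call sketch (the first invocation downloads the roberta-base-openai-detector weights):

# Sketch: call the detector service directly, bypassing HTTP.
from app.services.ai_detector import detect_ai_text

report = detect_ai_text("AI has become very important. It is everywhere.")
print(report["overall_ai_score"])
for hit in report["flagged_sentences"]:
    print(hit["sentence"], hit["ai_likelihood"])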
app/services/drift.py
ADDED
@@ -0,0 +1,13 @@
+import random
+
+def slight_semantic_drift(text: str) -> str:
+    edits = [
+        lambda t: t.replace("very", "extremely"),
+        lambda t: t.replace("important", "crucial"),
+        lambda t: t.replace(",", ", "),
+        lambda t: t.replace("has become", "is now"),
+        lambda t: t.replace("In today's", "Nowadays,"),
+    ]
+    for func in random.sample(edits, k=min(2, len(edits))):
+        text = func(text)
+    return text
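Each call samples two of the five edits at random, so output varies run to run; seeding random makes a demo repeatable, as in this sketch:

# Sketch: the drift is random; seed it for a repeatable demo.
import random
from app.services.drift import slight_semantic_drift

random.seed(0)  # illustrative seed only, not part of the service
print(slight_semantic_drift("In today's world, this is very important."))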
app/services/grammar.py
ADDED
@@ -0,0 +1,8 @@
+import language_tool_python
+
+tool = language_tool_python.LanguageToolPublicAPI('en-US')
+
+def correct_grammar(text: str) -> str:
+    matches = tool.check(text)
+    corrected_text = language_tool_python.utils.correct(text, matches)
+    return corrected_text
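LanguageToolPublicAPI posts text to the hosted languagetool.org service rather than starting a local Java server, so calls need network access and are subject to the public rate limits. A usage sketch:

# Sketch: correct_grammar round-trips text through the hosted LanguageTool API.
from app.services.grammar import correct_grammar

print(correct_grammar("She go to school every days."))
# Expected to come back with subject-verb and plural fixes applied.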
app/services/rephraser.py
ADDED
@@ -0,0 +1,81 @@
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import torch
+import re
+import random
+
+model_name = "humarin/chatgpt_paraphraser_on_T5_base"
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
+
+def post_process_humanize(text: str) -> str:
+    # Step 1: Remove generic AI prefixes
+    generic_starts = [
+        "the following text is",
+        "here is a",
+        "this is the",
+        "paraphrased version",
+        "rephrased version",
+        "reworded version",
+        "rewritten version"
+    ]
+    for start in generic_starts:
+        if text.lower().startswith(start):
+            text = text[len(start):].strip(" :.-\"\n")
+
+    # Step 2: Light contractions and filler words
+    replacements = {
+        "do not": "don't",
+        "cannot": "can't",
+        "will not": "won't",
+        "should not": "shouldn't",
+        "has not": "hasn't",
+        "have not": "haven't",
+        "it is": "it's",
+        "we are": "we're",
+        "you are": "you're",
+        "they are": "they're",
+        "I am": "I'm",
+        "There is": "There's",
+        "That is": "That's",
+        "because": "since",
+    }
+
+    for orig, repl in replacements.items():
+        text = re.sub(rf"\b{orig}\b", repl, text, flags=re.IGNORECASE)
+
+    # Step 3: Add human filler expressions in safe spots
+    softeners = ["frankly", "actually", "honestly", "to be fair", "in fact", "well"]
+    if random.random() < 0.5:
+        sentences = re.split(r'(?<=[.!?]) +', text)
+        if len(sentences) > 1:
+            insert_idx = random.randint(0, len(sentences) - 2)
+            sentences[insert_idx] += f", {random.choice(softeners)}"
+            text = " ".join(sentences)
+
+    return text.strip()
+
+def rephrase_text(text: str, tone: str = "general") -> str:
+    if tone == "general":
+        prompt = f"paraphrase: {text} </s>"
+    else:
+        prompt = f"Rephrase in a {tone} tone without introductory phrases:\n{text} </s>"
+
+    inputs = tokenizer([prompt], return_tensors="pt", padding=True, truncation=True).to(device)
+
+    outputs = model.generate(
+        **inputs,
+        do_sample=True,
+        top_k=50,
+        top_p=0.92,
+        temperature=0.8,
+        max_length=1024,
+        repetition_penalty=1.1,
+        early_stopping=True
+    )
+
+    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Post-process to humanize
+    result = post_process_humanize(result)
+    return result
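Importing this module eagerly downloads and loads the T5 paraphraser onto the GPU when one is available, and do_sample=True makes every call nondeterministic. A direct usage sketch:

# Sketch: service-level call; sampling makes each run different.
from app.services.rephraser import rephrase_text

print(rephrase_text("We cannot attend the meeting.", tone="polite"))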
requirements.txt
ADDED
@@ -0,0 +1,8 @@
+fastapi
+uvicorn
+transformers
+torch
+pydantic
+protobuf
+huggingface-hub
+language_tool_python
start.sh
ADDED
@@ -0,0 +1,2 @@
+
+uvicorn app.app:app --reload --host 0.0.0.0 --port 8000