Commit 5a2da96 · Deploying backend

Files added:
- .dockerignore +21 -0
- .gitignore +36 -0
- Dockerfile +23 -0
- README.md +0 -0
- app/__init__.py +0 -0
- app/api/__init__.py +0 -0
- app/api/core/config.py +16 -0
- app/api/detect.py +15 -0
- app/api/grammar.py +10 -0
- app/api/rephrase.py +16 -0
- app/app.py +33 -0
- app/models/schemas.py +35 -0
- app/services/ai_detector.py +56 -0
- app/services/drift.py +13 -0
- app/services/grammar.py +8 -0
- app/services/rephraser.py +81 -0
- requirements.txt +8 -0
- start.sh +2 -0
.dockerignore
ADDED
@@ -0,0 +1,21 @@
+# Ignore model cache, Python cache, and other unnecessary files
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+*.log
+*.sqlite
+*.db
+
+# Env and build artifacts
+venv/
+.env
+.env.*
+*.egg-info/
+dist/
+build/
+.idea/
+.vscode/
+*.bak
+.DS_Store
+node_modules/
.gitignore
ADDED
@@ -0,0 +1,36 @@
+# Python
+__pycache__/
+*.py[cod]
+*.egg
+*.egg-info/
+*.pyo
+*.pyd
+
+# Virtual Environment
+venv/
+.env
+.venv/
+
+# Model caches
+.cache/
+*.bin
+*.pt
+*.ckpt
+
+# VSCode
+.vscode/
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Logs
+*.log
+
+# FastAPI/uvicorn logs
+*.log
+*.sqlite3
+
+# Ignore uploaded files or generated outputs
+outputs/
+hf_cache/
Dockerfile
ADDED
@@ -0,0 +1,23 @@
+# Base image
+FROM python:3.11-slim
+
+# Set working directory
+WORKDIR /app
+
+# Environment variables for better logging and HF cache
+ENV TRANSFORMERS_CACHE=/tmp/hf_cache \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1
+
+# Install dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy app code
+COPY app/ ./app/
+
+# Expose port for FastAPI
+EXPOSE 8000
+
+# Start the FastAPI server (the app object lives in app/app.py, so the module path is app.app:app)
+CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "8000"]
README.md
ADDED
File without changes
app/__init__.py
ADDED
File without changes
app/api/__init__.py
ADDED
File without changes
app/api/core/config.py
ADDED
@@ -0,0 +1,16 @@
+# app/api/core/config.py
+
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi import FastAPI
+
+def setup_cors(app: FastAPI):
+    """
+    Configure CORS settings for development and frontend integration.
+    """
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],  # Change this in production to a specific domain like ["https://verbo-ai.vercel.app"]
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
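Note that app/app.py below configures CORS inline and never imports this helper. If it were wired up, a minimal sketch (hypothetical, not part of this commit) would look like:

# Hypothetical wiring sketch: apply the CORS helper instead of the
# inline middleware block in app/app.py. Not part of this commit.
from fastapi import FastAPI
from app.api.core.config import setup_cors

app = FastAPI(title="Verbo Backend")
setup_cors(app)  # wide-open CORS; tighten allow_origins for production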
app/api/detect.py
ADDED
@@ -0,0 +1,15 @@
+
+from fastapi import APIRouter
+from app.models.schemas import DetectInput, DetectResponse
+from app.services.ai_detector import detect_ai_text
+
+router = APIRouter()
+
+@router.post("/detect", response_model=DetectResponse)
+async def detect_ai(input_data: DetectInput):
+    """
+    Detect AI-generated content in the input text.
+    Returns overall likelihood and flagged sentences.
+    """
+    print(f"Input received: {input_data.text}")
+    return detect_ai_text(input_data.text)
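A quick smoke test of this route, as a sketch: it assumes the server is running locally on port 8000 and that the requests package is installed (it is not in requirements.txt).

# Sketch: exercise /api/detect against a locally running server.
import requests

resp = requests.post(
    "http://localhost:8000/api/detect",
    json={"text": "In today's world, AI has become very important."},
)
resp.raise_for_status()
data = resp.json()  # shaped by DetectResponse
print(data["overall_ai_score"], len(data["flagged_sentences"]))

One detail worth knowing: detect_ai_text also returns a highlighted_html key, which response_model=DetectResponse silently strips from the HTTP response.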
app/api/grammar.py
ADDED
@@ -0,0 +1,10 @@
+from fastapi import APIRouter
+from app.services.grammar import correct_grammar
+from app.models.schemas import GrammarInput, GrammarResponse
+
+router = APIRouter()
+
+@router.post("/grammar", response_model=GrammarResponse)
+async def grammar_check(input: GrammarInput):
+    result = correct_grammar(input.text)
+    return GrammarResponse(corrected_text=result)
app/api/rephrase.py
ADDED
@@ -0,0 +1,16 @@
+from fastapi import APIRouter
+from app.models.schemas import RephraseInput, RephraseResponse
+from app.services.rephraser import rephrase_text
+from app.services.drift import slight_semantic_drift
+
+router = APIRouter()
+
+@router.post("/rephrase", response_model=RephraseResponse)
+async def rephrase(input_data: RephraseInput):
+    raw_rephrased = rephrase_text(input_data.text, input_data.tone)
+    final_output = slight_semantic_drift(raw_rephrased)
+    return {
+        "original_text": input_data.text,
+        "rephrased_text": final_output,
+        "tone_used": input_data.tone
+    }
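A request to this route must pick one of the five tones declared in RephraseInput; a client-call sketch (same assumptions as the /api/detect example above):

# Sketch: /api/rephrase requires one of the five Literal tones.
import requests

payload = {"text": "We cannot attend the meeting.", "tone": "polite"}
r = requests.post("http://localhost:8000/api/rephrase", json=payload)
print(r.json()["rephrased_text"])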
app/app.py
ADDED
@@ -0,0 +1,33 @@
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from app.api import detect, rephrase, grammar
+
+
+app = FastAPI(
+    title="Verbo Backend",
+    description="Backend for Verbo Chrome Extension to detect and rephrase AI-generated text",
+    version="1.0.0"
+)
+
+# CORS Setup
+origins = [
+    "http://localhost:8080",        # for local Vite frontend
+    "https://verbo-ai.vercel.app",  # production
+]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Register routes
+app.include_router(detect.router, prefix="/api", tags=["AI Detection"])
+app.include_router(rephrase.router, prefix="/api", tags=["Rephrasing"])
+app.include_router(grammar.router, prefix="/api")
+
+@app.get("/", tags=["Root"])
+async def read_root():
+    return {"message": "Welcome to the Verbo Backend API."}
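Uvicorn serves this module as app.app:app. An in-process smoke test of the route wiring, as a sketch (FastAPI's TestClient needs httpx, which is not in requirements.txt):

# Sketch: in-process check that the app and root route are wired up.
from fastapi.testclient import TestClient
from app.app import app

client = TestClient(app)
assert client.get("/").status_code == 200
# The /api routes also respond, but hitting them loads the ML models
# and, for /api/grammar, requires network access.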
app/models/schemas.py
ADDED
@@ -0,0 +1,35 @@
+
+
+from pydantic import BaseModel
+from typing import List, Optional
+from pydantic import Field
+from typing import Literal
+
+
+class DetectInput(BaseModel):
+    text: str
+
+class SentenceAnalysis(BaseModel):
+    sentence: str
+    ai_likelihood: float
+
+class DetectResponse(BaseModel):
+    overall_ai_score: float
+    flagged_sentences: List[SentenceAnalysis]
+
+
+
+class RephraseInput(BaseModel):
+    text: str
+    tone: Literal["general", "professional", "casual", "polite", "witty"]
+
+class RephraseResponse(BaseModel):
+    original_text: str
+    rephrased_text: str
+    tone_used: str
+
+class GrammarInput(BaseModel):
+    text: str
+
+class GrammarResponse(BaseModel):
+    corrected_text: str
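Because tone is a Literal, Pydantic rejects any value outside the five options before the route handler runs; a small sketch:

# Sketch: the Literal annotation makes tone validation automatic.
from pydantic import ValidationError
from app.models.schemas import RephraseInput

RephraseInput(text="Hi", tone="witty")  # accepted
try:
    RephraseInput(text="Hi", tone="sarcastic")  # not one of the five tones
except ValidationError as err:
    print(err.errors()[0]["loc"])  # ('tone',)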
app/services/ai_detector.py
ADDED
@@ -0,0 +1,56 @@
+from transformers import pipeline
+
+detector = pipeline("text-classification", model="roberta-base-openai-detector")
+
+def clean_text(text: str) -> list:
+    """
+    Splits the input text into clean sentences/lines.
+    """
+    lines = text.strip().split(".")
+    return [line.strip() for line in lines if line.strip()]
+
+def detect_ai_text(text: str) -> dict:
+    """
+    Detects AI-generated content in the input text.
+    Returns overall_ai_score, flagged_sentences, and highlighted_html.
+    """
+    lines = clean_text(text)
+    flagged_sentences = []
+    total_ai_score = 0.0
+    highlighted_html = ""
+
+    for line in lines:
+        try:
+            result = detector(line)[0]
+            label = result["label"]
+            score = result["score"]
+
+            sentence_html = line
+            if label == "LABEL_1" or label.lower() == "fake":
+                flagged_sentences.append({
+                    "sentence": line,
+                    "ai_likelihood": round(score, 2)
+                })
+                total_ai_score += score
+
+                # Highlight based on score
+                if score > 0.7:
+                    sentence_html = f'<span class="bg-red-500/20 border-b-2 border-red-500 rounded px-1">{line}</span>'
+                elif score > 0.4:
+                    sentence_html = f'<span class="bg-yellow-500/20 border-b-2 border-yellow-500 rounded px-1">{line}</span>'
+            else:
+                total_ai_score += score  # optional: to keep normalization balanced
+
+            highlighted_html += sentence_html + ". "
+
+        except Exception as e:
+            print(f"Error analyzing line: {line[:30]}... | Error: {e}")
+            highlighted_html += line + ". "
+
+    overall_ai_score = round(total_ai_score / len(lines), 2) if lines else 0.0
+
+    return {
+        "overall_ai_score": overall_ai_score,
+        "flagged_sentences": flagged_sentences,
+        "highlighted_html": highlighted_html.strip()
+    }
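The period-based splitter in clean_text treats abbreviations and decimals as sentence boundaries, so inputs with "e.g." or "3.14" get fragmented. A direct-call sketch (the first invocation downloads the roberta-base-openai-detector weights):

# Sketch: call the detector service directly, bypassing HTTP.
from app.services.ai_detector import detect_ai_text

report = detect_ai_text("AI has become very important. It is everywhere.")
print(report["overall_ai_score"])
for hit in report["flagged_sentences"]:
    print(hit["sentence"], hit["ai_likelihood"])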
app/services/drift.py
ADDED
@@ -0,0 +1,13 @@
+import random
+
+def slight_semantic_drift(text: str) -> str:
+    edits = [
+        lambda t: t.replace("very", "extremely"),
+        lambda t: t.replace("important", "crucial"),
+        lambda t: t.replace(",", ", "),
+        lambda t: t.replace("has become", "is now"),
+        lambda t: t.replace("In today's", "Nowadays,"),
+    ]
+    for func in random.sample(edits, k=min(2, len(edits))):
+        text = func(text)
+    return text
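Each call samples two of the five edits at random, so output varies run to run; seeding random makes a demo repeatable, as in this sketch:

# Sketch: the drift is random; seed it for a repeatable demo.
import random
from app.services.drift import slight_semantic_drift

random.seed(0)  # illustrative seed only, not part of the service
print(slight_semantic_drift("In today's world, this is very important."))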
app/services/grammar.py
ADDED
@@ -0,0 +1,8 @@
+import language_tool_python
+
+tool = language_tool_python.LanguageToolPublicAPI('en-US')
+
+def correct_grammar(text: str) -> str:
+    matches = tool.check(text)
+    corrected_text = language_tool_python.utils.correct(text, matches)
+    return corrected_text
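LanguageToolPublicAPI posts text to the hosted languagetool.org service rather than starting a local Java server, so calls need network access and are subject to the public rate limits. A usage sketch:

# Sketch: correct_grammar round-trips text through the hosted LanguageTool API.
from app.services.grammar import correct_grammar

print(correct_grammar("She go to school every days."))
# Expected to come back with subject-verb and plural fixes applied.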
app/services/rephraser.py
ADDED
@@ -0,0 +1,81 @@
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import torch
+import re
+import random
+
+model_name = "humarin/chatgpt_paraphraser_on_T5_base"
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
+
+def post_process_humanize(text: str) -> str:
+    # Step 1: Remove generic AI prefixes
+    generic_starts = [
+        "the following text is",
+        "here is a",
+        "this is the",
+        "paraphrased version",
+        "rephrased version",
+        "reworded version",
+        "rewritten version"
+    ]
+    for start in generic_starts:
+        if text.lower().startswith(start):
+            text = text[len(start):].strip(" :.-\"\n")
+
+    # Step 2: Light contractions and filler words
+    replacements = {
+        "do not": "don't",
+        "cannot": "can't",
+        "will not": "won't",
+        "should not": "shouldn't",
+        "has not": "hasn't",
+        "have not": "haven't",
+        "it is": "it's",
+        "we are": "we're",
+        "you are": "you're",
+        "they are": "they're",
+        "I am": "I'm",
+        "There is": "There's",
+        "That is": "That's",
+        "because": "since",
+    }
+
+    for orig, repl in replacements.items():
+        text = re.sub(rf"\b{orig}\b", repl, text, flags=re.IGNORECASE)
+
+    # Step 3: Add human filler expressions in safe spots
+    softeners = ["frankly", "actually", "honestly", "to be fair", "in fact", "well"]
+    if random.random() < 0.5:
+        sentences = re.split(r'(?<=[.!?]) +', text)
+        if len(sentences) > 1:
+            insert_idx = random.randint(0, len(sentences) - 2)
+            sentences[insert_idx] += f", {random.choice(softeners)}"
+            text = " ".join(sentences)
+
+    return text.strip()
+
+def rephrase_text(text: str, tone: str = "general") -> str:
+    if tone == "general":
+        prompt = f"paraphrase: {text} </s>"
+    else:
+        prompt = f"Rephrase in a {tone} tone without introductory phrases:\n{text} </s>"
+
+    inputs = tokenizer([prompt], return_tensors="pt", padding=True, truncation=True).to(device)
+
+    outputs = model.generate(
+        **inputs,
+        do_sample=True,
+        top_k=50,
+        top_p=0.92,
+        temperature=0.8,
+        max_length=1024,
+        repetition_penalty=1.1,
+        early_stopping=True
+    )
+
+    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Post-process to humanize
+    result = post_process_humanize(result)
+    return result
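Importing this module eagerly downloads and loads the T5 paraphraser onto the GPU when one is available, and do_sample=True makes every call nondeterministic. A direct usage sketch:

# Sketch: service-level call; sampling makes each run different.
from app.services.rephraser import rephrase_text

print(rephrase_text("We cannot attend the meeting.", tone="polite"))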
requirements.txt
ADDED
@@ -0,0 +1,8 @@
+fastapi
+uvicorn
+transformers
+torch
+pydantic
+protobuf
+huggingface-hub
+language_tool_python
start.sh
ADDED
@@ -0,0 +1,2 @@
+
+uvicorn app.app:app --reload --host 0.0.0.0 --port 8000