axnand committed on
Commit
5a2da96
·
0 Parent(s):

Deploying backend

Browse files
.dockerignore ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ignore model cache, Python cache, and other unnecessary files
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+ *.log
7
+ *.sqlite
8
+ *.db
9
+
10
+ # Env and build artifacts
11
+ venv/
12
+ .env
13
+ .env.*
14
+ *.egg-info/
15
+ dist/
16
+ build/
17
+ .idea/
18
+ .vscode/
19
+ *.bak
20
+ .DS_Store
21
+ node_modules/
.gitignore ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg
5
+ *.egg-info/
6
+ *.pyo
7
+ *.pyd
8
+
9
+ # Virtual Environment
10
+ venv/
11
+ .env
12
+ .venv/
13
+
14
+ # Model caches
15
+ .cache/
16
+ *.bin
17
+ *.pt
18
+ *.ckpt
19
+
20
+ # VSCode
21
+ .vscode/
22
+
23
+ # OS files
24
+ .DS_Store
25
+ Thumbs.db
26
+
27
+ # Logs
28
+ *.log
29
+
30
+ # FastAPI/uvicorn logs
31
+ *.log
32
+ *.sqlite3
33
+
34
+ # Ignore uploaded files or generated outputs
35
+ outputs/
36
+ hf_cache/
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Environment variables for better logging and HF cache
ENV TRANSFORMERS_CACHE=/tmp/hf_cache \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# Install dependencies first so this layer is cached across code-only changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy app code
COPY app/ ./app/

# Expose port for FastAPI
EXPOSE 8000

# Start the FastAPI server.
# The FastAPI instance is defined in app/app.py, so the import string is
# "app.app:app" (there is no app/main.py in this project).
CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "8000"]
README.md ADDED
File without changes
app/__init__.py ADDED
File without changes
app/api/__init__.py ADDED
File without changes
app/api/core/config.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/api/core/config.py
2
+
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from fastapi import FastAPI
5
+
6
def setup_cors(app: FastAPI, allowed_origins: list[str] | None = None):
    """
    Configure CORS settings for development and frontend integration.

    Args:
        app: The FastAPI application to attach the CORS middleware to.
        allowed_origins: Explicit list of origins allowed to call the API,
            e.g. ["https://verbo-ai.vercel.app"]. When omitted (or empty),
            every origin is allowed, which is only suitable for development.
    """
    origins = list(allowed_origins) if allowed_origins else ["*"]

    # A wildcard origin must not be combined with credentials: the CORS spec
    # forbids it, and honouring it would let any site make credentialed
    # requests. Credentials are therefore enabled only for explicit origins.
    allow_credentials = "*" not in origins

    app.add_middleware(
        CORSMiddleware,
        allow_origins=origins,
        allow_credentials=allow_credentials,
        allow_methods=["*"],
        allow_headers=["*"],
    )
app/api/detect.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from fastapi import APIRouter
3
+ from app.models.schemas import DetectInput, DetectResponse
4
+ from app.services.ai_detector import detect_ai_text
5
+
6
router = APIRouter()


@router.post("/detect", response_model=DetectResponse)
async def detect_ai(input_data: DetectInput):
    """
    Detect AI-generated content in the input text.
    Returns overall likelihood and flagged sentences.
    """
    # Local import keeps this block self-contained; the module does not
    # import logging at file level.
    import logging

    # Log only the size of the payload: writing the full user-submitted text
    # to the process output leaks user content and can flood the logs.
    logging.getLogger(__name__).info(
        "Detect request received (%d characters)", len(input_data.text)
    )
    return detect_ai_text(input_data.text)
app/api/grammar.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+ from app.services.grammar import correct_grammar
3
+ from app.models.schemas import GrammarInput, GrammarResponse
4
+
5
router = APIRouter()


@router.post("/grammar", response_model=GrammarResponse)
async def grammar_check(input: GrammarInput):
    """
    Correct the grammar of the submitted text.

    Returns a GrammarResponse whose corrected_text is the output of
    correct_grammar for input.text.
    """
    # Local import keeps this block self-contained.
    from fastapi.concurrency import run_in_threadpool

    # correct_grammar is a synchronous call; executing it directly inside
    # this coroutine would block the event loop for every other request
    # until it finishes, so it is run in a worker thread instead.
    result = await run_in_threadpool(correct_grammar, input.text)
    return GrammarResponse(corrected_text=result)
app/api/rephrase.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+ from app.models.schemas import RephraseInput, RephraseResponse
3
+ from app.services.rephraser import rephrase_text
4
+ from app.services.drift import slight_semantic_drift
5
+
6
router = APIRouter()


@router.post("/rephrase", response_model=RephraseResponse)
async def rephrase(input_data: RephraseInput):
    """
    Rephrase the submitted text in the requested tone.

    The text is first passed through rephrase_text and the result is then
    passed through slight_semantic_drift before being returned alongside the
    original text and the tone that was used.
    """
    source_text = input_data.text
    tone = input_data.tone

    drifted = slight_semantic_drift(rephrase_text(source_text, tone))

    response = {
        "original_text": source_text,
        "rephrased_text": drifted,
        "tone_used": tone,
    }
    return response
app/app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from app.api import detect, rephrase, grammar
4
+
5
+
6
# Application instance served by uvicorn.
app = FastAPI(
    title="Verbo Backend",
    description="Backend for Verbo Chrome Extension to detect and rephrase AI-generated text",
    version="1.0.0",
)

# CORS Setup: only these front-end origins may call the API from a browser.
origins = [
    "http://localhost:8080",  # for local Vite frontend
    "https://verbo-ai.vercel.app",  # production
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Register routes. Every router is mounted under /api and carries a tag so
# its endpoints are grouped in the generated API docs; the grammar router
# previously had no tag.
app.include_router(detect.router, prefix="/api", tags=["AI Detection"])
app.include_router(rephrase.router, prefix="/api", tags=["Rephrasing"])
app.include_router(grammar.router, prefix="/api", tags=["Grammar"])
30
+
31
@app.get("/", tags=["Root"])
async def read_root():
    """Return a static welcome payload for the API root."""
    welcome = {"message": "Welcome to the Verbo Backend API."}
    return welcome
app/models/schemas.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ from pydantic import BaseModel
4
+ from typing import List, Optional
5
+ from pydantic import Field
6
+ from typing import Literal
7
+
8
+
9
class DetectInput(BaseModel):
    # Request body for the detection endpoint.
    # text: the text to analyse (required).
    text: str = Field(...)
11
+
12
class SentenceAnalysis(BaseModel):
    # One flagged sentence in a detection result.
    # sentence: the sentence text (required).
    sentence: str = Field(...)
    # ai_likelihood: score attached to that sentence (required).
    ai_likelihood: float = Field(...)
15
+
16
class DetectResponse(BaseModel):
    # Response body for the detection endpoint.
    # overall_ai_score: aggregate score for the whole text (required).
    overall_ai_score: float = Field(...)
    # flagged_sentences: per-sentence entries for flagged sentences (required).
    flagged_sentences: List[SentenceAnalysis] = Field(...)
19
+
20
+
21
+
22
class RephraseInput(BaseModel):
    # Request body for the rephrase endpoint.
    # text: the text to rephrase (required).
    text: str = Field(...)
    # tone: one of the supported tone names (required).
    tone: Literal["general", "professional", "casual", "polite", "witty"] = Field(...)
25
+
26
class RephraseResponse(BaseModel):
    # Response body for the rephrase endpoint.
    # original_text: the text as submitted (required).
    original_text: str = Field(...)
    # rephrased_text: the rewritten text (required).
    rephrased_text: str = Field(...)
    # tone_used: the tone that was applied (required).
    tone_used: str = Field(...)
30
+
31
class GrammarInput(BaseModel):
    # Request body for the grammar endpoint.
    # text: the text to correct (required).
    text: str = Field(...)
33
+
34
class GrammarResponse(BaseModel):
    # Response body for the grammar endpoint.
    # corrected_text: the text after grammar correction (required).
    corrected_text: str = Field(...)
app/services/ai_detector.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+
3
# Shared text-classification pipeline, created once when this module is imported.
detector = pipeline(task="text-classification", model="roberta-base-openai-detector")
4
+
5
def clean_text(text: str) -> list:
    """
    Split the input text into trimmed, non-empty sentences.

    A sentence ends at one or more periods that are followed by whitespace
    or by the end of the text. Periods inside a token (for example "3.14" or
    "example.com") therefore no longer cut a sentence in half. The
    terminating periods are not part of the returned sentences.

    Returns:
        A list of sentence strings; empty when the text has no content.
    """
    import re  # local import: this module does not import re at file level

    pieces = re.split(r"\.+(?:\s+|$)", text.strip())
    return [piece.strip() for piece in pieces if piece.strip()]
11
+
12
def detect_ai_text(text: str) -> dict:
    """
    Detect AI-generated content in the input text.

    The text is split into sentences with clean_text and every sentence is
    classified on its own with the module-level detector.

    Returns:
        A dict with three keys:
        - "overall_ai_score": mean AI likelihood over the sentences that were
          scored successfully, rounded to 2 decimals (0.0 when none were).
        - "flagged_sentences": one {"sentence", "ai_likelihood"} dict per
          sentence the detector labelled as AI-written.
        - "highlighted_html": the sentences re-joined with ". ", HTML-escaped,
          with flagged sentences wrapped in a highlighting <span>.
    """
    import html  # local import: this module does not import html at file level

    # Blank input: nothing to classify, so skip the model entirely.
    if not text or not text.strip():
        return {
            "overall_ai_score": 0.0,
            "flagged_sentences": [],
            "highlighted_html": "",
        }

    lines = clean_text(text)
    flagged_sentences = []
    html_parts = []
    total_ai_score = 0.0
    scored_count = 0

    for line in lines:
        # The sentence comes from the request body, so it is escaped before
        # being placed into markup that a client may render.
        safe_line = html.escape(line)
        sentence_html = safe_line

        try:
            result = detector(line)[0]
            label = result["label"]
            score = result["score"]
        except Exception as e:
            # Best effort: a sentence the model cannot process is reported
            # and left un-highlighted instead of failing the whole request.
            print(f"Error analyzing line: {line[:30]}... | Error: {e}")
        else:
            is_ai = label == "LABEL_1" or label.lower() == "fake"

            # `score` is the confidence of the predicted label. Assuming a
            # two-class detector, a sentence labelled human contributes
            # 1 - score to the AI average rather than the confidence itself.
            total_ai_score += score if is_ai else 1.0 - score
            scored_count += 1

            if is_ai:
                flagged_sentences.append({
                    "sentence": line,
                    "ai_likelihood": round(score, 2)
                })

                # Highlight based on score
                if score > 0.7:
                    sentence_html = f'<span class="bg-red-500/20 border-b-2 border-red-500 rounded px-1">{safe_line}</span>'
                elif score > 0.4:
                    sentence_html = f'<span class="bg-yellow-500/20 border-b-2 border-yellow-500 rounded px-1">{safe_line}</span>'

        html_parts.append(sentence_html + ". ")

    # Average only over sentences that were actually scored, so a failed
    # sentence does not pull the overall score towards zero.
    overall_ai_score = round(total_ai_score / scored_count, 2) if scored_count else 0.0

    return {
        "overall_ai_score": overall_ai_score,
        "flagged_sentences": flagged_sentences,
        "highlighted_html": "".join(html_parts).strip()
    }
app/services/drift.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+
3
def slight_semantic_drift(text: str) -> str:
    """
    Apply two randomly chosen light wording edits to the text.

    Five edits are available (word swaps, phrase swaps and comma spacing);
    two distinct ones are sampled per call and applied in sampled order.
    Word swaps match whole words only, so words that merely contain
    "very" or "important" (for example "every", "unimportant") are left
    untouched. Comma spacing is only added where a comma is directly
    followed by a non-space, non-digit character, so existing ", " and
    numbers such as "1,000" are not altered.

    Returns:
        The edited text; identical to the input when neither sampled edit
        matches anything.
    """
    import re  # local import: this module only imports random at file level

    edits = [
        lambda t: re.sub(r"\bvery\b", "extremely", t),
        lambda t: re.sub(r"\bimportant\b", "crucial", t),
        lambda t: re.sub(r",(?=[^\s\d])", ", ", t),
        lambda t: t.replace("has become", "is now"),
        lambda t: t.replace("In today's", "Nowadays,"),
    ]
    for func in random.sample(edits, k=min(2, len(edits))):
        text = func(text)
    return text
app/services/grammar.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import language_tool_python
2
+
3
# Shared LanguageTool client for US English, created once when this module is imported.
tool = language_tool_python.LanguageToolPublicAPI("en-US")
4
+
5
def correct_grammar(text: str) -> str:
    """
    Return the text with the grammar corrections suggested by the shared tool applied.

    Blank input (empty or whitespace only) is returned unchanged without
    contacting the checker, since there is nothing to correct.
    """
    if not text or not text.strip():
        return text

    matches = tool.check(text)
    return language_tool_python.utils.correct(text, matches)
app/services/rephraser.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
+ import torch
3
+ import re
4
+ import random
5
+
6
# Paraphrasing checkpoint, loaded once at import time and shared by all calls.
model_name = "humarin/chatgpt_paraphraser_on_T5_base"

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
model = model.to(device)
10
+
11
def _match_leading_case(source: str, replacement: str) -> str:
    """Capitalise the first letter of *replacement* when *source* starts upper-case."""
    if source[:1].isupper():
        return replacement[:1].upper() + replacement[1:]
    return replacement


def post_process_humanize(text: str) -> str:
    """
    Make generated text read less mechanically.

    Three steps are applied in order:
    1. Strip known generic lead-ins (e.g. "here is a", "rephrased version")
       from the start of the text.
    2. Replace a fixed set of phrases with contractions (and "because" with
       "since"), matching case-insensitively and keeping the capitalisation
       of the first letter of the matched phrase.
    3. With probability 0.5, when the text has more than one sentence, add a
       softener such as ", frankly" to one sentence other than the last. The
       softener goes before that sentence's closing punctuation.

    Returns:
        The processed text with surrounding whitespace removed.
    """
    # Step 1: Remove generic AI prefixes
    generic_starts = [
        "the following text is",
        "here is a",
        "this is the",
        "paraphrased version",
        "rephrased version",
        "reworded version",
        "rewritten version"
    ]
    for start in generic_starts:
        if text.lower().startswith(start):
            text = text[len(start):].strip(" :.-\"\n")

    # Step 2: Light contractions and filler words.
    # Values are written in their mid-sentence form; _match_leading_case
    # capitalises them when the matched phrase started a sentence.
    replacements = {
        "do not": "don't",
        "cannot": "can't",
        "will not": "won't",
        "should not": "shouldn't",
        "has not": "hasn't",
        "have not": "haven't",
        "it is": "it's",
        "we are": "we're",
        "you are": "you're",
        "they are": "they're",
        "I am": "I'm",
        "there is": "there's",
        "that is": "that's",
        "because": "since",
    }

    for orig, repl in replacements.items():
        text = re.sub(
            rf"\b{re.escape(orig)}\b",
            # Bind repl now; a plain closure would see the loop's last value.
            lambda match, repl=repl: _match_leading_case(match.group(0), repl),
            text,
            flags=re.IGNORECASE,
        )

    # Step 3: Add human filler expressions in safe spots
    softeners = ["frankly", "actually", "honestly", "to be fair", "in fact", "well"]
    if random.random() < 0.5:
        sentences = re.split(r'(?<=[.!?]) +', text)
        if len(sentences) > 1:
            insert_idx = random.randint(0, len(sentences) - 2)
            target = sentences[insert_idx]
            # Each split sentence still carries its closing punctuation, so
            # the softener is inserted before it ("..., frankly.") rather
            # than appended after it ("...., frankly").
            core = target.rstrip(".!?")
            closing = target[len(core):]
            sentences[insert_idx] = f"{core}, {random.choice(softeners)}{closing}"
        text = " ".join(sentences)

    return text.strip()
57
+
58
def rephrase_text(text: str, tone: str = "general") -> str:
    """
    Paraphrase the text with the shared seq2seq model.

    Args:
        text: The text to rephrase.
        tone: "general" uses the plain "paraphrase:" prompt; any other value
            is inserted into a tone-specific instruction prompt.

    Returns:
        The generated paraphrase after post_process_humanize, or "" when the
        input is empty or whitespace only (the model is not invoked).
    """
    # Blank input: there is nothing to paraphrase, so skip generation.
    if not text or not text.strip():
        return ""

    # The tokenizer appends the end-of-sequence token itself, so the prompt
    # must not contain a literal "</s>" (that would encode a second EOS).
    if tone == "general":
        prompt = f"paraphrase: {text}"
    else:
        prompt = f"Rephrase in a {tone} tone without introductory phrases:\n{text}"

    inputs = tokenizer([prompt], return_tensors="pt", padding=True, truncation=True).to(device)

    # Sampling-based decoding. `early_stopping` was dropped because it only
    # affects beam search, which is not used here.
    outputs = model.generate(
        **inputs,
        do_sample=True,
        top_k=50,
        top_p=0.92,
        temperature=0.8,
        max_length=1024,
        repetition_penalty=1.1,
    )

    result = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Post-process to humanize
    return post_process_humanize(result)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ transformers
4
+ torch
5
+ pydantic
6
+ protobuf
7
+ huggingface-hub
8
+ language_tool_python
start.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+
2
# The FastAPI instance is defined in app/app.py, hence "app.app:app".
uvicorn app.app:app --reload --host 0.0.0.0 --port 8000