import os

# Point the Hugging Face cache and home directories at a writable path.
# These variables must be set before transformers is imported, because the
# library reads them at import time.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf-cache"
os.environ["HF_HOME"] = "/tmp/hf-home"

from fastapi import FastAPI, Request
from transformers import AutoModelForSequenceClassification, AutoConfig, RobertaTokenizer
from scipy.special import softmax
import numpy as np

app = FastAPI()
# Model and tokenizer setup
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
TOKENIZER_MODEL = "cardiffnlp/twitter-roberta-base-sentiment"

tokenizer = RobertaTokenizer.from_pretrained(TOKENIZER_MODEL)
config = AutoConfig.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
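# At the time of writing, this checkpoint's config maps ids 0/1/2 to the labels
# negative/neutral/positive; worth verifying against the model card on the Hub,
# since the endpoint below takes the mapping from config.id2label at runtime.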
# Preprocessing
def preprocess(text):
    tokens = []
    for t in text.split():
        if t.startswith("@") and len(t) > 1:
            t = "@user"
        elif t.startswith("http"):
            t = "http"
        tokens.append(t)
    return " ".join(tokens)
# Endpoint
# Note: FastAPI only exposes a function that carries a route decorator; the
# "/analyze" path below is an assumption, adjust it to match how the Space is
# actually called.
@app.post("/analyze")
async def analyze(request: Request):
    data = await request.json()
    text = preprocess(data.get("text", ""))
    encoded_input = tokenizer(text, return_tensors='pt')
    output = model(**encoded_input)
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    ranking = np.argsort(scores)[::-1]
    result = []
    for i in ranking:
        label = config.id2label[i]
        score = round(float(scores[i]), 4)
        result.append({"label": label, "score": score})
    return {"result": result}