"""FastAPI service that scores tweet sentiment with CardiffNLP RoBERTa models."""

import os

# Hugging Face cache/home must point at a writable path (e.g. read-only
# container filesystems). These MUST be set before `transformers` is
# imported — the original set them after the import, by which point the
# library may already have resolved its cache locations.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf-cache"
os.environ["HF_HOME"] = "/tmp/hf-home"

import numpy as np
import torch
from fastapi import FastAPI, Request
from scipy.special import softmax
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    RobertaTokenizer,
)

app = FastAPI()

# Model and tokenizer setup.
# NOTE(review): the tokenizer comes from the older checkpoint while weights
# come from "-latest"; both CardiffNLP checkpoints share a vocabulary, but
# confirm the split is intentional.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
TOKENIZER_MODEL = "cardiffnlp/twitter-roberta-base-sentiment"

tokenizer = RobertaTokenizer.from_pretrained(TOKENIZER_MODEL)
config = AutoConfig.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
model.eval()  # inference-only service; disable dropout etc.


def preprocess(text: str) -> str:
    """Mask user mentions as "@user" and URLs as "http".

    This mirrors the preprocessing the CardiffNLP Twitter models were
    trained with, so inference inputs match the training distribution.
    """
    tokens = []
    for t in text.split():
        if t.startswith("@") and len(t) > 1:
            t = "@user"
        elif t.startswith("http"):
            t = "http"
        tokens.append(t)
    return " ".join(tokens)


@app.post("/analyze")
async def analyze(request: Request):
    """Return sentiment labels with softmax scores, best-first.

    Expects a JSON body with a "text" key (missing key -> empty string).
    Response: {"result": [{"label": ..., "score": ...}, ...]} sorted by
    descending score, scores rounded to 4 decimal places.
    """
    data = await request.json()
    text = preprocess(data.get("text", ""))
    encoded_input = tokenizer(text, return_tensors='pt')
    # No gradients needed at inference time — avoids building an autograd
    # graph per request (the original wasted memory here).
    with torch.no_grad():
        output = model(**encoded_input)
    scores = softmax(output[0][0].detach().numpy())
    ranking = np.argsort(scores)[::-1]  # indices of labels, highest score first
    result = [
        {"label": config.id2label[int(i)], "score": round(float(scores[i]), 4)}
        for i in ranking
    ]
    return {"result": result}