import os

# Redirect Hugging Face caches to a writable location (useful in containerized
# or read-only deployments). These must be set before importing transformers.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf-cache"
os.environ["HF_HOME"] = "/tmp/hf-home"

import numpy as np
import torch
from fastapi import FastAPI, Request
from scipy.special import softmax
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

app = FastAPI()

MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
model.eval()  # inference only; disables dropout


# Preprocessing step for Twitter-style input: the Cardiff NLP models expect
# user mentions replaced by "@user" and URLs replaced by "http".
def preprocess(text):
    tokens = []
    for t in text.split():
        if t.startswith("@") and len(t) > 1:
            t = "@user"
        elif t.startswith("http"):
            t = "http"
        tokens.append(t)
    return " ".join(tokens)


@app.post("/analyze")
async def analyze(request: Request):
    data = await request.json()
    raw_text = data.get("text", "")

    # Logging for debugging
    print(f"Raw input: {raw_text}")

    if not raw_text.strip():
        return {"error": "Empty input text."}

    text = preprocess(raw_text)
    print(f"Preprocessed: {text}")

    encoded_input = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    print(f"Encoded input: {encoded_input.input_ids}")

    # Run the model without tracking gradients (inference only)
    with torch.no_grad():
        output = model(**encoded_input)
    scores = output.logits[0].numpy()
    probs = softmax(scores)

    # Logging output
    print(f"Raw scores: {scores}")
    print(f"Softmax probs: {probs}")

    # Sort labels by descending probability
    result = [
        {"label": config.id2label[int(i)], "score": round(float(probs[i]), 4)}
        for i in probs.argsort()[::-1]
    ]
    print(f"Result: {result}")
    return {"result": result}
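
# --- Example usage: a minimal sketch, not part of the service above. It
# assumes this file is named app.py, that uvicorn is installed, and that the
# server listens on port 8000; the sample text and the exact label strings
# come from the model's own config and may differ. ---
#
# Start the server:
#   uvicorn app:app --host 0.0.0.0 --port 8000
#
# Query the endpoint:
#   curl -X POST http://localhost:8000/analyze \
#        -H "Content-Type: application/json" \
#        -d '{"text": "I love this! https://example.com @someone"}'
#
# Expected response shape (scores will vary):
#   {"result": [{"label": "positive", "score": 0.98},
#               {"label": "neutral",  "score": 0.01},
#               {"label": "negative", "score": 0.01}]}

if __name__ == "__main__":
    # Optional entrypoint so the file can also be run directly with
    # `python app.py`; assumes uvicorn is available.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)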