# Hugging Face Spaces page header captured with this file (Space status: Sleeping).
import os

# Redirect the Hugging Face caches to /tmp. These assignments are kept ahead
# of the `transformers` import, which presumably reads them when it is
# imported -- TODO confirm before reordering.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf-cache"
os.environ["HF_HOME"] = "/tmp/hf-home"

import numpy as np
import torch
from fastapi import FastAPI, Request
from scipy.special import softmax
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
app = FastAPI()

# Identifier of the pretrained sentiment checkpoint that the tokenizer,
# config and model below are all loaded from.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# Load model and tokenizer once, at import time, so every request reuses the
# same objects. `config` is kept because its `id2label` mapping is what turns
# class indices into label names when building the response.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
# Preprocessing step for Twitter-style input
def preprocess(text: str) -> str:
    """Mask user mentions and links in *text* with fixed placeholder tokens.

    The text is split on runs of whitespace and each token is rewritten as
    follows:

    * a token that starts with ``@`` and is longer than one character
      becomes ``"@user"`` (a lone ``"@"`` is left untouched);
    * otherwise, a token that starts with ``"http"`` becomes ``"http"``;
    * every other token is kept as is.

    Args:
        text: Raw input text.

    Returns:
        The rewritten tokens joined by single spaces. Because the split is on
        arbitrary whitespace, leading/trailing whitespace is dropped and
        internal runs (including newlines) collapse to one space.
    """
    tokens = []
    for token in text.split():
        if token.startswith("@") and len(token) > 1:
            token = "@user"
        elif token.startswith("http"):
            token = "http"
        tokens.append(token)
    return " ".join(tokens)
async def analyze(request: Request):
    """Score the sentiment of the ``text`` field of a JSON request body.

    The body is expected to be a JSON object with a string ``text`` entry.
    The text is normalised with ``preprocess``, tokenised, run through the
    module-level ``model``, and the resulting logits are turned into
    probabilities with a softmax.

    Args:
        request: Incoming request whose body carries the JSON payload.

    Returns:
        ``{"result": [...]}`` where each item is
        ``{"label": <name from config.id2label>, "score": <probability
        rounded to 4 decimals>}``, ordered from most to least probable.
        When the input cannot be used (malformed JSON, body that is not an
        object, non-string ``text``, or blank text) a ``{"error": <message>}``
        dict is returned instead.
    """
    # NOTE(review): no route decorator (e.g. ``@app.post(...)``) is visible on
    # this handler in the reviewed source -- confirm it is registered on `app`.
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; report them to the client instead of failing the request.
        return {"error": "Request body must be valid JSON."}

    if not isinstance(data, dict):
        return {"error": "Request body must be a JSON object."}

    raw_text = data.get("text", "")
    if not isinstance(raw_text, str):
        # e.g. {"text": null} or {"text": 42}: `.strip()` below would raise.
        return {"error": "Field 'text' must be a string."}

    # Logging for debugging
    print(f"Raw input: {raw_text}")
    if not raw_text.strip():
        return {"error": "Empty input text."}

    text = preprocess(raw_text)
    print(f"Preprocessed: {text}")

    encoded_input = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    print(f"Encoded input: {encoded_input.input_ids}")

    # Inference only: disabling gradient tracking avoids building an autograd
    # graph for every request.
    with torch.no_grad():
        output = model(**encoded_input)
    # One input sequence was encoded, so row 0 holds its per-class logits.
    scores = output.logits[0].numpy()
    probs = softmax(scores)

    # Logging output
    print(f"Raw scores: {scores}")
    print(f"Softmax probs: {probs}")

    # argsort is ascending; reverse it to list the most probable label first.
    # Indices are cast to plain ints before being used as id2label keys.
    result = [
        {"label": config.id2label[int(i)], "score": round(float(probs[i]), 4)}
        for i in probs.argsort()[::-1]
    ]
    print(f"Result: {result}")
    return {"result": result}