import torch
from transformers import AutoTokenizer, AutoModel, AutoConfig
from peft import PeftModel
from modeling_bilstm_crf import BERT_BiLSTM_CRF
from generative_inference import infer_t5_bart, infer_gpt_absa, infer_deepseek, infer_allam
from huggingface_hub import hf_hub_download
# Supported models: each entry maps a UI name to a base checkpoint and either
# a PEFT adapter repo or a fine-tuned OpenAI model id.
MODEL_OPTIONS = {
    "Araberta": {
        "base": "asmashayea/absa-araberta",
        "adapter": "asmashayea/absa-araberta"
    },
    "mT5": {
        "base": "google/mt5-base",
        "adapter": "asmashayea/mt4-absa"
    },
    "mBART": {
        "base": "facebook/mbart-large-50-many-to-many-mmt",
        "adapter": "asmashayea/mbart-absa"
    },
    "GPT3.5": {
        "base": "openai/gpt-3.5-turbo",
        "model_id": "ft:gpt-3.5-turbo-0125:asma:gpt-3-5-turbo-absa:Bb6gmwkE"
    },
    "GPT4o": {
        "base": "openai/gpt-4o",
        "model_id": "ft:gpt-4o-mini-2024-07-18:asma:gpt4-finetune-absa:BazoEjnp"
    },
    "ALLaM": {
        "base": "ALLaM-AI/ALLaM-7B-Instruct-preview",
        "adapter": "asmashayea/allam-absa"
    },
    "DeepSeek": {
        "base": "deepseek-ai/deepseek-llm-7b-chat",
        "adapter": "asmashayea/deepseek-absa"
    }
}
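# Illustrative only: each MODEL_OPTIONS entry pairs a base checkpoint with
# either a PEFT adapter repo ("adapter") or a fine-tuned OpenAI model id
# ("model_id"). A loader can branch on whichever key is present, as in this
# hypothetical helper (not part of the original app):
def resolve_checkpoints(choice):
    entry = MODEL_OPTIONS[choice]
    return entry["base"], entry.get("adapter", entry.get("model_id"))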
# Cache loaded (tokenizer, model) pairs so repeated requests reuse them.
cached_models = {}
def load_araberta():
    """Load the AraBERT encoder with its LoRA adapter plus the BiLSTM-CRF head."""
    path = "asmashayea/absa-arabert"
    tokenizer = AutoTokenizer.from_pretrained(path)
    # Base encoder with the LoRA adapter weights applied on top.
    base_model = AutoModel.from_pretrained(path)
    lora_model = PeftModel.from_pretrained(base_model, path)
    # Fetch the custom BiLSTM-CRF head checkpoint and restore its weights.
    local_pt = hf_hub_download(repo_id="asmashayea/absa-arabert", filename="bilstm_crf_head.pt")
    config = AutoConfig.from_pretrained(path)
    model = BERT_BiLSTM_CRF(lora_model, config)
    model.load_state_dict(torch.load(local_pt, map_location=torch.device("cpu")))
    model.eval()
    cached_models["Araberta"] = (tokenizer, model)
    return tokenizer, model
def infer_araberta(text):
    """Tag a sentence with the AraBERT + BiLSTM-CRF model and decode BIO labels."""
    if "Araberta" not in cached_models:
        tokenizer, model = load_araberta()
    else:
        tokenizer, model = cached_models["Araberta"]

    device = next(model.parameters()).device
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding='max_length', max_length=128)
    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

    # The CRF head emits one label id per token position.
    predicted_ids = outputs['logits'][0].cpu().tolist()
    tokens = tokenizer.convert_ids_to_tokens(input_ids[0].cpu())
    predicted_labels = [model.id2label.get(p, 'O') for p in predicted_ids]

    # Drop special tokens ([CLS], [SEP], [PAD]) before decoding spans.
    clean_tokens = [t for t in tokens if t not in tokenizer.all_special_tokens]
    clean_labels = [l for t, l in zip(tokens, predicted_labels) if t not in tokenizer.all_special_tokens]
    # Map BIO tag suffixes to full sentiment names.
    sentiment_map = {
        "POS": "positive",
        "NEG": "negative",
        "NEU": "neutral"
    }
    def join_wordpieces(pieces):
        # Reattach "##" continuation pieces directly to the preceding token;
        # a plain " ".join(...).replace("##", "") would leave stray spaces
        # inside words that the tokenizer split.
        return "".join(p[2:] if p.startswith("##") else " " + p for p in pieces).strip()

    aspects = []
    current_tokens = []
    current_sentiment = None
    for token, label in zip(clean_tokens, clean_labels):
        if label.startswith("B-"):
            # A new aspect starts; close any span still open.
            if current_tokens:
                aspects.append({
                    "aspect": join_wordpieces(current_tokens),
                    "sentiment": sentiment_map.get(current_sentiment, current_sentiment)
                })
            current_tokens = [token]
            current_sentiment = label.split("-")[1]
        elif label.startswith("I-") and current_sentiment == label.split("-")[1]:
            # Continuation of the open span with a matching sentiment.
            current_tokens.append(token)
        else:
            # An "O" tag or a sentiment switch closes the open span.
            if current_tokens:
                aspects.append({
                    "aspect": join_wordpieces(current_tokens),
                    "sentiment": sentiment_map.get(current_sentiment, current_sentiment)
                })
            current_tokens = []
            current_sentiment = None
    # Flush a span that runs to the end of the input.
    if current_tokens:
        aspects.append({
            "aspect": join_wordpieces(current_tokens),
            "sentiment": sentiment_map.get(current_sentiment, current_sentiment)
        })
    # Merge WordPiece subtokens back into whole words for per-token display.
    token_predictions = []
    merged_token = ""
    merged_label = None
    for token, label in zip(clean_tokens, clean_labels):
        if token.startswith("##"):
            # Continuation piece: append to the word being built.
            merged_token += token[2:]
        else:
            if merged_token:
                token_predictions.append({
                    "token": merged_token,
                    "label": merged_label
                })
            merged_token = token
            merged_label = label
    # Add the last buffered token.
    if merged_token:
        token_predictions.append({
            "token": merged_token,
            "label": merged_label
        })
    return {
        "aspects": aspects,
        "token_predictions": token_predictions
    }
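# Illustrative output shape only; the example text and labels below are made
# up for documentation, not actual model output:
#
#   infer_araberta("الخدمة ممتازة")
#   -> {"aspects": [{"aspect": "الخدمة", "sentiment": "positive"}],
#       "token_predictions": [{"token": "الخدمة", "label": "B-POS"},
#                             {"token": "ممتازة", "label": "O"}]}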
def predict_absa(text, model_choice):
    """Route the input text to the inference path for the selected model."""
    if model_choice in ('mT5', 'mBART'):
        return infer_t5_bart(text, model_choice)
    elif model_choice == 'Araberta':
        return infer_araberta(text)
    elif model_choice in ('GPT3.5', 'GPT4o'):
        return infer_gpt_absa(text, model_choice)
    elif model_choice == 'DeepSeek':
        return infer_deepseek(text)
    elif model_choice == 'ALLaM':
        return infer_allam(text)
    raise ValueError(f"Unsupported model choice: {model_choice}")
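# Minimal usage sketch (assumes the adapter repos above are reachable and any
# OpenAI credentials are configured inside generative_inference; the sample
# sentence is illustrative):
if __name__ == "__main__":
    result = predict_absa("الخدمة ممتازة لكن الأسعار مرتفعة", "Araberta")
    print(result["aspects"])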