|
from typing import Any, Dict, List |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
import torch |
|
|
|
MAX_INPUT_LENGTH = 256 |
|
MAX_OUTPUT_LENGTH = 128 |
|
|
|
class EndpointHandler:
    """Custom inference handler for a seq2seq (encoder-decoder) model.

    Loads the tokenizer and model once at endpoint startup and serves
    batched text-to-text generation requests. Expected request payload::

        {"inputs": "some text" | ["text a", "text b"],
         "parameters": {...optional generate() overrides...}}
    """

    def __init__(self, model_dir: str = "", **kwargs: Any) -> None:
        """Initialize the model and tokenizer when the endpoint starts.

        Args:
            model_dir: Directory containing the saved model/tokenizer files.
            **kwargs: Ignored; accepted for handler-interface compatibility.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)

        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)
        # Disable dropout etc. — this endpoint only runs inference.
        self.model.eval()

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Handle an incoming inference request.

        Args:
            data: Request payload. Must contain "inputs" (a string or a list
                of strings). May contain "parameters", a dict of keyword
                arguments forwarded to ``model.generate`` that override the
                built-in defaults (e.g. ``num_beams``, ``max_length``).

        Returns:
            One ``{"generated_text": ...}`` dict per input, in input order.

        Raises:
            ValueError: If "inputs" is missing, empty, or not a str/list.
        """
        inputs = data.get("inputs")
        if not inputs:
            raise ValueError("No 'inputs' found in the request data.")

        # Normalize a single string to a one-element batch.
        if isinstance(inputs, str):
            inputs = [inputs]
        elif not isinstance(inputs, list):
            # Fail fast with a clear message instead of an opaque
            # tokenizer error deep in the call stack.
            raise ValueError(
                "'inputs' must be a string or a list of strings, "
                f"got {type(inputs).__name__}."
            )

        tokenized_inputs = self.tokenizer(
            inputs,
            max_length=MAX_INPUT_LENGTH,
            padding=True,
            truncation=True,
            return_tensors="pt",
        ).to(self.device)

        # Default decoding settings; per-request "parameters" override them.
        gen_kwargs: Dict[str, Any] = {
            "max_length": MAX_OUTPUT_LENGTH,
            "num_beams": 8,
            "no_repeat_ngram_size": 3,
            "pad_token_id": self.tokenizer.pad_token_id,
        }
        gen_kwargs.update(data.get("parameters") or {})

        # inference_mode() disables autograd tracking (and view/version
        # bookkeeping) — strictly faster than no_grad() for pure inference.
        with torch.inference_mode():
            outputs = self.model.generate(**tokenized_inputs, **gen_kwargs)

        decoded_outputs = self.tokenizer.batch_decode(
            outputs, skip_special_tokens=True
        )

        return [{"generated_text": text} for text in decoded_outputs]
|
|