import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, StopStringCriteria, StoppingCriteriaList
import torch
import subprocess

# Create a non-root user, equivalent to: useradd -m -u 1000 user
subprocess.run(["useradd", "-m", "-u", "1000", "user"])

import torch._dynamo

# Fall back to eager execution instead of raising if torch.compile fails
# on the model's custom code.
torch._dynamo.config.suppress_errors = True

import os

# import pwd
# print("HERE will print PWD")
# print(pwd.getpwuid(os.getuid())[0])
# os.system("nvidia-smi")
# print("TORCH_CUDA", torch.cuda.is_available())
| print("loading model") | |
| # Load the tokenizer and model | |
| repo_name = "nvidia/Hymba-1.5B-Instruct" | |
| # repo_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct" | |
| tokenizer = AutoTokenizer.from_pretrained(repo_name, trust_remote_code=True) | |
| model = AutoModelForCausalLM.from_pretrained(repo_name, trust_remote_code=True) | |
| model = model.cuda().to(torch.bfloat16) | |
| print("model is loaded") | |
# Chat with Hymba: build a chat-formatted prompt, generate, and decode
# only the newly generated tokens.
stopping_criteria = StoppingCriteriaList(
    [StopStringCriteria(tokenizer=tokenizer, stop_strings="</s>")]
)


def chat(prompt):
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    # Apply chat template
    tokenized_chat = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to("cuda")
    print("generating response")
    outputs = model.generate(
        tokenized_chat,
        max_new_tokens=256,
        do_sample=False,  # greedy decoding, so no temperature is needed
        use_cache=True,
        stopping_criteria=stopping_criteria,
    )
    # Skip the prompt tokens and decode only the model's reply.
    input_length = tokenized_chat.shape[1]
    return tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)


# Smoke-test the model once at startup.
prompt = "Who are you?"
response = chat(prompt)
print(f"Model response: {response}")
def greet(prompt):
    # Gradio handler: run one chat turn and return the model's reply.
    print(f"User: {prompt}")
    response = chat(prompt)
    print(f"Model response: {response}")
    return response


demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()
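
# A minimal sketch (untested, run from a separate process) of querying this
# app with the gradio_client package; "/predict" is the default endpoint
# name exposed by gr.Interface, and the URL assumes the default local port:
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   print(client.predict("Who are you?", api_name="/predict"))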