import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the TinyLlama chat model and its tokenizer from the Hugging Face Hub
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Generate a completion for the given prompt
def generate(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=200)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Expose the generator through a simple Gradio text-in/text-out interface
gr.Interface(fn=generate, inputs="text", outputs="text").launch()
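
# Optional: since TinyLlama-1.1B-Chat-v1.0 is chat-tuned, a generate() variant that
# wraps the prompt in the tokenizer's chat template usually gives better answers than
# feeding raw text. Sketch:
# def generate_chat(prompt):
#     messages = [{"role": "user", "content": prompt}]
#     input_ids = tokenizer.apply_chat_template(
#         messages, add_generation_prompt=True, return_tensors="pt"
#     )
#     outputs = model.generate(input_ids, max_new_tokens=200)
#     # Return only the newly generated tokens, not the echoed prompt
#     return tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)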

# Alternative: run the quantized GGUF build of the same model with llama-cpp-python.
# import gradio as gr
# from llama_cpp import Llama
#
# # llama_cpp needs a local .gguf file rather than a Hub repo id; Llama.from_pretrained
# # downloads the matching file from the repo (adjust the filename pattern if the repo
# # uses a different file name).
# llm = Llama.from_pretrained(
#     repo_id="MegaTom/TinyLlama-1.1B-Chat-v1.0-Q4_K_M-GGUF",
#     filename="*q4_k_m.gguf",
# )
#
# # Generate a short completion for the given prompt
# def generate(prompt):
#     output = llm(prompt, max_tokens=50)
#     return output["choices"][0]["text"]
#
# # Expose the generator through the same Gradio interface
# gr.Interface(fn=generate, inputs="text", outputs="text").launch()
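
# Note: on a Hugging Face Space the Python dependencies go in requirements.txt:
# gradio, transformers and torch for the active version above, or
# gradio and llama-cpp-python for the commented-out GGUF variant.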