import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the TinyLlama chat model and its tokenizer from the Hugging Face Hub
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Generate a completion for the given prompt
def generate(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=200)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Expose the generator through a simple Gradio text-in/text-out interface
gr.Interface(fn=generate, inputs="text", outputs="text").launch()
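
# Optional: since TinyLlama-1.1B-Chat-v1.0 is chat-tuned, a generate() variant that
# wraps the prompt in the tokenizer's chat template usually gives better answers than
# feeding raw text. Sketch:
# def generate_chat(prompt):
#     messages = [{"role": "user", "content": prompt}]
#     input_ids = tokenizer.apply_chat_template(
#         messages, add_generation_prompt=True, return_tensors="pt"
#     )
#     outputs = model.generate(input_ids, max_new_tokens=200)
#     # Return only the newly generated tokens, not the echoed prompt
#     return tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)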

# Alternative: run the quantized GGUF build of the same model with llama-cpp-python.
# import gradio as gr
# from llama_cpp import Llama
#
# # llama_cpp needs a local .gguf file rather than a Hub repo id; Llama.from_pretrained
# # downloads the matching file from the repo (adjust the filename pattern if the repo
# # uses a different file name).
# llm = Llama.from_pretrained(
#     repo_id="MegaTom/TinyLlama-1.1B-Chat-v1.0-Q4_K_M-GGUF",
#     filename="*q4_k_m.gguf",
# )
#
# # Generate a short completion for the given prompt
# def generate(prompt):
#     output = llm(prompt, max_tokens=50)
#     return output["choices"][0]["text"]
#
# # Expose the generator through the same Gradio interface
# gr.Interface(fn=generate, inputs="text", outputs="text").launch()
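
# Note: on a Hugging Face Space the Python dependencies go in requirements.txt:
# gradio, transformers and torch for the active version above, or
# gradio and llama-cpp-python for the commented-out GGUF variant.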