Spaces:
Runtime error
Runtime error
import streamlit as st | |
import torch | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
# Hugging Face Hub identifier of the checkpoint served by this app.
model_id = "Writer/Palmyra-Med-70B-32k"


@st.cache_resource
def load_model():
    """Load the tokenizer and causal-LM weights for ``model_id``.

    The result is cached with ``st.cache_resource`` because Streamlit
    re-executes the whole script on every widget interaction; without the
    cache the checkpoint would be reloaded on each button click.

    FlashAttention-2 is requested first. If transformers rejects it
    (``ImportError`` when the ``flash_attn`` package is missing,
    ``ValueError`` when the configuration is unsupported), the load is
    retried with the library's default attention implementation. Any
    unrelated error is raised again by the retry.

    Returns:
        tuple: ``(tokenizer, model)``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    load_kwargs = {
        "torch_dtype": torch.float16,
        "device_map": "auto",
    }
    try:
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            attn_implementation="flash_attention_2",
            **load_kwargs,
        )
    except (ImportError, ValueError):
        # FlashAttention-2 unavailable here; let transformers choose.
        model = AutoModelForCausalLM.from_pretrained(model_id, **load_kwargs)
    return tokenizer, model
tokenizer, model = load_model()

# System message sent ahead of every user question.
_SYSTEM_PROMPT = "You are a highly knowledgeable and experienced expert in the healthcare and biomedical field, possessing extensive medical knowledge and practical expertise."

# Upper bound on the number of tokens generated per answer.
_MAX_NEW_TOKENS = 256


def _generate_answer(question):
    """Return the model's reply to ``question`` as plain text.

    Builds a system + user chat prompt with the tokenizer's chat template,
    generates greedily, and decodes only the newly generated tokens.

    Args:
        question: The user's question text.

    Returns:
        str: The decoded reply with special tokens removed.
    """
    messages = [
        {"role": "system", "content": _SYSTEM_PROMPT},
        {"role": "user", "content": question},
    ]
    input_ids = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    )
    # The model is placed by device_map="auto"; the prompt has to live on
    # the same device as the model's input layer.
    input_ids = input_ids.to(model.device)

    # Stop on the regular EOS token and, when the vocabulary has it, on the
    # end-of-turn marker used by the chat template.
    stop_ids = [tokenizer.eos_token_id]
    eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
    if (
        eot_id is not None
        and eot_id != tokenizer.unk_token_id
        and eot_id not in stop_ids
    ):
        stop_ids.append(eot_id)

    # Greedy decoding. The original passed temperature=0.0 / top_p=0.9,
    # which is rejected when sampling is enabled and ignored when it is
    # not; do_sample=False expresses the deterministic intent directly.
    gen_conf = {
        "max_new_tokens": _MAX_NEW_TOKENS,
        "eos_token_id": stop_ids,
        "do_sample": False,
    }
    with torch.no_grad():
        output_ids = model.generate(input_ids, **gen_conf)

    # Drop the prompt tokens so only the model's continuation is decoded.
    prompt_len = input_ids.shape[1]
    return tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True)


# Define Streamlit app
st.title("Medical Query Model")
st.write(
    "You are interacting with a highly knowledgeable medical model. Enter your medical question below:"
)
user_input = st.text_area("Your Question")

if st.button("Get Response"):
    # Treat whitespace-only input the same as an empty box.
    question = user_input.strip()
    if question:
        with st.spinner("Generating response..."):
            output_text = _generate_answer(question)
        st.write("Response:")
        st.write(output_text)
    else:
        st.warning("Please enter a question.")