Eiad Gomaa committed
Commit 04b4d4a · 1 Parent(s): 403eecc
Files changed (2)
  1. app.py +16 -3
  2. oldapp.py +96 -0
app.py CHANGED
@@ -8,6 +8,12 @@ def load_model():
     try:
         tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-3.2-1B")
         model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-3.2-1B")
+
+        # Set up padding token
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+            model.config.pad_token_id = model.config.eos_token_id
+
         return model, tokenizer
     except Exception as e:
         st.error(f"Error loading model: {str(e)}")
@@ -29,7 +35,13 @@ def generate_response(prompt):
     """Generate response from the model"""
     try:
         # Prepare the input
-        inputs = tokenizer(prompt, return_tensors="pt", padding=True)
+        inputs = tokenizer(
+            prompt,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=512  # Add max length for input
+        )
 
         # Generate response
         with torch.no_grad():
@@ -38,12 +50,13 @@ def generate_response(prompt):
                 max_length=200,
                 num_return_sequences=1,
                 temperature=0.7,
-                pad_token_id=tokenizer.eos_token_id
+                pad_token_id=tokenizer.pad_token_id,
+                attention_mask=inputs["attention_mask"]  # Add attention mask
             )
 
         # Decode and return the response
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return response
+        return response.replace(prompt, "").strip()  # Remove the input prompt from response
     except Exception as e:
         return f"Error generating response: {str(e)}"
 
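For reference, a minimal self-contained sketch of the generation path after this commit, run outside Streamlit. Only the model ID, the padding-token fallback, the tokenizer settings, and the generate() arguments come from the diff; the prompt string, script structure, and print call are illustrative.

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-3.2-1B")
model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-3.2-1B")

# Fall back to the EOS token when the checkpoint defines no pad token (as app.py now does)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id

prompt = "Hello, how are you?"  # illustrative prompt, not from the diff
inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)

with torch.no_grad():
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],  # distinguishes padding from real tokens
        max_length=200,
        num_return_sequences=1,
        temperature=0.7,
        pad_token_id=tokenizer.pad_token_id,
    )

response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response.replace(prompt, "").strip())  # strip the echoed prompt, as generate_response() now does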
oldapp.py ADDED
@@ -0,0 +1,96 @@
+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+
+@st.cache_resource
+def load_model():
+    """Load model and tokenizer with caching"""
+    try:
+        tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-3.2-1B")
+        model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-3.2-1B")
+        return model, tokenizer
+    except Exception as e:
+        st.error(f"Error loading model: {str(e)}")
+        return None, None
+
+# Page config
+st.set_page_config(page_title="Chat with Quasar-32B", layout="wide")
+st.title("Chat with Quasar-32B")
+
+# Initialize session state for chat history
+if 'messages' not in st.session_state:
+    st.session_state.messages = []
+
+# Load model and tokenizer
+model, tokenizer = load_model()
+
+# Chat interface
+def generate_response(prompt):
+    """Generate response from the model"""
+    try:
+        # Prepare the input
+        inputs = tokenizer(prompt, return_tensors="pt", padding=True)
+
+        # Generate response
+        with torch.no_grad():
+            outputs = model.generate(
+                inputs["input_ids"],
+                max_length=200,
+                num_return_sequences=1,
+                temperature=0.7,
+                pad_token_id=tokenizer.eos_token_id
+            )
+
+        # Decode and return the response
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response
+    except Exception as e:
+        return f"Error generating response: {str(e)}"
+
+# Chat interface
+st.write("### Chat")
+chat_container = st.container()
+
+# Display chat history
+with chat_container:
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.write(message["content"])
+
+# User input
+if prompt := st.chat_input("Type your message here"):
+    # Add user message to chat history
+    st.session_state.messages.append({"role": "user", "content": prompt})
+
+    # Display user message
+    with chat_container:
+        with st.chat_message("user"):
+            st.write(prompt)
+
+    # Generate and display assistant response
+    if model and tokenizer:
+        with st.chat_message("assistant"):
+            with st.spinner("Thinking..."):
+                response = generate_response(prompt)
+                st.write(response)
+                st.session_state.messages.append({"role": "assistant", "content": response})
+    else:
+        st.error("Model failed to load. Please check your configuration.")
+
+# Add a button to clear chat history
+if st.button("Clear Chat History"):
+    st.session_state.messages = []
+    st.experimental_rerun()
+
+# Display system information
+with st.sidebar:
+    st.write("### System Information")
+    st.write("Model: Quasar-32B")
+    st.write("Status: Running" if model and tokenizer else "Status: Not loaded")
+
+# Add some helpful instructions
+st.write("### Instructions")
+st.write("1. Type your message in the chat input")
+st.write("2. Press Enter or click Send")
+st.write("3. Wait for the AI to respond")
+st.write("4. Use 'Clear Chat History' to start fresh")