Avinash109 committed
Commit be7bbc3 · verified · 1 Parent(s): e8acedf

Update app.py

Files changed (1)
  1. app.py +56 -82
app.py CHANGED
@@ -1,9 +1,6 @@
  import streamlit as st
- import requests
- import json
  import datetime
  import tempfile
- import subprocess
  import black
  from streamlit_ace import st_ace
  from streamlit_extras.colored_header import colored_header
@@ -11,18 +8,17 @@ from streamlit_extras.add_vertical_space import add_vertical_space
  import re
  from typing import Optional, Dict, List
  import ast
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch

- def get_ollama_models():
-     """Fetch available models from Ollama"""
-     try:
-         response = requests.get("http://localhost:11434/api/tags")
-         if response.status_code == 200:
-             models = [model["name"] for model in response.json()["models"]]
-             return models
-         return []
-     except Exception as e:
-         st.error(f"Error fetching models: {str(e)}")
-         return []
+ # Initialize model and tokenizer globally
+ @st.cache_resource
+ def load_model_and_tokenizer():
+     tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-32B-Instruct")
+     model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-Coder-32B-Instruct")
+     if torch.cuda.is_available():
+         model = model.to("cuda")
+     return model, tokenizer

  def clear_chat():
      """Clear the chat history"""
@@ -50,34 +46,33 @@ def handle_file_upload():
          return "Binary file uploaded"
      return None

- def generate_response(prompt, model, temperature, max_tokens, system_prompt, stream=False):
-     """Generate response from Ollama model with parameters"""
-     url = "http://localhost:11434/api/generate"
-     data = {
-         "model": model,
-         "prompt": prompt,
-         "system": system_prompt,
-         "temperature": temperature,
-         "max_tokens": max_tokens,
-         "stream": stream
-     }
+ def generate_response(prompt: str, temperature: float, max_tokens: int, system_prompt: str) -> str:
+     """Generate response using the Qwen model"""
+     model, tokenizer = load_model_and_tokenizer()
+
+     # Format the input with system prompt
+     full_prompt = f"System: {system_prompt}\n\nUser: {prompt}\n\nAssistant:"

      try:
-         if stream:
-             response_text = ""
-             with requests.post(url, json=data, stream=True) as response:
-                 for line in response.iter_lines():
-                     if line:
-                         json_response = json.loads(line)
-                         response_text += json_response.get("response", "")
-                         yield response_text
-         else:
-             response = requests.post(url, json=data)
-             if response.status_code == 200:
-                 return response.json()["response"]
-             else:
-                 st.error(f"Error {response.status_code}: {response.text}")
-                 return "Error: Unable to get response from the model."
+         inputs = tokenizer(full_prompt, return_tensors="pt", padding=True)
+         if torch.cuda.is_available():
+             inputs = {k: v.to("cuda") for k, v in inputs.items()}
+
+         # Generate response
+         outputs = model.generate(
+             **inputs,
+             max_new_tokens=max_tokens,
+             temperature=temperature,
+             do_sample=True,
+             pad_token_id=tokenizer.pad_token_id
+         )
+
+         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+         # Extract assistant's response
+         response = response.split("Assistant:")[-1].strip()
+         return response
+
      except Exception as e:
          st.error(f"Error generating response: {str(e)}")
          return f"Error: {str(e)}"
@@ -122,8 +117,8 @@ class CodeAnalyzer:
          return context

  class CodeCompletion:
-     def __init__(self, model: str):
-         self.model = model
+     def __init__(self):
+         pass

      def get_completion_suggestions(self, code: str, context: Dict) -> str:
          """Generate code completion suggestions based on context"""
@@ -138,20 +133,16 @@ Classes: {', '.join(context['classes'])}

  Please complete or continue this code in a natural way."""

-         response = generate_response(prompt, self.model, 0.3, 500,
-             "You are a Python coding assistant. Provide only code completion, no explanations.")
-         return response
+         return generate_response(prompt, 0.3, 500, "You are a Python coding assistant. Provide only code completion, no explanations.")

- def handle_code_continuation(incomplete_code: str, model: str) -> str:
+ def handle_code_continuation(incomplete_code: str) -> str:
      """Handle continuation of incomplete code"""
      prompt = f"""Complete the following Python code:
  {incomplete_code}

  Provide only the completion part that would make this code syntactically complete and logical."""

-     response = generate_response(prompt, model, 0.3, 500,
-         "You are a Python coding assistant. Complete the code naturally.")
-     return response
+     return generate_response(prompt, 0.3, 500, "You are a Python coding assistant. Complete the code naturally.")

  def format_code(code: str) -> str:
      """Format Python code using black"""
@@ -184,7 +175,7 @@ def init_session_state():
  def setup_page_config():
      """Setup page configuration and styling"""
      st.set_page_config(
-         page_title="Enhanced Ollama Chat",
+         page_title="Qwen Coder Chat",
          page_icon="🤖",
          layout="wide",
          initial_sidebar_state="expanded"
@@ -192,14 +183,12 @@ def setup_page_config():

      st.markdown("""
          <style>
-         /* Main container styling */
          .main {
              max-width: 1200px;
              margin: 0 auto;
              padding: 2rem;
          }

-         /* Message container styling */
          .stChatMessage {
              background-color: #ffffff;
              border-radius: 8px;
@@ -208,7 +197,6 @@ def setup_page_config():
              box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
          }

-         /* Chat input styling */
          .stChatInputContainer {
              border-radius: 8px;
              border: 1px solid #e0e0e0;
@@ -216,14 +204,12 @@ def setup_page_config():
              background-color: #ffffff;
          }

-         /* Code editor styling */
          .code-editor {
              border-radius: 8px;
              margin: 1rem 0;
              border: 1px solid #e0e0e0;
          }

-         /* Code snippet container */
          .code-snippet {
              background-color: #f8fafc;
              padding: 1rem;
@@ -231,7 +217,6 @@ def setup_page_config():
              margin: 0.5rem 0;
          }

-         /* Code completion suggestions */
          .completion-suggestion {
              background-color: #f1f5f9;
              padding: 0.5rem;
@@ -267,7 +252,7 @@ def code_editor_section():
      if code_content:
          code_analyzer = CodeAnalyzer()
          context = code_analyzer.get_context(code_content)
-         completion = CodeCompletion(st.session_state.selected_model)
+         completion = CodeCompletion()
          suggestions = completion.get_completion_suggestions(code_content, context)
          st.code(suggestions, language="python")

@@ -276,29 +261,24 @@ def main():
      setup_page_config()
      init_session_state()

+     # Initialize model
+     with st.spinner("Loading Qwen2.5-Coder model..."):
+         load_model_and_tokenizer()
+
      # Sidebar configuration
      with st.sidebar:
          colored_header(label="Model Settings", description="Configure your chat parameters", color_name="blue-70")

-         available_models = get_ollama_models()
-         if not available_models:
-             st.error("⚠️ No Ollama models found. Please make sure Ollama is running and models are installed.")
-             st.stop()
-
-         selected_model = st.selectbox("Choose a model", available_models, index=0 if available_models else None)
-         st.session_state.selected_model = selected_model
-
          with st.expander("Advanced Settings", expanded=False):
              temperature = st.slider("Temperature", 0.0, 2.0, 0.7, 0.1)
              max_tokens = st.number_input("Max Tokens", 50, 4096, 2048)
              system_prompt = st.text_area("System Prompt", st.session_state.system_prompt)
-             stream_output = st.checkbox("Stream Output", value=True)

          if st.button("Clear Chat"):
              clear_chat()

-     st.title("🤖 Enhanced Ollama Chat")
-     st.caption(f"Currently using: {selected_model}")
+     st.title("🤖 Qwen2.5-Coder Chat")
+     st.caption("Powered by Qwen2.5-Coder-32B-Instruct")

      # Main interface tabs
      tab1, tab2 = st.tabs(["Chat", "Code Editor"])
@@ -324,32 +304,26 @@ def main():
                  if not CodeAnalyzer.is_code_complete(code):
                      st.info("This code block appears to be incomplete. Would you like to complete it?")
                      if st.button("Complete Code", key=f"complete_{len(code)}"):
-                         completion = handle_code_continuation(code, selected_model)
+                         completion = handle_code_continuation(code)
                          st.code(completion, language="python")

          # Chat input
-         if prompt := st.chat_input("Message (use @ to attach a file, / for commands)"):
+         if prompt := st.chat_input("Message (use @ to attach a file)"):
              with st.chat_message("user"):
                  st.markdown(prompt)
              st.session_state.messages.append({"role": "user", "content": prompt})

              with st.chat_message("assistant"):
-                 if stream_output:
-                     message_placeholder = st.empty()
-                     for response in generate_response(prompt, selected_model, temperature, max_tokens, system_prompt, stream=True):
-                         message_placeholder.markdown(response)
-                     final_response = response
-                 else:
-                     with st.spinner("Thinking..."):
-                         final_response = generate_response(prompt, selected_model, temperature, max_tokens, system_prompt, stream=False)
-                         st.markdown(final_response)
+                 with st.spinner("Thinking..."):
+                     response = generate_response(prompt, temperature, max_tokens, system_prompt)
+                     st.markdown(response)

                  # Store code blocks in context
-                 code_blocks = CodeAnalyzer.extract_code_blocks(final_response)
+                 code_blocks = CodeAnalyzer.extract_code_blocks(response)
                  if code_blocks:
                      st.session_state.last_code_state = code_blocks[-1]

-                 st.session_state.messages.append({"role": "assistant", "content": final_response})
+                 st.session_state.messages.append({"role": "assistant", "content": response})

      with tab2:
          code_editor_section()
@@ -357,7 +331,7 @@ def main():
      # Footer
      add_vertical_space(2)
      st.markdown("---")
-     st.markdown("Made with ❤️ using Streamlit and Ollama")
+     st.markdown("Made with ❤️ using Streamlit and Qwen2.5-Coder")

  if __name__ == "__main__":
      main()
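
Note: the committed load_model_and_tokenizer() loads Qwen/Qwen2.5-Coder-32B-Instruct with a bare from_pretrained() call, which materializes the full float32 weights on CPU before optionally moving them to a single GPU. A minimal lower-memory sketch, assuming a recent transformers release with accelerate installed (the torch_dtype and device_map arguments are not part of this commit):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# bfloat16 roughly halves the weight footprint (about 64 GB instead of about 128 GB),
# and device_map="auto" lets accelerate place layers across the available GPUs.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)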
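
The committed generate_response() formats requests as a plain "System: ... User: ... Assistant:" string and strips the prefix after decoding. A sketch of the same call using the tokenizer's built-in chat template instead, continuing from the model and tokenizer loaded above (the example messages are illustrative, not from the commit):

# Build the prompt with the chat template shipped with the instruct checkpoint.
messages = [
    {"role": "system", "content": "You are a Python coding assistant."},
    {"role": "user", "content": "Write a function that reverses a string."},
]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

outputs = model.generate(
    input_ids,
    max_new_tokens=512,
    temperature=0.3,
    do_sample=True,
)
# Decode only the newly generated tokens so the prompt is not echoed back.
reply = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(reply)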