amiguel committed on
Commit 330fc4f · verified · 1 Parent(s): 94ddbf0

Update app.py

Files changed (1):
  1. app.py +157 -67
app.py CHANGED
@@ -1,81 +1,171 @@
 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForCausalLM
-from PyPDF2 import PdfReader
+import PyPDF2
 import pandas as pd
-import os
-from dotenv import load_dotenv
-
-# Load environment variables
-load_dotenv()
-
-# Title and emojis
+import torch
+
+# Set page configuration
+st.set_page_config(
+    page_title="WizNerd Insp",
+    page_icon="🚀",
+    layout="wide"
+)
+
+# Title with rocket emojis
 st.title("🚀 WizNerd Insp 🚀")

-# Sidebar for file uploads
-st.sidebar.header("Upload Files")
-uploaded_file = st.sidebar.file_uploader("Upload XLSX or PDF File", type=["xlsx", "pdf"])
-
-# Load the HuggingFace model and tokenizer
-@st.cache_resource
-def load_model():
-    model_name = "amiguel/optimizedModelLinsting6.1"
-    hf_token = os.getenv("HUGGINGFACE_TOKEN")  # Load token from .env
-    if hf_token:
-        tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_token)
-        model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=hf_token)
-    else:
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
-        model = AutoModelForCausalLM.from_pretrained(model_name)
-    return tokenizer, model
-
-try:
-    tokenizer, model = load_model()
-except Exception as e:
-    st.error(f"Error loading model: {e}")
-    st.info("Ensure the model name is correct or provide a valid Hugging Face token.")
-
-# Prompt style
-prompt_style = """
-Below is an instruction that describes a task, paired with an input that provides further context.
-Write a response that appropriately completes the request.
-Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.
-### Instruction:
-You are an experienced inspection methods engineer, a topside expert with advanced knowledge in scope definition, functional location determination, and inspection plan building.
-Please answer the following inspection scope question.
-### Instruction:
+# Define prompt template
+PROMPT_TEMPLATE = """Below is an instruction that describes a task, paired with an input that provides further context.
+You are an experienced inspection methods engineer with expertise in:
+- Offshore topside structural inspection planning
+- FLOC classification and RBI methodologies
+- Degradation mechanism analysis for process systems
+- ASME/API compliance and integrity engineering
+
+Write a response that appropriately completes the request following these steps:
+1. Analyze the context and question requirements
+2. Identify relevant codes and standards
+3. Consider equipment criticality factors
+4. Evaluate potential degradation mechanisms
+5. Formulate technical recommendation
+
+### instruction:
 {}
-### Output:
-<think>
-{}
-</think>
-{}"""
-
-# Function to process user input and generate response
-def generate_response(input_text):
-    formatted_input = prompt_style.format(input_text, "", "")
-    inputs = tokenizer(formatted_input, return_tensors="pt", truncation=True, max_length=512)
-    outputs = model.generate(**inputs, max_new_tokens=200, do_sample=True)
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response
-
-# Main chat interface
-#st.header("Chat with WizNerd Insp")
-user_input = st.text_input("Ask a question:")
-if st.button("Submit"):
-    if user_input.strip() != "":
-        response = generate_response(user_input)
-        st.write("Response:")
-        st.write(response)
+
+### output:
+<think>
+{{REASONING}}
+</think>
+{{ANSWER}}"""
+
+# Sidebar file uploader
+with st.sidebar:
+    st.header("Upload Documents")
+    uploaded_file = st.file_uploader(
+        "Choose a PDF or XLSX file",
+        type=["pdf", "xlsx"],
+        label_visibility="collapsed"
+    )
+
+# Initialize chat history
+if "messages" not in st.session_state:
+    st.session_state.messages = []

 # Process uploaded files
-if uploaded_file:
-    st.write(f"Processing {uploaded_file.type} file...")
-    if uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
-        df = pd.read_excel(uploaded_file)
-        st.write(df)
-    elif uploaded_file.type == "application/pdf":
-        pdf_reader = PdfReader(uploaded_file)
-        text = ""
-        for page in pdf_reader.pages:
-            text += page.extract_text()
-        st.write(text)
+@st.cache_data
+def process_file(uploaded_file):
+    file_content = ""
+
+    try:
+        if uploaded_file.type == "application/pdf":
+            pdf_reader = PyPDF2.PdfReader(uploaded_file)
+            for page in pdf_reader.pages:
+                file_content += page.extract_text()
+
+        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
+            df = pd.read_excel(uploaded_file)
+            file_content = df.to_string()
+
+    except Exception as e:
+        st.error(f"Error processing file: {str(e)}")
+        return None
+
+    return file_content
+
+# Load model and tokenizer with caching
+@st.cache_resource
+def load_model():
+    model_name = "amiguel/optimizedModelListing6.1"
+
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            device_map="auto",
+            torch_dtype=torch.float16,
+            trust_remote_code=True
+        )
+        return model, tokenizer
+    except Exception as e:
+        st.error(f"Failed to load model: {str(e)}")
+        return None, None
+
+model, tokenizer = load_model()
+
+# Display chat messages
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        if message["role"] == "assistant":
+            st.markdown(message["content"]["answer"])
+            with st.expander("View Reasoning Process"):
+                st.markdown(message["content"]["reasoning"])
+        else:
+            st.markdown(message["content"])
+
+# Chat input
+if prompt := st.chat_input("Ask your inspection question..."):
+    # Add user message to chat history
+    st.session_state.messages.append({"role": "user", "content": prompt})
+
+    # Process file if uploaded
+    file_context = ""
+    if uploaded_file is not None:
+        file_context = process_file(uploaded_file)
+
+    # Generate response
+    if model and tokenizer:
+        with st.chat_message("assistant"):
+            with st.spinner("Analyzing..."):
+                try:
+                    # Prepare input
+                    context_prompt = f"Context: {file_context}\n\nQuestion: {prompt}" if file_context else prompt
+                    formatted_prompt = PROMPT_TEMPLATE.format(context_prompt)
+
+                    # Tokenize input
+                    inputs = tokenizer(
+                        formatted_prompt,
+                        return_tensors="pt",
+                        max_length=4096,
+                        truncation=True
+                    ).to(model.device)
+
+                    # Generate response
+                    outputs = model.generate(
+                        **inputs,
+                        max_new_tokens=1024,
+                        temperature=0.7,
+                        top_p=0.9,
+                        repetition_penalty=1.1,
+                        do_sample=True
+                    )
+
+                    # Decode response
+                    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+                    # Parse response components
+                    try:
+                        reasoning = full_response.split("<think>")[1].split("</think>")[0].strip()
+                        answer = full_response.split("</think>")[1].strip()
+                    except:
+                        reasoning = "Reasoning steps not properly formatted"
+                        answer = full_response
+
+                    # Display response
+                    with st.expander("Reasoning Process (Click to view)", expanded=False):
+                        st.markdown(f"🔍 **Analysis Steps:**\n{reasoning}")
+
+                    st.markdown(f"📝 **Expert Recommendation:**\n{answer}")
+
+                    # Add to chat history
+                    st.session_state.messages.append({
+                        "role": "assistant",
+                        "content": {
+                            "answer": answer,
+                            "reasoning": reasoning
+                        }
+                    })
+
+                except Exception as e:
+                    st.error(f"Generation error: {str(e)}")
+    else:
+        st.error("Model not loaded properly")