amiguel committed on
Commit 330fc4f · verified · 1 Parent(s): 94ddbf0

Update app.py

Files changed (1):
  1. app.py +157 -67
app.py CHANGED
@@ -1,81 +1,171 @@
 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForCausalLM
-from PyPDF2 import PdfReader
+import PyPDF2
 import pandas as pd
-import os
-from dotenv import load_dotenv
-
-# Load environment variables
-load_dotenv()
-
-# Title and emojis
+import torch
+
+# Set page configuration
+st.set_page_config(
+    page_title="WizNerd Insp",
+    page_icon="🚀",
+    layout="wide"
+)
+
+# Title with rocket emojis
 st.title("🚀 WizNerd Insp 🚀")

-# Sidebar for file uploads
-st.sidebar.header("Upload Files")
-uploaded_file = st.sidebar.file_uploader("Upload XLSX or PDF File", type=["xlsx", "pdf"])
-
-# Load the HuggingFace model and tokenizer
-@st.cache_resource
-def load_model():
-    model_name = "amiguel/optimizedModelLinsting6.1"
-    hf_token = os.getenv("HUGGINGFACE_TOKEN")  # Load token from .env
-    if hf_token:
-        tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_token)
-        model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=hf_token)
-    else:
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
-        model = AutoModelForCausalLM.from_pretrained(model_name)
-    return tokenizer, model
-
-try:
-    tokenizer, model = load_model()
-except Exception as e:
-    st.error(f"Error loading model: {e}")
-    st.info("Ensure the model name is correct or provide a valid Hugging Face token.")
-
-# Prompt style
-prompt_style = """
-Below is an instruction that describes a task, paired with an input that provides further context.
-Write a response that appropriately completes the request.
-Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.
-### Instruction:
-You are an experienced inspection methods engineer, a topside expert with advanced knowledge in scope definition, functional location determination, and inspection plan building.
-Please answer the following inspection scope question.
-### Instruction:
+# Define prompt template
+PROMPT_TEMPLATE = """Below is an instruction that describes a task, paired with an input that provides further context.
+You are an experienced inspection methods engineer with expertise in:
+- Offshore topside structural inspection planning
+- FLOC classification and RBI methodologies
+- Degradation mechanism analysis for process systems
+- ASME/API compliance and integrity engineering
+
+Write a response that appropriately completes the request following these steps:
+1. Analyze the context and question requirements
+2. Identify relevant codes and standards
+3. Consider equipment criticality factors
+4. Evaluate potential degradation mechanisms
+5. Formulate technical recommendation
+
+### instruction:
 {}
-### Output:
-<think>
-{}
-</think>
-{}"""
-
-# Function to process user input and generate response
-def generate_response(input_text):
-    formatted_input = prompt_style.format(input_text, "", "")
-    inputs = tokenizer(formatted_input, return_tensors="pt", truncation=True, max_length=512)
-    outputs = model.generate(**inputs, max_new_tokens=200, do_sample=True)
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response
-
-# Main chat interface
-#st.header("Chat with WizNerd Insp")
-user_input = st.text_input("Ask a question:")
-if st.button("Submit"):
-    if user_input.strip() != "":
-        response = generate_response(user_input)
-        st.write("Response:")
-        st.write(response)
+
+### output:
+<think>
+{{REASONING}}
+</think>
+{{ANSWER}}"""
+
+# Sidebar file uploader
+with st.sidebar:
+    st.header("Upload Documents")
+    uploaded_file = st.file_uploader(
+        "Choose a PDF or XLSX file",
+        type=["pdf", "xlsx"],
+        label_visibility="collapsed"
+    )
+
+# Initialize chat history
+if "messages" not in st.session_state:
+    st.session_state.messages = []

 # Process uploaded files
-if uploaded_file:
-    st.write(f"Processing {uploaded_file.type} file...")
-    if uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
-        df = pd.read_excel(uploaded_file)
-        st.write(df)
-    elif uploaded_file.type == "application/pdf":
-        pdf_reader = PdfReader(uploaded_file)
-        text = ""
-        for page in pdf_reader.pages:
-            text += page.extract_text()
-        st.write(text)
+@st.cache_data
+def process_file(uploaded_file):
+    file_content = ""
+
+    try:
+        if uploaded_file.type == "application/pdf":
+            pdf_reader = PyPDF2.PdfReader(uploaded_file)
+            for page in pdf_reader.pages:
+                file_content += page.extract_text()
+
+        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
+            df = pd.read_excel(uploaded_file)
+            file_content = df.to_string()
+
+    except Exception as e:
+        st.error(f"Error processing file: {str(e)}")
+        return None
+
+    return file_content
+
+# Load model and tokenizer with caching
+@st.cache_resource
+def load_model():
+    model_name = "amiguel/optimizedModelListing6.1"
+
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            device_map="auto",
+            torch_dtype=torch.float16,
+            trust_remote_code=True
+        )
+        return model, tokenizer
+    except Exception as e:
+        st.error(f"Failed to load model: {str(e)}")
+        return None, None
+
+model, tokenizer = load_model()
+
+# Display chat messages
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        if message["role"] == "assistant":
+            st.markdown(message["content"]["answer"])
+            with st.expander("View Reasoning Process"):
+                st.markdown(message["content"]["reasoning"])
+        else:
+            st.markdown(message["content"])
+
+# Chat input
+if prompt := st.chat_input("Ask your inspection question..."):
+    # Add user message to chat history
+    st.session_state.messages.append({"role": "user", "content": prompt})
+
+    # Process file if uploaded
+    file_context = ""
+    if uploaded_file is not None:
+        file_context = process_file(uploaded_file)
+
+    # Generate response
+    if model and tokenizer:
+        with st.chat_message("assistant"):
+            with st.spinner("Analyzing..."):
+                try:
+                    # Prepare input
+                    context_prompt = f"Context: {file_context}\n\nQuestion: {prompt}" if file_context else prompt
+                    formatted_prompt = PROMPT_TEMPLATE.format(context_prompt)
+
+                    # Tokenize input
+                    inputs = tokenizer(
+                        formatted_prompt,
+                        return_tensors="pt",
+                        max_length=4096,
+                        truncation=True
+                    ).to(model.device)
+
+                    # Generate response
+                    outputs = model.generate(
+                        **inputs,
+                        max_new_tokens=1024,
+                        temperature=0.7,
+                        top_p=0.9,
+                        repetition_penalty=1.1,
+                        do_sample=True
+                    )
+
+                    # Decode response
+                    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+                    # Parse response components
+                    try:
+                        reasoning = full_response.split("<think>")[1].split("</think>")[0].strip()
+                        answer = full_response.split("</think>")[1].strip()
+                    except:
+                        reasoning = "Reasoning steps not properly formatted"
+                        answer = full_response
+
+                    # Display response
+                    with st.expander("Reasoning Process (Click to view)", expanded=False):
+                        st.markdown(f"🔍 **Analysis Steps:**\n{reasoning}")
+
+                    st.markdown(f"📝 **Expert Recommendation:**\n{answer}")
+
+                    # Add to chat history
+                    st.session_state.messages.append({
+                        "role": "assistant",
+                        "content": {
+                            "answer": answer,
+                            "reasoning": reasoning
+                        }
+                    })
+
+                except Exception as e:
+                    st.error(f"Generation error: {str(e)}")
+    else:
+        st.error("Model not loaded properly")