amiguel committed
Commit 004fb60 · verified · 1 Parent(s): de40422

Update app.py

Files changed (1)
  1. app.py +19 -70
app.py CHANGED
@@ -5,8 +5,6 @@ import PyPDF2
 import pandas as pd
 import torch
 import os
-import time
-import re
 
 # Set page configuration
 st.set_page_config(
@@ -39,10 +37,9 @@ BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/99
 # Sidebar configuration
 with st.sidebar:
     st.header("Upload Documents 📂")
-    uploaded_files = st.file_uploader(
-        "Choose PDF, XLSX, or CSV files",
-        type=["pdf", "xlsx", "csv"],
-        accept_multiple_files=True,  # Allow multiple file uploads
+    uploaded_file = st.file_uploader(
+        "Choose a PDF or XLSX file",
+        type=["pdf", "xlsx"],
         label_visibility="collapsed"
     )
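Reviewer note: with accept_multiple_files=True dropped, st.file_uploader returns a single UploadedFile (or None when nothing is uploaded) rather than a list, which is what the new process_file below expects. A minimal sketch of consuming that return value (the st.info message is illustrative, not from this commit):

import streamlit as st

uploaded_file = st.file_uploader("Choose a PDF or XLSX file", type=["pdf", "xlsx"])
if uploaded_file is None:
    # Nothing uploaded yet: the chat falls back to an empty file context
    st.info("Upload a document to provide file context.")
else:
    st.write(f"Loaded {uploaded_file.name} ({uploaded_file.type})")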
48
 
@@ -52,38 +49,20 @@ if "messages" not in st.session_state:
 
 # File processing function
 @st.cache_data
-def process_files(uploaded_files):
-    if not uploaded_files:
-        return []
+def process_file(uploaded_file):
+    if uploaded_file is None:
+        return ""
 
-    scopes = []
     try:
-        for uploaded_file in uploaded_files:
-            if uploaded_file.type == "application/pdf":
-                pdf_reader = PyPDF2.PdfReader(uploaded_file)
-                text = "\n".join([page.extract_text() for page in pdf_reader.pages])
-                # Split text into potential scope lines (e.g., by newlines or sentences)
-                lines = [line.strip() for line in text.split("\n") if line.strip()]
-                # Filter lines that look like scope instructions (e.g., contain keywords like "at location", "DAL/")
-                scope_lines = [line for line in lines if re.search(r"(at location|DAL/|PSV-|CD-|DA-)", line, re.IGNORECASE)]
-                scopes.extend(scope_lines)
-
-            elif uploaded_file.type in ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "text/csv"]:
-                if uploaded_file.type == "text/csv":
-                    df = pd.read_csv(uploaded_file)
-                else:
-                    df = pd.read_excel(uploaded_file)
-                # Assume the first column contains scope instructions
-                if not df.empty:
-                    scope_column = df.columns[0]  # First column
-                    scope_lines = df[scope_column].dropna().astype(str).tolist()
-                    scopes.extend([line.strip() for line in scope_lines if line.strip()])
-
+        if uploaded_file.type == "application/pdf":
+            pdf_reader = PyPDF2.PdfReader(uploaded_file)
+            return "\n".join([page.extract_text() for page in pdf_reader.pages])
+        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
+            df = pd.read_excel(uploaded_file)
+            return df.to_markdown()
     except Exception as e:
         st.error(f"📄 Error processing file: {str(e)}")
-        return []
-
-    return scopes
+        return ""
 
 # Model loading function
 @st.cache_resource
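Dependency note on the new XLSX branch: pandas' DataFrame.to_markdown() delegates to the optional tabulate package and raises ImportError when it is missing, so tabulate should be available in the Space's requirements. A hedged fallback sketch (the to_string() fallback is an assumption, not part of this commit):

import pandas as pd

def dataframe_to_text(df: pd.DataFrame) -> str:
    """Render a DataFrame as text for the prompt context."""
    try:
        return df.to_markdown()  # requires the optional 'tabulate' package
    except ImportError:
        return df.to_string()    # plain fixed-width fallback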
@@ -99,7 +78,7 @@ def load_model(hf_token):
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)
     model = AutoModelForSequenceClassification.from_pretrained(
         MODEL_NAME,
-        num_labels=len(LABEL_TO_CLASS),
+        num_labels=len(LABEL_TO_CLASS),  # Ensure correct number of labels
         token=hf_token
     )
 
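For context, the hunk above sits inside load_model, whose surrounding lines are unchanged and therefore not shown. A plausible shape of the full function, assuming MODEL_NAME and LABEL_TO_CLASS are module-level constants and the caller unpacks model, tokenizer (a hedged reconstruction, not the verbatim source):

from transformers import AutoTokenizer, AutoModelForSequenceClassification

@st.cache_resource
def load_model(hf_token):
    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)
        model = AutoModelForSequenceClassification.from_pretrained(
            MODEL_NAME,
            num_labels=len(LABEL_TO_CLASS),  # head size must match the label map
            token=hf_token,
        )
        return model, tokenizer
    except Exception as e:
        st.error(f"🤖 Model loading failed: {str(e)}")
        return None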
@@ -113,7 +92,7 @@ def load_model(hf_token):
         st.error(f"🤖 Model loading failed: {str(e)}")
         return None
 
-# Classification function with streaming simulation
+# Classification function
 def classify_instruction(prompt, file_context, model, tokenizer):
     full_prompt = f"Context:\n{file_context}\n\nInstruction: {prompt}"
 
@@ -130,17 +109,6 @@ def classify_instruction(prompt, file_context, model, tokenizer):
 
     return class_name
 
-def stream_classification_output(class_name, delay=0.05):
-    """Simulate streaming by displaying the class name character by character."""
-    response_container = st.empty()
-    full_response = ""
-    for char in class_name:
-        full_response += char
-        response_container.markdown(f"Predicted class: {full_response} ▌")
-        time.sleep(delay)
-    response_container.markdown(f"Predicted class: {full_response}")
-    return full_response
-
 # Display chat messages
 for message in st.session_state.messages:
     try:
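The body of classify_instruction between the previous two hunks is untouched by this commit and not displayed. For readers of the diff, a plausible reconstruction of the elided lines, assuming standard single-label classification via torch.argmax and the LABEL_TO_CLASS map (the exact tokenizer arguments are assumptions):

def classify_instruction(prompt, file_context, model, tokenizer):
    full_prompt = f"Context:\n{file_context}\n\nInstruction: {prompt}"
    # Tokenize, run a forward pass without gradients, and map the top logit
    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_id = torch.argmax(logits, dim=-1).item()
    class_name = LABEL_TO_CLASS[predicted_id]
    return class_name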
@@ -170,35 +138,16 @@ if prompt := st.chat_input("Ask your inspection question..."):
     st.markdown(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
 
-    # Process file context (if any)
-    file_scopes = process_files(uploaded_files)
-    file_context = "\n".join(file_scopes) if file_scopes else ""
+    # Process file context
+    file_context = process_file(uploaded_file)
 
-    # Classify the user prompt
+    # Classify the instruction
     if model and tokenizer:
         try:
             with st.chat_message("assistant", avatar=BOT_AVATAR):
-                # Classify the user-entered prompt
                 predicted_class = classify_instruction(prompt, file_context, model, tokenizer)
-                # Stream the classification output
-                streamed_response = stream_classification_output(predicted_class)
                 response = f"Predicted class: {predicted_class}"
-
-                # If there are scopes from files, classify them too
-                if file_scopes:
-                    st.markdown("### Classifications from Uploaded Files")
-                    results = []
-                    for scope in file_scopes:
-                        predicted_class = classify_instruction(scope, file_context, model, tokenizer)
-                        results.append({"Scope": scope, "Predicted Class": predicted_class})
-
-                    # Display results in a table
-                    df_results = pd.DataFrame(results)
-                    st.table(df_results)
-
-                    # Add table to chat history
-                    response += "\n\n### Classifications from Uploaded Files\n" + df_results.to_markdown(index=False)
-
+                st.markdown(response)
                 st.session_state.messages.append({"role": "assistant", "content": response})
 
         except Exception as e:
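If character-by-character output is ever wanted back, Streamlit's built-in st.write_stream would replace the removed manual loop more idiomatically — a hedged sketch, not part of this commit:

import time

def type_out(text, delay=0.05):
    # Generator yielding one character at a time for st.write_stream
    for char in text:
        yield char
        time.sleep(delay)

# Inside the assistant chat message block:
# st.write_stream(type_out(f"Predicted class: {predicted_class}"))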
 