amiguel committed on
Commit
135171d
·
verified ·
1 Parent(s): 5902230

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -12
app.py CHANGED
@@ -85,16 +85,17 @@ def process_file(uploaded_file, _cache_key):
85
 
86
  elif uploaded_file.type in ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "text/csv"]:
87
  df = pd.read_excel(uploaded_file) if uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" else pd.read_csv(uploaded_file)
88
- required_cols = ["Scope", "Functional Location", "Unit Name"]
89
- available_cols = [col for col in required_cols if col in df.columns]
 
90
 
91
- if not available_cols:
92
  st.warning("No 'Scope' or 'Functional Location' columns found. Treating as plain text.")
93
  return {"type": "text", "content": df.to_string()}
94
 
95
- # Pre-process and concatenate Scope and Functional Location
96
- df = df.dropna(subset=available_cols)
97
- df["input_text"] = df[available_cols].apply(
98
  lambda row: " ".join([re.sub(r'\s+', ' ', str(val).lower().strip()) for val in row]), axis=1
99
  )
100
  return {"type": "table", "content": df[["input_text"] + available_cols]}
@@ -103,7 +104,7 @@ def process_file(uploaded_file, _cache_key):
103
  st.error(f"πŸ“„ Error processing file: {str(e)}")
104
  return None
105
 
106
- # Model loading function (cached, but only loaded once)
107
  @st.cache_resource
108
  def load_model(hf_token):
109
  if not TRANSFORMERS_AVAILABLE:
@@ -169,13 +170,12 @@ tokenizer = st.session_state.get("tokenizer")
169
  # Check for new file upload and clear cache
170
  if uploaded_file and uploaded_file != st.session_state.last_uploaded_file:
171
  st.cache_data.clear() # Clear all cached data
172
- st.session_state.file_processed = False # Reset processing state
173
- st.session_state.file_data = None # Clear previous file data
174
- st.session_state.last_uploaded_file = uploaded_file # Update last uploaded file
175
 
176
  # Process uploaded file once
177
  if uploaded_file and not st.session_state.file_processed:
178
- # Use file name and size as a cache key to ensure uniqueness
179
  cache_key = f"{uploaded_file.name}_{uploaded_file.size}"
180
  file_data = process_file(uploaded_file, cache_key)
181
  if file_data:
@@ -211,7 +211,9 @@ if prompt := st.chat_input("Ask your inspection question..."):
211
  file_data = st.session_state.file_data
212
  if file_data["type"] == "table":
213
  predictions = classify_instruction(prompt, file_data["content"], model, tokenizer)
214
- result_df = file_data["content"][["Scope", "Functional Location"]].copy()
 
 
215
  result_df["Predicted Class"] = predictions
216
  st.write("Predicted Item Classes:")
217
  st.table(result_df)
 
85
 
86
  elif uploaded_file.type in ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "text/csv"]:
87
  df = pd.read_excel(uploaded_file) if uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" else pd.read_csv(uploaded_file)
88
+ required_cols = ["Scope", "Functional Location"]
89
+ optional_cols = ["Unit name"] # New column added
90
+ available_cols = [col for col in required_cols + optional_cols if col in df.columns]
91
 
92
+ if not any(col in required_cols for col in available_cols):
93
  st.warning("No 'Scope' or 'Functional Location' columns found. Treating as plain text.")
94
  return {"type": "text", "content": df.to_string()}
95
 
96
+ # Pre-process and concatenate Scope and Functional Location (and Unit name if present)
97
+ df = df.dropna(subset=[col for col in available_cols if col in required_cols])
98
+ df["input_text"] = df[[col for col in available_cols if col in required_cols]].apply(
99
  lambda row: " ".join([re.sub(r'\s+', ' ', str(val).lower().strip()) for val in row]), axis=1
100
  )
101
  return {"type": "table", "content": df[["input_text"] + available_cols]}
 
104
  st.error(f"πŸ“„ Error processing file: {str(e)}")
105
  return None
106
 
107
+ # Model loading function
108
  @st.cache_resource
109
  def load_model(hf_token):
110
  if not TRANSFORMERS_AVAILABLE:
 
170
  # Check for new file upload and clear cache
171
  if uploaded_file and uploaded_file != st.session_state.last_uploaded_file:
172
  st.cache_data.clear() # Clear all cached data
173
+ st.session_state.file_processed = False
174
+ st.session_state.file_data = None
175
+ st.session_state.last_uploaded_file = uploaded_file
176
 
177
  # Process uploaded file once
178
  if uploaded_file and not st.session_state.file_processed:
 
179
  cache_key = f"{uploaded_file.name}_{uploaded_file.size}"
180
  file_data = process_file(uploaded_file, cache_key)
181
  if file_data:
 
211
  file_data = st.session_state.file_data
212
  if file_data["type"] == "table":
213
  predictions = classify_instruction(prompt, file_data["content"], model, tokenizer)
214
+ # Include "Unit name" if present, otherwise exclude it
215
+ available_cols = [col for col in ["Scope", "Functional Location", "Unit name"] if col in file_data["content"].columns]
216
+ result_df = file_data["content"][available_cols].copy()
217
  result_df["Predicted Class"] = predictions
218
  st.write("Predicted Item Classes:")
219
  st.table(result_df)