Update app.py
app.py CHANGED
@@ -85,16 +85,17 @@ def process_file(uploaded_file, _cache_key):
 
         elif uploaded_file.type in ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "text/csv"]:
             df = pd.read_excel(uploaded_file) if uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" else pd.read_csv(uploaded_file)
-            required_cols = ["Scope", "Functional Location"]
-            available_cols = [col for col in required_cols if col in df.columns]
+            required_cols = ["Scope", "Functional Location"]
+            optional_cols = ["Unit name"]  # New column added
+            available_cols = [col for col in required_cols + optional_cols if col in df.columns]
 
-            if not available_cols:
+            if not any(col in required_cols for col in available_cols):
                 st.warning("No 'Scope' or 'Functional Location' columns found. Treating as plain text.")
                 return {"type": "text", "content": df.to_string()}
 
-            # Pre-process and concatenate Scope and Functional Location
-            df = df.dropna(subset=available_cols)
-            df["input_text"] = df[available_cols].apply(
+            # Pre-process and concatenate Scope and Functional Location (and Unit name if present)
+            df = df.dropna(subset=[col for col in available_cols if col in required_cols])
+            df["input_text"] = df[[col for col in available_cols if col in required_cols]].apply(
                 lambda row: " ".join([re.sub(r'\s+', ' ', str(val).lower().strip()) for val in row]), axis=1
             )
             return {"type": "table", "content": df[["input_text"] + available_cols]}
@@ -103,7 +104,7 @@ def process_file(uploaded_file, _cache_key):
         st.error(f"Error processing file: {str(e)}")
         return None
 
-# Model loading function
+# Model loading function
 @st.cache_resource
 def load_model(hf_token):
     if not TRANSFORMERS_AVAILABLE:
@@ -169,13 +170,12 @@ tokenizer = st.session_state.get("tokenizer")
 # Check for new file upload and clear cache
 if uploaded_file and uploaded_file != st.session_state.last_uploaded_file:
     st.cache_data.clear()  # Clear all cached data
-    st.session_state.file_processed = False
-    st.session_state.file_data = None
-    st.session_state.last_uploaded_file = uploaded_file
+    st.session_state.file_processed = False
+    st.session_state.file_data = None
+    st.session_state.last_uploaded_file = uploaded_file
 
 # Process uploaded file once
 if uploaded_file and not st.session_state.file_processed:
-    # Use file name and size as a cache key to ensure uniqueness
     cache_key = f"{uploaded_file.name}_{uploaded_file.size}"
     file_data = process_file(uploaded_file, cache_key)
     if file_data:
@@ -211,7 +211,9 @@ if prompt := st.chat_input("Ask your inspection question..."):
         file_data = st.session_state.file_data
         if file_data["type"] == "table":
             predictions = classify_instruction(prompt, file_data["content"], model, tokenizer)
-            result_df = file_data["content"][["Scope", "Functional Location"]].copy()
+            # Include "Unit name" if present, otherwise exclude it
+            available_cols = [col for col in ["Scope", "Functional Location", "Unit name"] if col in file_data["content"].columns]
+            result_df = file_data["content"][available_cols].copy()
             result_df["Predicted Class"] = predictions
             st.write("Predicted Item Classes:")
             st.table(result_df)