jskinner215 committed on
Commit
b1fc865
·
1 Parent(s): 414bc96

Had an error about incorrect type (str) so adding error handling to debug

Browse files

def ask_llm_chunk(chunk, questions):
    """Answer a list of questions against one DataFrame chunk with a TAPAS-style model.

    Uses the module-level `tokenizer` and `model` (TAPAS table-QA pair) and the
    module-level Streamlit handle `st` for debug output.

    Args:
        chunk: pandas DataFrame holding the table slice to query; cast to str
            because TAPAS tokenizers require string-typed cells.
        questions: list of question strings.

    Returns:
        A list with one answer string per question. On tokenization/model error
        or token-limit overflow, a list of placeholder strings of the same length.
    """
    chunk = chunk.astype(str)  # TAPAS requires every cell to be a string
    try:
        inputs = tokenizer(
            table=chunk,
            queries=questions,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        st.write(f"Token shape: {inputs['input_ids'].shape[1]}")  # Debugging line

        # TAPAS has a hard 512-token sequence limit; bail out per-chunk.
        if inputs["input_ids"].shape[1] > 512:
            st.warning("Token limit exceeded for chunk")
            return ["Token limit exceeded for chunk"] * len(questions)

        outputs = model(**inputs)
        predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
            inputs,
            outputs.logits.detach(),
            outputs.logits_aggregation.detach(),
        )

        answers = []
        for coordinates in predicted_answer_coordinates:
            # Each coordinate is a (row, col) tuple. BUG FIX: the original used
            # `chunk.iloc[coordinate].values`, but `iloc[(r, c)]` returns a
            # scalar string which has no `.values` attribute (the "incorrect
            # type (str)" error being debugged). `iat` is the scalar accessor.
            # Also makes the single-cell and multi-cell cases consistently
            # return a string instead of ndarray-vs-string.
            cell_values = [chunk.iat[coordinate] for coordinate in coordinates]
            answers.append(", ".join(cell_values))
        return answers
    except Exception as e:
        # Broad by design while debugging: surface the error in the UI and
        # return one placeholder per question so the caller's shape holds.
        st.write(f"An error occurred: {e}")
        return ["Error occurred while tokenizing"] * len(questions)

Files changed (1) hide show
  1. app.py +26 -21
app.py CHANGED
@@ -12,31 +12,36 @@ def ask_llm_chunk(chunk, questions):
12
  chunk = chunk.astype(str)
13
  try:
14
  inputs = tokenizer(table=chunk, queries=questions, padding="max_length", truncation=True, return_tensors="pt")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  except Exception as e:
16
  st.write(f"An error occurred: {e}")
17
  return ["Error occurred while tokenizing"] * len(questions)
18
 
19
- # Check for token limit
20
- if inputs["input_ids"].shape[1] > 512:
21
- st.warning("Token limit exceeded for chunk")
22
- return ["Token limit exceeded for chunk"] * len(questions)
23
-
24
- outputs = model(**inputs)
25
- predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
26
- inputs,
27
- outputs.logits.detach(),
28
- outputs.logits_aggregation.detach()
29
- )
30
- answers = []
31
- for coordinates in predicted_answer_coordinates:
32
- if len(coordinates) == 1:
33
- answers.append(chunk.iloc[coordinates[0]].values)
34
- else:
35
- cell_values = []
36
- for coordinate in coordinates:
37
- cell_values.append(chunk.iloc[coordinate].values)
38
- answers.append(", ".join(cell_values))
39
- return answers
40
 
41
 
42
  MAX_ROWS_PER_CHUNK = 200
 
12
  chunk = chunk.astype(str)
13
  try:
14
  inputs = tokenizer(table=chunk, queries=questions, padding="max_length", truncation=True, return_tensors="pt")
15
+ st.write(f"Token shape: {inputs['input_ids'].shape[1]}") # Debugging line
16
+
17
+ # Check for token limit
18
+ if inputs["input_ids"].shape[1] > 512:
19
+ st.warning("Token limit exceeded for chunk")
20
+ return ["Token limit exceeded for chunk"] * len(questions)
21
+
22
+ outputs = model(**inputs)
23
+ predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
24
+ inputs,
25
+ outputs.logits.detach(),
26
+ outputs.logits_aggregation.detach()
27
+ )
28
+
29
+ answers = []
30
+ for coordinates in predicted_answer_coordinates:
31
+ st.write(f"Type of coordinates[0]: {type(coordinates[0])}") # Debugging line
32
+ st.write(f"Value of coordinates[0]: {coordinates[0]}") # Debugging line
33
+ if len(coordinates) == 1:
34
+ answers.append(chunk.iloc[coordinates[0]].values)
35
+ else:
36
+ cell_values = []
37
+ for coordinate in coordinates:
38
+ cell_values.append(chunk.iloc[coordinate].values)
39
+ answers.append(", ".join(cell_values))
40
+ return answers
41
  except Exception as e:
42
  st.write(f"An error occurred: {e}")
43
  return ["Error occurred while tokenizing"] * len(questions)
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
 
47
  MAX_ROWS_PER_CHUNK = 200