jskinner215 committed on
Commit
b1fc865
·
1 Parent(s): 414bc96

Had an error about incorrect type (str) so adding error handling to debug

Browse files

def ask_llm_chunk(chunk, questions):
    """Answer a list of questions against one DataFrame chunk with a TAPAS-style model.

    Uses the module-level `tokenizer` and `model` (TAPAS table-QA pair) and the
    module-level Streamlit handle `st` for debug output.

    Args:
        chunk: pandas DataFrame holding the table slice to query; cast to str
            because TAPAS tokenizers require string-typed cells.
        questions: list of question strings.

    Returns:
        A list with one answer string per question. On tokenization/model error
        or token-limit overflow, a list of placeholder strings of the same length.
    """
    chunk = chunk.astype(str)  # TAPAS requires every cell to be a string
    try:
        inputs = tokenizer(
            table=chunk,
            queries=questions,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        st.write(f"Token shape: {inputs['input_ids'].shape[1]}")  # Debugging line

        # TAPAS has a hard 512-token sequence limit; bail out per-chunk.
        if inputs["input_ids"].shape[1] > 512:
            st.warning("Token limit exceeded for chunk")
            return ["Token limit exceeded for chunk"] * len(questions)

        outputs = model(**inputs)
        predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
            inputs,
            outputs.logits.detach(),
            outputs.logits_aggregation.detach(),
        )

        answers = []
        for coordinates in predicted_answer_coordinates:
            # Each coordinate is a (row, col) tuple. BUG FIX: the original used
            # `chunk.iloc[coordinate].values`, but `iloc[(r, c)]` returns a
            # scalar string which has no `.values` attribute (the "incorrect
            # type (str)" error being debugged). `iat` is the scalar accessor.
            # Also makes the single-cell and multi-cell cases consistently
            # return a string instead of ndarray-vs-string.
            cell_values = [chunk.iat[coordinate] for coordinate in coordinates]
            answers.append(", ".join(cell_values))
        return answers
    except Exception as e:
        # Broad by design while debugging: surface the error in the UI and
        # return one placeholder per question so the caller's shape holds.
        st.write(f"An error occurred: {e}")
        return ["Error occurred while tokenizing"] * len(questions)

Files changed (1) hide show
  1. app.py +26 -21
app.py CHANGED
@@ -12,31 +12,36 @@ def ask_llm_chunk(chunk, questions):
12
  chunk = chunk.astype(str)
13
  try:
14
  inputs = tokenizer(table=chunk, queries=questions, padding="max_length", truncation=True, return_tensors="pt")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  except Exception as e:
16
  st.write(f"An error occurred: {e}")
17
  return ["Error occurred while tokenizing"] * len(questions)
18
 
19
- # Check for token limit
20
- if inputs["input_ids"].shape[1] > 512:
21
- st.warning("Token limit exceeded for chunk")
22
- return ["Token limit exceeded for chunk"] * len(questions)
23
-
24
- outputs = model(**inputs)
25
- predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
26
- inputs,
27
- outputs.logits.detach(),
28
- outputs.logits_aggregation.detach()
29
- )
30
- answers = []
31
- for coordinates in predicted_answer_coordinates:
32
- if len(coordinates) == 1:
33
- answers.append(chunk.iloc[coordinates[0]].values)
34
- else:
35
- cell_values = []
36
- for coordinate in coordinates:
37
- cell_values.append(chunk.iloc[coordinate].values)
38
- answers.append(", ".join(cell_values))
39
- return answers
40
 
41
 
42
  MAX_ROWS_PER_CHUNK = 200
 
12
  chunk = chunk.astype(str)
13
  try:
14
  inputs = tokenizer(table=chunk, queries=questions, padding="max_length", truncation=True, return_tensors="pt")
15
+ st.write(f"Token shape: {inputs['input_ids'].shape[1]}") # Debugging line
16
+
17
+ # Check for token limit
18
+ if inputs["input_ids"].shape[1] > 512:
19
+ st.warning("Token limit exceeded for chunk")
20
+ return ["Token limit exceeded for chunk"] * len(questions)
21
+
22
+ outputs = model(**inputs)
23
+ predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
24
+ inputs,
25
+ outputs.logits.detach(),
26
+ outputs.logits_aggregation.detach()
27
+ )
28
+
29
+ answers = []
30
+ for coordinates in predicted_answer_coordinates:
31
+ st.write(f"Type of coordinates[0]: {type(coordinates[0])}") # Debugging line
32
+ st.write(f"Value of coordinates[0]: {coordinates[0]}") # Debugging line
33
+ if len(coordinates) == 1:
34
+ answers.append(chunk.iloc[coordinates[0]].values)
35
+ else:
36
+ cell_values = []
37
+ for coordinate in coordinates:
38
+ cell_values.append(chunk.iloc[coordinate].values)
39
+ answers.append(", ".join(cell_values))
40
+ return answers
41
  except Exception as e:
42
  st.write(f"An error occurred: {e}")
43
  return ["Error occurred while tokenizing"] * len(questions)
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
 
47
  MAX_ROWS_PER_CHUNK = 200