Upload folder using huggingface_hub
Browse files- .env +1 -0
- README.md +113 -113
- requirements.txt +6 -6
- src/README.md +2 -1
- src/backend/gradio_spreadsheetcomponent/spreadsheetcomponent.py +113 -14
- src/demo/.env +1 -0
- src/demo/requirements.txt +1 -1
- src/pyproject.toml +1 -1
.env
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
HF_TOKEN="************"
|
README.md
CHANGED
@@ -1,113 +1,113 @@
|
|
1 |
-
---
|
2 |
-
tags: [gradio-custom-component, custom-component-track, gradio-spreadsheet-custom-component]
|
3 |
-
title: gradio_spreadsheetcomponent
|
4 |
-
short_description: This component answers questions about spreadsheets.
|
5 |
-
colorFrom: blue
|
6 |
-
colorTo: yellow
|
7 |
-
sdk: gradio
|
8 |
-
pinned: false
|
9 |
-
app_file: space.py
|
10 |
-
app_link: https://huggingface.co/spaces/Mustafiz996/gradio_spreadsheetcomponent
|
11 |
-
---
|
12 |
-
|
13 |
-
# `gradio_spreadsheetcomponent`
|
14 |
-
<a href="https://pypi.org/project/gradio_spreadsheetcomponent/" target="_blank"><img alt="PyPI - Version" src="https://img.shields.io/pypi/v/gradio_spreadsheetcomponent"></a>
|
15 |
-
|
16 |
-
This component is used to answer questions about spreadsheets.
|
17 |
-
|
18 |
-
## Installation
|
19 |
-
|
20 |
-
```bash
|
21 |
-
pip install gradio_spreadsheetcomponent
|
22 |
-
```
|
23 |
-
|
24 |
-
## Usage
|
25 |
-
|
26 |
-
```python
|
27 |
-
import gradio as gr
|
28 |
-
from gradio_spreadsheetcomponent import SpreadsheetComponent
|
29 |
-
from dotenv import load_dotenv
|
30 |
-
import os
|
31 |
-
import pandas as pd
|
32 |
-
|
33 |
-
def answer_question(file, question):
|
34 |
-
if not file or not question:
|
35 |
-
return "Please upload a file and enter a question."
|
36 |
-
|
37 |
-
# Load the spreadsheet data
|
38 |
-
df = pd.read_excel(file.name)
|
39 |
-
|
40 |
-
# Create a SpreadsheetComponent instance
|
41 |
-
spreadsheet = SpreadsheetComponent(value=df)
|
42 |
-
|
43 |
-
# Use the component to answer the question
|
44 |
-
return spreadsheet.answer_question(question)
|
45 |
-
|
46 |
-
with gr.Blocks() as demo:
|
47 |
-
gr.Markdown("# Spreadsheet Question Answering")
|
48 |
-
|
49 |
-
with gr.Row():
|
50 |
-
file_input = gr.File(label="Upload Spreadsheet", file_types=[".xlsx"])
|
51 |
-
question_input = gr.Textbox(label="Ask a Question")
|
52 |
-
|
53 |
-
answer_output = gr.Textbox(label="Answer", interactive=False, lines=4)
|
54 |
-
|
55 |
-
submit_button = gr.Button("Submit")
|
56 |
-
submit_button.click(answer_question, inputs=[file_input, question_input], outputs=answer_output)
|
57 |
-
|
58 |
-
|
59 |
-
if __name__ == "__main__":
|
60 |
-
demo.launch()
|
61 |
-
|
62 |
-
```
|
63 |
-
|
64 |
-
## `SpreadsheetComponent`
|
65 |
-
|
66 |
-
### Initialization
|
67 |
-
|
68 |
-
<table>
|
69 |
-
<thead>
|
70 |
-
<tr>
|
71 |
-
<th align="left">name</th>
|
72 |
-
<th align="left" style="width: 25%;">type</th>
|
73 |
-
<th align="left">default</th>
|
74 |
-
<th align="left">description</th>
|
75 |
-
</tr>
|
76 |
-
</thead>
|
77 |
-
<tbody>
|
78 |
-
<tr>
|
79 |
-
<td align="left"><code>value</code></td>
|
80 |
-
<td align="left" style="width: 25%;">
|
81 |
-
|
82 |
-
```python
|
83 |
-
pandas.core.frame.DataFrame | list | dict | None
|
84 |
-
```
|
85 |
-
|
86 |
-
</td>
|
87 |
-
<td align="left"><code>None</code></td>
|
88 |
-
<td align="left">Default value to show in spreadsheet. Can be a pandas DataFrame, list of lists, or dictionary</td>
|
89 |
-
</tr>
|
90 |
-
</tbody></table>
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
### User function
|
96 |
-
|
97 |
-
The impact on the users predict function varies depending on whether the component is used as an input or output for an event (or both).
|
98 |
-
|
99 |
-
- When used as an Input, the component only impacts the input signature of the user function.
|
100 |
-
- When used as an output, the component only impacts the return signature of the user function.
|
101 |
-
|
102 |
-
The code snippet below is accurate in cases where the component is used as both an input and an output.
|
103 |
-
|
104 |
-
- **As output:** Is passed, the preprocessed input data sent to the user's function in the backend.
|
105 |
-
|
106 |
-
|
107 |
-
```python
|
108 |
-
def predict(
|
109 |
-
value: typing.Any
|
110 |
-
) -> Unknown:
|
111 |
-
return value
|
112 |
-
```
|
113 |
-
|
|
|
1 |
+
---
|
2 |
+
tags: [gradio-custom-component, custom-component-track, gradio-spreadsheet-custom-component]
|
3 |
+
title: gradio_spreadsheetcomponent
|
4 |
+
short_description: This component answers questions about spreadsheets.
|
5 |
+
colorFrom: blue
|
6 |
+
colorTo: yellow
|
7 |
+
sdk: gradio
|
8 |
+
pinned: false
|
9 |
+
app_file: space.py
|
10 |
+
app_link: https://huggingface.co/spaces/Mustafiz996/gradio_spreadsheetcomponent
|
11 |
+
---
|
12 |
+
|
13 |
+
# `gradio_spreadsheetcomponent`
|
14 |
+
<a href="https://pypi.org/project/gradio_spreadsheetcomponent/" target="_blank"><img alt="PyPI - Version" src="https://img.shields.io/pypi/v/gradio_spreadsheetcomponent"></a>
|
15 |
+
|
16 |
+
This component is used to answer questions about spreadsheets.
|
17 |
+
|
18 |
+
## Installation
|
19 |
+
|
20 |
+
```bash
|
21 |
+
pip install gradio_spreadsheetcomponent
|
22 |
+
```
|
23 |
+
|
24 |
+
## Usage
|
25 |
+
|
26 |
+
```python
|
27 |
+
import gradio as gr
|
28 |
+
from gradio_spreadsheetcomponent import SpreadsheetComponent
|
29 |
+
from dotenv import load_dotenv
|
30 |
+
import os
|
31 |
+
import pandas as pd
|
32 |
+
|
33 |
+
def answer_question(file, question):
|
34 |
+
if not file or not question:
|
35 |
+
return "Please upload a file and enter a question."
|
36 |
+
|
37 |
+
# Load the spreadsheet data
|
38 |
+
df = pd.read_excel(file.name)
|
39 |
+
|
40 |
+
# Create a SpreadsheetComponent instance
|
41 |
+
spreadsheet = SpreadsheetComponent(value=df)
|
42 |
+
|
43 |
+
# Use the component to answer the question
|
44 |
+
return spreadsheet.answer_question(question)
|
45 |
+
|
46 |
+
with gr.Blocks() as demo:
|
47 |
+
gr.Markdown("# Spreadsheet Question Answering")
|
48 |
+
|
49 |
+
with gr.Row():
|
50 |
+
file_input = gr.File(label="Upload Spreadsheet", file_types=[".xlsx"])
|
51 |
+
question_input = gr.Textbox(label="Ask a Question")
|
52 |
+
|
53 |
+
answer_output = gr.Textbox(label="Answer", interactive=False, lines=4)
|
54 |
+
|
55 |
+
submit_button = gr.Button("Submit")
|
56 |
+
submit_button.click(answer_question, inputs=[file_input, question_input], outputs=answer_output)
|
57 |
+
|
58 |
+
|
59 |
+
if __name__ == "__main__":
|
60 |
+
demo.launch()
|
61 |
+
|
62 |
+
```
|
63 |
+
|
64 |
+
## `SpreadsheetComponent`
|
65 |
+
|
66 |
+
### Initialization
|
67 |
+
|
68 |
+
<table>
|
69 |
+
<thead>
|
70 |
+
<tr>
|
71 |
+
<th align="left">name</th>
|
72 |
+
<th align="left" style="width: 25%;">type</th>
|
73 |
+
<th align="left">default</th>
|
74 |
+
<th align="left">description</th>
|
75 |
+
</tr>
|
76 |
+
</thead>
|
77 |
+
<tbody>
|
78 |
+
<tr>
|
79 |
+
<td align="left"><code>value</code></td>
|
80 |
+
<td align="left" style="width: 25%;">
|
81 |
+
|
82 |
+
```python
|
83 |
+
pandas.core.frame.DataFrame | list | dict | None
|
84 |
+
```
|
85 |
+
|
86 |
+
</td>
|
87 |
+
<td align="left"><code>None</code></td>
|
88 |
+
<td align="left">Default value to show in spreadsheet. Can be a pandas DataFrame, list of lists, or dictionary</td>
|
89 |
+
</tr>
|
90 |
+
</tbody></table>
|
91 |
+
|
92 |
+
|
93 |
+
|
94 |
+
|
95 |
+
### User function
|
96 |
+
|
97 |
+
The impact on the user's `predict` function varies depending on whether the component is used as an input or output for an event (or both).
|
98 |
+
|
99 |
+
- When used as an Input, the component only impacts the input signature of the user function.
|
100 |
+
- When used as an output, the component only impacts the return signature of the user function.
|
101 |
+
|
102 |
+
The code snippet below is accurate in cases where the component is used as both an input and an output.
|
103 |
+
|
104 |
+
- **As input:** Is passed the preprocessed input data sent to the user's function in the backend.
|
105 |
+
|
106 |
+
|
107 |
+
```python
|
108 |
+
def predict(
|
109 |
+
value: typing.Any
|
110 |
+
) -> Unknown:
|
111 |
+
return value
|
112 |
+
```
|
113 |
+
|
requirements.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
-
gradio==5.32.1
|
2 |
-
pandas
|
3 |
-
git+https://github.com/huggingface/huggingface_hub.git
|
4 |
-
openpyxl
|
5 |
-
python-dotenv
|
6 |
-
numpy
|
7 |
gradio_spreadsheetcomponent
|
|
|
1 |
+
gradio==5.32.1
|
2 |
+
pandas
|
3 |
+
git+https://github.com/huggingface/huggingface_hub.git
|
4 |
+
openpyxl
|
5 |
+
python-dotenv
|
6 |
+
numpy
|
7 |
gradio_spreadsheetcomponent
|
src/README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
tags: [gradio-custom-component,
|
3 |
title: gradio_spreadsheetcomponent
|
4 |
short_description: This component answers questions about spreadsheets.
|
5 |
colorFrom: blue
|
@@ -7,6 +7,7 @@ colorTo: yellow
|
|
7 |
sdk: gradio
|
8 |
pinned: false
|
9 |
app_file: space.py
|
|
|
10 |
---
|
11 |
|
12 |
# `gradio_spreadsheetcomponent`
|
|
|
1 |
---
|
2 |
+
tags: [gradio-custom-component, custom-component-track, gradio-spreadsheet-custom-component]
|
3 |
title: gradio_spreadsheetcomponent
|
4 |
short_description: This component answers questions about spreadsheets.
|
5 |
colorFrom: blue
|
|
|
7 |
sdk: gradio
|
8 |
pinned: false
|
9 |
app_file: space.py
|
10 |
+
app_link: https://huggingface.co/spaces/Mustafiz996/gradio_spreadsheetcomponent
|
11 |
---
|
12 |
|
13 |
# `gradio_spreadsheetcomponent`
|
src/backend/gradio_spreadsheetcomponent/spreadsheetcomponent.py
CHANGED
@@ -45,6 +45,116 @@ class SpreadsheetComponent(FormComponent):
|
|
45 |
|
46 |
self.hf_client = InferenceClient(provider="hf-inference", api_key=os.getenv("HF_TOKEN"))
|
47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
def answer_question(self, question: str) -> str:
|
49 |
"""Ask a question about the current spreadsheet data"""
|
50 |
if self.hf_client is None:
|
@@ -57,6 +167,7 @@ class SpreadsheetComponent(FormComponent):
|
|
57 |
# Convert DataFrame to table format
|
58 |
table = {col: [str(val) if pd.notna(val) else "" for val in self.value[col]]
|
59 |
for col in self.value.columns}
|
|
|
60 |
# Get answer using table question answering
|
61 |
result = self.hf_client.table_question_answering(
|
62 |
table=table,
|
@@ -64,20 +175,8 @@ class SpreadsheetComponent(FormComponent):
|
|
64 |
model="google/tapas-large-finetuned-wtq"
|
65 |
)
|
66 |
|
67 |
-
#
|
68 |
-
|
69 |
-
parts.append(f"Answer: {result.answer}")
|
70 |
-
|
71 |
-
if hasattr(result, 'cells') and result.cells:
|
72 |
-
parts.append(f"Relevant cell values: {', '.join(result.cells)}")
|
73 |
-
|
74 |
-
if hasattr(result, 'coordinates') and result.coordinates:
|
75 |
-
parts.append("Location of relevant information:")
|
76 |
-
for coords in result.coordinates:
|
77 |
-
row, col = coords
|
78 |
-
parts.append(f"- Row {row}, Column '{col}'")
|
79 |
-
|
80 |
-
return "\n".join(parts)
|
81 |
|
82 |
except Exception as e:
|
83 |
return f"Error processing question: {str(e)}\nPlease try rephrasing your question or verify the data format."
|
|
|
45 |
|
46 |
self.hf_client = InferenceClient(provider="hf-inference", api_key=os.getenv("HF_TOKEN"))
|
47 |
|
48 |
+
def postprocess_answer(self, result) -> str:
    """Format a table-QA result, recomputing aggregations locally when possible.

    When the result names an aggregation (sum, average/mean, max, min, count)
    and supplies cell coordinates, the referenced cells of ``self.value`` are
    re-aggregated here and the locally computed value is reported instead of
    the model's text answer. Only cells from the first referenced column are
    used; coordinates pointing at other columns are skipped.

    Args:
        result: Object exposing ``answer`` and, optionally, ``aggregator``,
            ``cells`` and ``coordinates`` (an iterable of
            ``(row_idx, col_idx)`` pairs) attributes. Missing attributes are
            treated as ``None``.

    Returns:
        A newline-joined, human-readable answer. Never raises: any failure is
        reported in the returned string.
    """
    try:
        answer = getattr(result, 'answer', None)
        if not answer or str(answer).lower() in ['none', 'null', 'nan', '']:
            return "No answer found"

        # Keyword found in the aggregator label -> canonical operation name.
        agg_keywords = {
            'sum': 'sum',
            'average': 'mean',
            'mean': 'mean',
            'maximum': 'max',
            'max': 'max',
            'minimum': 'min',
            'min': 'min',
            'count': 'count',
        }

        # The aggregator may be absent or None (plain cell selection), so
        # normalise it to a string *before* lower-casing it.
        aggregator = str(getattr(result, 'aggregator', None) or '').lower()
        keyword = next((k for k in agg_keywords if k in aggregator), None)

        coordinates = getattr(result, 'coordinates', None)
        cells = getattr(result, 'cells', None)

        if keyword and coordinates:
            operation = agg_keywords[keyword]
            try:
                # Collect non-NA values from the first referenced column only,
                # so the aggregation runs over consistent data.
                col_name = None
                values = []
                for row_idx, col_idx in coordinates:
                    current = self.value.columns[col_idx]
                    if col_name is None:
                        col_name = current
                    elif current != col_name:
                        continue  # value belongs to a different column
                    value = self.value.iloc[row_idx, col_idx]
                    if pd.notna(value):
                        values.append(value)

                # `is not None` (not truthiness) so a column labelled 0 or ""
                # is still accepted.
                if col_name is not None and values:
                    # Non-numeric entries become NaN; a NaN aggregate falls
                    # through to the model's original answer below.
                    numeric_values = pd.to_numeric(values, errors='coerce')

                    if len(numeric_values) > 0:
                        if operation == 'count':
                            computed_value = len(numeric_values)
                        else:
                            # 'sum' / 'mean' / 'max' / 'min' map directly onto
                            # the array method of the same name.
                            computed_value = getattr(numeric_values, operation)()

                        # Unwrap NumPy scalars so formatting and the float
                        # check behave like plain Python numbers.
                        if hasattr(computed_value, 'item'):
                            computed_value = computed_value.item()

                        if pd.notna(computed_value):
                            if isinstance(computed_value, float):
                                computed_value = round(computed_value, 2)

                            parts = [f"Answer: {computed_value}"]
                            if cells:
                                parts.append(f"Values used: {', '.join(str(x) for x in cells)}")
                            parts.append(f"Column used: '{col_name}'")
                            parts.append(f"Number of values considered: {len(numeric_values)}")
                            return "\n".join(parts)

            except Exception as calc_error:
                # Verification is best-effort: keep the model's answer and
                # explain why it could not be checked.
                return "\n".join([
                    f"Answer: {answer}",
                    f"Note: Could not verify {keyword} calculation: {str(calc_error)}",
                ])

        # No aggregation requested, or verification produced nothing usable:
        # report the model's answer with its supporting cells and locations.
        parts = [f"Answer: {answer}"]

        if cells:
            parts.append(f"Relevant cell values: {', '.join(str(x) for x in cells)}")

        if coordinates:
            parts.append("Location of relevant information:")
            for row_idx, col_idx in coordinates:
                col_name = self.value.columns[col_idx]
                parts.append(f"- Row {row_idx}, Column '{col_name}'")

        return "\n".join(parts)

    except Exception as e:
        return f"Error processing answer: {str(e)}"
|
157 |
+
|
158 |
def answer_question(self, question: str) -> str:
|
159 |
"""Ask a question about the current spreadsheet data"""
|
160 |
if self.hf_client is None:
|
|
|
167 |
# Convert DataFrame to table format
|
168 |
table = {col: [str(val) if pd.notna(val) else "" for val in self.value[col]]
|
169 |
for col in self.value.columns}
|
170 |
+
|
171 |
# Get answer using table question answering
|
172 |
result = self.hf_client.table_question_answering(
|
173 |
table=table,
|
|
|
175 |
model="google/tapas-large-finetuned-wtq"
|
176 |
)
|
177 |
|
178 |
+
# Use postprocess_answer to handle the result
|
179 |
+
return self.postprocess_answer(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
|
181 |
except Exception as e:
|
182 |
return f"Error processing question: {str(e)}\nPlease try rephrasing your question or verify the data format."
|
src/demo/.env
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
HF_TOKEN="************"
|
src/demo/requirements.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
gradio==5.32.1
|
2 |
pandas
|
3 |
-
huggingface
|
4 |
openpyxl
|
5 |
python-dotenv
|
6 |
numpy
|
|
|
1 |
gradio==5.32.1
|
2 |
pandas
|
3 |
+
git+https://github.com/huggingface/huggingface_hub.git
|
4 |
openpyxl
|
5 |
python-dotenv
|
6 |
numpy
|
src/pyproject.toml
CHANGED
@@ -8,7 +8,7 @@ build-backend = "hatchling.build"
|
|
8 |
|
9 |
[project]
|
10 |
name = "gradio_spreadsheetcomponent"
|
11 |
-
version = "0.0.
|
12 |
description = "This component is used to answer questions about spreadsheets."
|
13 |
readme = "README.md"
|
14 |
license = "apache-2.0"
|
|
|
8 |
|
9 |
[project]
|
10 |
name = "gradio_spreadsheetcomponent"
|
11 |
+
version = "0.0.3"
|
12 |
description = "This component is used to answer questions about spreadsheets."
|
13 |
readme = "README.md"
|
14 |
license = "apache-2.0"
|