Mustafiz996 committed on
Commit
cf8051d
·
verified ·
1 Parent(s): 044ec3a

Upload folder using huggingface_hub

Browse files
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ HF_TOKEN="************"
README.md CHANGED
@@ -1,113 +1,113 @@
1
- ---
2
- tags: [gradio-custom-component, custom-component-track, gradio-spreadsheet-custom-component]
3
- title: gradio_spreadsheetcomponent
4
- short_description: This component answers questions about spreadsheets.
5
- colorFrom: blue
6
- colorTo: yellow
7
- sdk: gradio
8
- pinned: false
9
- app_file: space.py
10
- app_link: https://huggingface.co/spaces/Mustafiz996/gradio_spreadsheetcomponent
11
- ---
12
-
13
- # `gradio_spreadsheetcomponent`
14
- <a href="https://pypi.org/project/gradio_spreadsheetcomponent/" target="_blank"><img alt="PyPI - Version" src="https://img.shields.io/pypi/v/gradio_spreadsheetcomponent"></a>
15
-
16
- This component is used to answer questions about spreadsheets.
17
-
18
- ## Installation
19
-
20
- ```bash
21
- pip install gradio_spreadsheetcomponent
22
- ```
23
-
24
- ## Usage
25
-
26
- ```python
27
- import gradio as gr
28
- from gradio_spreadsheetcomponent import SpreadsheetComponent
29
- from dotenv import load_dotenv
30
- import os
31
- import pandas as pd
32
-
33
- def answer_question(file, question):
34
- if not file or not question:
35
- return "Please upload a file and enter a question."
36
-
37
- # Load the spreadsheet data
38
- df = pd.read_excel(file.name)
39
-
40
- # Create a SpreadsheetComponent instance
41
- spreadsheet = SpreadsheetComponent(value=df)
42
-
43
- # Use the component to answer the question
44
- return spreadsheet.answer_question(question)
45
-
46
- with gr.Blocks() as demo:
47
- gr.Markdown("# Spreadsheet Question Answering")
48
-
49
- with gr.Row():
50
- file_input = gr.File(label="Upload Spreadsheet", file_types=[".xlsx"])
51
- question_input = gr.Textbox(label="Ask a Question")
52
-
53
- answer_output = gr.Textbox(label="Answer", interactive=False, lines=4)
54
-
55
- submit_button = gr.Button("Submit")
56
- submit_button.click(answer_question, inputs=[file_input, question_input], outputs=answer_output)
57
-
58
-
59
- if __name__ == "__main__":
60
- demo.launch()
61
-
62
- ```
63
-
64
- ## `SpreadsheetComponent`
65
-
66
- ### Initialization
67
-
68
- <table>
69
- <thead>
70
- <tr>
71
- <th align="left">name</th>
72
- <th align="left" style="width: 25%;">type</th>
73
- <th align="left">default</th>
74
- <th align="left">description</th>
75
- </tr>
76
- </thead>
77
- <tbody>
78
- <tr>
79
- <td align="left"><code>value</code></td>
80
- <td align="left" style="width: 25%;">
81
-
82
- ```python
83
- pandas.core.frame.DataFrame | list | dict | None
84
- ```
85
-
86
- </td>
87
- <td align="left"><code>None</code></td>
88
- <td align="left">Default value to show in spreadsheet. Can be a pandas DataFrame, list of lists, or dictionary</td>
89
- </tr>
90
- </tbody></table>
91
-
92
-
93
-
94
-
95
- ### User function
96
-
97
- The impact on the users predict function varies depending on whether the component is used as an input or output for an event (or both).
98
-
99
- - When used as an Input, the component only impacts the input signature of the user function.
100
- - When used as an output, the component only impacts the return signature of the user function.
101
-
102
- The code snippet below is accurate in cases where the component is used as both an input and an output.
103
-
104
- - **As output:** Is passed, the preprocessed input data sent to the user's function in the backend.
105
-
106
-
107
- ```python
108
- def predict(
109
- value: typing.Any
110
- ) -> Unknown:
111
- return value
112
- ```
113
-
 
1
+ ---
2
+ tags: [gradio-custom-component, custom-component-track, gradio-spreadsheet-custom-component]
3
+ title: gradio_spreadsheetcomponent
4
+ short_description: This component answers questions about spreadsheets.
5
+ colorFrom: blue
6
+ colorTo: yellow
7
+ sdk: gradio
8
+ pinned: false
9
+ app_file: space.py
10
+ app_link: https://huggingface.co/spaces/Mustafiz996/gradio_spreadsheetcomponent
11
+ ---
12
+
13
+ # `gradio_spreadsheetcomponent`
14
+ <a href="https://pypi.org/project/gradio_spreadsheetcomponent/" target="_blank"><img alt="PyPI - Version" src="https://img.shields.io/pypi/v/gradio_spreadsheetcomponent"></a>
15
+
16
+ This component is used to answer questions about spreadsheets.
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ pip install gradio_spreadsheetcomponent
22
+ ```
23
+
24
+ ## Usage
25
+
26
+ ```python
27
+ import gradio as gr
28
+ from gradio_spreadsheetcomponent import SpreadsheetComponent
29
+ from dotenv import load_dotenv
30
+ import os
31
+ import pandas as pd
32
+
33
+ def answer_question(file, question):
34
+ if not file or not question:
35
+ return "Please upload a file and enter a question."
36
+
37
+ # Load the spreadsheet data
38
+ df = pd.read_excel(file.name)
39
+
40
+ # Create a SpreadsheetComponent instance
41
+ spreadsheet = SpreadsheetComponent(value=df)
42
+
43
+ # Use the component to answer the question
44
+ return spreadsheet.answer_question(question)
45
+
46
+ with gr.Blocks() as demo:
47
+ gr.Markdown("# Spreadsheet Question Answering")
48
+
49
+ with gr.Row():
50
+ file_input = gr.File(label="Upload Spreadsheet", file_types=[".xlsx"])
51
+ question_input = gr.Textbox(label="Ask a Question")
52
+
53
+ answer_output = gr.Textbox(label="Answer", interactive=False, lines=4)
54
+
55
+ submit_button = gr.Button("Submit")
56
+ submit_button.click(answer_question, inputs=[file_input, question_input], outputs=answer_output)
57
+
58
+
59
+ if __name__ == "__main__":
60
+ demo.launch()
61
+
62
+ ```
63
+
64
+ ## `SpreadsheetComponent`
65
+
66
+ ### Initialization
67
+
68
+ <table>
69
+ <thead>
70
+ <tr>
71
+ <th align="left">name</th>
72
+ <th align="left" style="width: 25%;">type</th>
73
+ <th align="left">default</th>
74
+ <th align="left">description</th>
75
+ </tr>
76
+ </thead>
77
+ <tbody>
78
+ <tr>
79
+ <td align="left"><code>value</code></td>
80
+ <td align="left" style="width: 25%;">
81
+
82
+ ```python
83
+ pandas.core.frame.DataFrame | list | dict | None
84
+ ```
85
+
86
+ </td>
87
+ <td align="left"><code>None</code></td>
88
+ <td align="left">Default value to show in spreadsheet. Can be a pandas DataFrame, list of lists, or dictionary</td>
89
+ </tr>
90
+ </tbody></table>
91
+
92
+
93
+
94
+
95
+ ### User function
96
+
97
+ The impact on the user's predict function varies depending on whether the component is used as an input or output for an event (or both).
98
+
99
+ - When used as an Input, the component only impacts the input signature of the user function.
100
+ - When used as an output, the component only impacts the return signature of the user function.
101
+
102
+ The code snippet below is accurate in cases where the component is used as both an input and an output.
103
+
104
+ - **As input:** Is passed, the preprocessed input data sent to the user's function in the backend.
105
+
106
+
107
+ ```python
108
+ def predict(
109
+ value: typing.Any
110
+ ) -> Unknown:
111
+ return value
112
+ ```
113
+
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
- gradio==5.32.1
2
- pandas
3
- git+https://github.com/huggingface/huggingface_hub.git
4
- openpyxl
5
- python-dotenv
6
- numpy
7
  gradio_spreadsheetcomponent
 
1
+ gradio==5.32.1
2
+ pandas
3
+ git+https://github.com/huggingface/huggingface_hub.git
4
+ openpyxl
5
+ python-dotenv
6
+ numpy
7
  gradio_spreadsheetcomponent
src/README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- tags: [gradio-custom-component, SimpleTextbox, gradio-spreadsheet-custom-component]
3
  title: gradio_spreadsheetcomponent
4
  short_description: This component answers questions about spreadsheets.
5
  colorFrom: blue
@@ -7,6 +7,7 @@ colorTo: yellow
7
  sdk: gradio
8
  pinned: false
9
  app_file: space.py
 
10
  ---
11
 
12
  # `gradio_spreadsheetcomponent`
 
1
  ---
2
+ tags: [gradio-custom-component, custom-component-track, gradio-spreadsheet-custom-component]
3
  title: gradio_spreadsheetcomponent
4
  short_description: This component answers questions about spreadsheets.
5
  colorFrom: blue
 
7
  sdk: gradio
8
  pinned: false
9
  app_file: space.py
10
+ app_link: https://huggingface.co/spaces/Mustafiz996/gradio_spreadsheetcomponent
11
  ---
12
 
13
  # `gradio_spreadsheetcomponent`
src/backend/gradio_spreadsheetcomponent/spreadsheetcomponent.py CHANGED
@@ -45,6 +45,116 @@ class SpreadsheetComponent(FormComponent):
45
 
46
  self.hf_client = InferenceClient(provider="hf-inference", api_key=os.getenv("HF_TOKEN"))
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def answer_question(self, question: str) -> str:
49
  """Ask a question about the current spreadsheet data"""
50
  if self.hf_client is None:
@@ -57,6 +167,7 @@ class SpreadsheetComponent(FormComponent):
57
  # Convert DataFrame to table format
58
  table = {col: [str(val) if pd.notna(val) else "" for val in self.value[col]]
59
  for col in self.value.columns}
 
60
  # Get answer using table question answering
61
  result = self.hf_client.table_question_answering(
62
  table=table,
@@ -64,20 +175,8 @@ class SpreadsheetComponent(FormComponent):
64
  model="google/tapas-large-finetuned-wtq"
65
  )
66
 
67
- # Format the answer with more context
68
- parts = []
69
- parts.append(f"Answer: {result.answer}")
70
-
71
- if hasattr(result, 'cells') and result.cells:
72
- parts.append(f"Relevant cell values: {', '.join(result.cells)}")
73
-
74
- if hasattr(result, 'coordinates') and result.coordinates:
75
- parts.append("Location of relevant information:")
76
- for coords in result.coordinates:
77
- row, col = coords
78
- parts.append(f"- Row {row}, Column '{col}'")
79
-
80
- return "\n".join(parts)
81
 
82
  except Exception as e:
83
  return f"Error processing question: {str(e)}\nPlease try rephrasing your question or verify the data format."
 
45
 
46
  self.hf_client = InferenceClient(provider="hf-inference", api_key=os.getenv("HF_TOKEN"))
47
 
48
def postprocess_answer(self, result) -> str:
    """Format a table-question-answering result as a human-readable string.

    When the model reports an aggregation (sum, average, max, min, count)
    together with cell coordinates, the aggregate is recomputed from
    ``self.value`` so the reported number comes from the spreadsheet data
    rather than from the model's free-text answer. Otherwise the model's
    answer is returned along with the cells and coordinates it pointed at.

    Args:
        result: Object returned by the table-QA call. Read via attribute
            access: ``answer``, ``aggregator``, ``coordinates`` and ``cells``.
            Any of them may be missing or ``None``.

    Returns:
        A newline-joined, user-facing message. Never raises: any failure is
        reported in the returned text.
    """
    try:
        answer = getattr(result, 'answer', None)
        if not answer or str(answer).lower() in ('none', 'null', 'nan'):
            return "No answer found"

        # Spellings that may appear in the aggregator label, mapped to the
        # name of the array method that computes them.
        agg_keywords = {
            'sum': 'sum',
            'average': 'mean',
            'mean': 'mean',
            'maximum': 'max',
            'max': 'max',
            'minimum': 'min',
            'min': 'min',
            'count': 'count',
        }

        # The aggregator may be absent or None; treat that as "no
        # aggregation" instead of failing on `.lower()`.
        aggregator = str(getattr(result, 'aggregator', None) or '').lower()
        operation = next(
            (keyword for keyword in agg_keywords if keyword in aggregator),
            None,
        )

        coordinates = getattr(result, 'coordinates', None)
        cells = getattr(result, 'cells', None)

        if operation and coordinates:
            try:
                # Only cells from the first referenced column are aggregated;
                # coordinates pointing at other columns are ignored.
                col_name = None
                values = []
                for row_idx, col_idx in coordinates:
                    current = self.value.columns[col_idx]
                    if col_name is None:
                        col_name = current
                    elif col_name != current:
                        continue

                    value = self.value.iloc[row_idx, col_idx]
                    if pd.notna(value):
                        values.append(value)

                if col_name and values:
                    # Non-numeric entries become NaN; a NaN aggregate is
                    # rejected below and the model's own answer is used.
                    numeric_values = pd.to_numeric(values, errors='coerce')

                    method = agg_keywords[operation]
                    if method == 'count':
                        computed_value = len(numeric_values)
                    else:
                        computed_value = getattr(numeric_values, method)()

                    if pd.notna(computed_value):
                        if isinstance(computed_value, float):
                            computed_value = round(computed_value, 2)

                        parts = [f"Answer: {computed_value}"]
                        if cells:
                            parts.append(f"Values used: {', '.join(str(x) for x in cells)}")
                        parts.append(f"Column used: '{col_name}'")
                        parts.append(f"Number of values considered: {len(numeric_values)}")
                        return "\n".join(parts)

            except Exception as calc_error:
                # Verification is best-effort: keep the model's answer and
                # say why it could not be checked.
                return "\n".join([
                    f"Answer: {answer}",
                    f"Note: Could not verify {operation} calculation: {str(calc_error)}",
                ])

        # No aggregation requested, or it could not be recomputed.
        parts = [f"Answer: {answer}"]

        if cells:
            parts.append(f"Relevant cell values: {', '.join(str(x) for x in cells)}")

        if coordinates:
            parts.append("Location of relevant information:")
            for row_idx, col_idx in coordinates:
                col_name = self.value.columns[col_idx]
                parts.append(f"- Row {row_idx}, Column '{col_name}'")

        return "\n".join(parts)

    except Exception as e:
        return f"Error processing answer: {str(e)}"
157
+
158
  def answer_question(self, question: str) -> str:
159
  """Ask a question about the current spreadsheet data"""
160
  if self.hf_client is None:
 
167
  # Convert DataFrame to table format
168
  table = {col: [str(val) if pd.notna(val) else "" for val in self.value[col]]
169
  for col in self.value.columns}
170
+
171
  # Get answer using table question answering
172
  result = self.hf_client.table_question_answering(
173
  table=table,
 
175
  model="google/tapas-large-finetuned-wtq"
176
  )
177
 
178
+ # Use postprocess_answer to handle the result
179
+ return self.postprocess_answer(result)
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
  except Exception as e:
182
  return f"Error processing question: {str(e)}\nPlease try rephrasing your question or verify the data format."
src/demo/.env ADDED
@@ -0,0 +1 @@
 
 
1
+ HF_TOKEN="************"
src/demo/requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  gradio==5.32.1
2
  pandas
3
- huggingface-hub
4
  openpyxl
5
  python-dotenv
6
  numpy
 
1
  gradio==5.32.1
2
  pandas
3
+ git+https://github.com/huggingface/huggingface_hub.git
4
  openpyxl
5
  python-dotenv
6
  numpy
src/pyproject.toml CHANGED
@@ -8,7 +8,7 @@ build-backend = "hatchling.build"
8
 
9
  [project]
10
  name = "gradio_spreadsheetcomponent"
11
- version = "0.0.2"
12
  description = "This component is used to answer questions about spreadsheets."
13
  readme = "README.md"
14
  license = "apache-2.0"
 
8
 
9
  [project]
10
  name = "gradio_spreadsheetcomponent"
11
+ version = "0.0.3"
12
  description = "This component is used to answer questions about spreadsheets."
13
  readme = "README.md"
14
  license = "apache-2.0"