Spaces:
Running
Running
| import gradio as gr | |
| import numpy as np | |
| import os | |
| import pandas as pd | |
| import pysr | |
| import tempfile | |
| from typing import Optional | |
# Placeholder table returned alongside an error message when no fit was run.
# It has the same three columns as a real result, but no rows.
empty_df = pd.DataFrame(
    {column: [] for column in ("equation", "loss", "complexity")}
)
def greet(
    file_obj: Optional[tempfile._TemporaryFileWrapper],
    col_to_fit: str,
    niterations: int,
    maxsize: int,
    binary_operators: list,
    unary_operators: list,
    force_run: bool,
):
    """Fit a PySR symbolic-regression model to an uploaded CSV file.

    The inputs are validated first; on any validation failure the function
    returns ``(empty_df, message)`` instead of raising, so the message can be
    shown to the user.

    Args:
        file_obj: Uploaded file handle, or ``None`` if nothing was uploaded.
            Only its ``.name`` (path on disk) is used.
        col_to_fit: Name of the CSV column to predict. All other columns are
            used as features.
        niterations: Forwarded to ``PySRRegressor(niterations=...)``.
        maxsize: Forwarded to ``PySRRegressor(maxsize=...)``.
        binary_operators: Names of the binary operators to search over.
        unary_operators: Names of the unary operators to search over.
        force_run: If true, skip the refusal for files with more than
            10,000 rows.

    Returns:
        A ``(dataframe, message)`` tuple. On success the dataframe holds the
        ``equation``, ``loss`` and ``complexity`` columns of the fitted
        model (all cast to ``str``) and the message contains a snippet for
        reproducing the run locally. On failure the dataframe is
        ``empty_df`` and the message explains what to fix.

    Side effects:
        On success, writes the result table to ``pysr_output.csv`` in the
        current working directory.
    """
    if col_to_fit == "":
        return empty_df, "Please enter a column to predict!"

    if not binary_operators and not unary_operators:
        return empty_df, "Please select at least one operator!"

    if file_obj is None:
        return empty_df, "Please upload a CSV file!"

    # Parse the upload once; report unreadable files as a message rather
    # than letting the exception escape, like every other validation step.
    try:
        df = pd.read_csv(file_obj.name)
    except pd.errors.EmptyDataError:
        return empty_df, "The file is empty!"
    except pd.errors.ParserError as err:
        return empty_df, f"Could not parse the file as CSV: {err}"

    if len(df) == 0:
        return empty_df, "The file is empty!"

    # At least one feature column is needed besides the target.
    if len(df.columns) < 2:
        return empty_df, "The file has only one column!"

    if col_to_fit not in df.columns:
        return (
            empty_df,
            f"The column to predict, {col_to_fit}, is not in the file! "
            f"I found {list(df.columns)}.",
        )

    if len(df) > 10_000 and not force_run:
        return (
            empty_df,
            "You have uploaded a file with more than 10,000 rows. "
            "This will take very long to run. "
            "Please upload a subsample of the data, "
            "or check the box 'Ignore Warnings'.",
        )

    y = np.array(df[col_to_fit])
    X = df.drop([col_to_fit], axis=1)

    # The regressor receives the operator *lists*; the quoted string form
    # is built separately below and used only for the displayed snippet.
    model = pysr.PySRRegressor(
        progress=False,
        bumper=True,
        maxsize=maxsize,
        niterations=niterations,
        binary_operators=list(binary_operators),
        unary_operators=list(unary_operators),
    )
    model.fit(X, y)

    # Cast every column to str before handing the table back for display.
    equations = model.equations_[["equation", "loss", "complexity"]].astype(str)

    # Double-quoted list literals for the copy-pasteable snippet.
    binary_repr = str(list(binary_operators)).replace("'", '"')
    unary_repr = str(list(unary_operators)).replace("'", '"')
    msg = (
        "Success!\n"
        "You may run the model locally (faster) with "
        "the following parameters:\n"
        "model = PySRRegressor(\n"
        f"    niterations={niterations},\n"
        f"    binary_operators={binary_repr},\n"
        f"    unary_operators={unary_repr},\n"
        f"    maxsize={maxsize},\n"
        ")\n"
        "model.fit(X, y)"
    )

    equations.to_csv("pysr_output.csv", index=False)
    return equations, msg
def main():
    """Assemble the Gradio interface around `greet` and launch it."""
    # Input widgets, in the same order as the parameters of `greet`.
    csv_upload = gr.File(label="Upload a CSV File")
    target_column = gr.Textbox(label="Column to Predict", placeholder="y")
    iterations_slider = gr.Slider(
        minimum=1,
        maximum=1000,
        value=40,
        label="Number of Iterations",
        step=1,
    )
    complexity_slider = gr.Slider(
        minimum=7,
        maximum=35,
        value=20,
        label="Maximum Complexity",
        step=1,
    )
    binary_choices = gr.CheckboxGroup(
        choices=["+", "-", "*", "/", "^"],
        label="Binary Operators",
        value=["+", "-", "*", "/"],
    )
    unary_choices = gr.CheckboxGroup(
        choices=[
            "sin",
            "cos",
            "exp",
            "log",
            "square",
            "cube",
            "sqrt",
            "abs",
            "tan",
        ],
        label="Unary Operators",
        value=[],
    )
    ignore_warnings = gr.Checkbox(value=False, label="Ignore Warnings")

    # Second output: the text box that receives the message from `greet`.
    message_box = gr.Textbox(label="Error Log")

    demo = gr.Interface(
        fn=greet,
        description="Symbolic Regression with PySR. Watch search progress by clicking 'See logs'!",
        inputs=[
            csv_upload,
            target_column,
            iterations_slider,
            complexity_slider,
            binary_choices,
            unary_choices,
            ignore_warnings,
        ],
        outputs=["dataframe", message_box],
    )
    demo.launch()
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()