File size: 2,538 Bytes
eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e 8f9985e eb50e2e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import pandas as pd
import io
import gradio as gr
from constants import REQUIRED_COLUMNS, MINIMAL_NUMBER_OF_ROWS
def validate_csv_can_be_read(file_content: str) -> pd.DataFrame:
    """
    Validate that the CSV file can be read and parsed.

    The text is wrapped in an in-memory buffer and handed to
    ``pd.read_csv``; parsing failures are translated into ``gr.Error``
    with the original exception attached as the cause.

    Parameters
    ----------
    file_content: str
        The content of the uploaded CSV file.

    Returns
    -------
    pd.DataFrame
        The parsed DataFrame if successful.

    Raises
    ------
    gr.Error: If CSV cannot be read or parsed
    """
    try:
        # Keep the try body to the single call that can fail.
        df = pd.read_csv(io.StringIO(file_content))
    except pd.errors.EmptyDataError as e:
        raise gr.Error("❌ CSV file is empty or contains no valid data") from e
    except pd.errors.ParserError as e:
        raise gr.Error(f"❌ Invalid CSV format<br><br>Error: {e}") from e
    except UnicodeDecodeError as e:
        # The input is already a decoded str, so this is not expected here;
        # the handler is kept as a safety net for callers.
        raise gr.Error(
            "❌ File encoding error<br><br>"
            "Your file appears to have an unsupported encoding.<br>"
            "Please save your CSV file with UTF-8 encoding and try again."
        ) from e
    return df
def validate_dataframe(df: pd.DataFrame) -> None:
    """
    Validate the DataFrame content and structure.

    Checks run in order and stop at the first failure: required columns
    are present, the DataFrame is not empty, required columns contain no
    missing values, and there are at least ``MINIMAL_NUMBER_OF_ROWS`` rows.
    On success a summary line is printed.

    Parameters
    ----------
    df: pd.DataFrame
        The DataFrame to validate.

    Raises
    ------
    gr.Error: If validation fails
    """
    # Required columns should be present. Report them in REQUIRED_COLUMNS
    # order so the message is deterministic (a set has no stable order).
    present_columns = set(df.columns)
    missing_columns = [
        col for col in REQUIRED_COLUMNS if col not in present_columns
    ]
    if missing_columns:
        raise gr.Error(
            f"❌ Missing required columns: {', '.join(map(str, missing_columns))}"
        )

    # Data should not be empty
    if df.empty:
        raise gr.Error("❌ CSV file is empty")

    # Check for missing values in required columns
    for col in REQUIRED_COLUMNS:
        missing_count = df[col].isnull().sum()
        if missing_count > 0:
            raise gr.Error(
                f"❌ Column '{col}' contains {missing_count} missing values"
            )

    # Check for reasonable number of rows
    if len(df) < MINIMAL_NUMBER_OF_ROWS:
        raise gr.Error(
            f"❌ CSV should have at least {MINIMAL_NUMBER_OF_ROWS} rows"
        )

    # Column labels are converted to str so non-string labels cannot break
    # the join.
    print(
        f"✅ CSV validation passed! Found {len(df)} rows "
        f"with columns: {', '.join(map(str, df.columns))}"
    )
def validate_csv_file(file_content: str) -> None:
    """
    Run the full validation pipeline on an uploaded CSV file.

    The content is first parsed into a DataFrame, which is then checked
    for structure and content.

    Parameters
    ----------
    file_content: str
        The content of the uploaded CSV file.

    Raises
    ------
    gr.Error: If validation fails
    """
    # Parse first; the resulting DataFrame feeds straight into the checks.
    validate_dataframe(validate_csv_can_be_read(file_content))
|