Final_Assignment_Template / result_dataset.py
martinsu's picture
Enhance app.py to validate and format SPACE_ID at startup, ensuring proper username/repo structure; update requirements.txt to add pandas dependency.
61feb08
raw
history blame contribute delete
858 Bytes
from datasets import load_dataset
import duckdb
# Login using e.g. `huggingface-cli login` to access this dataset
ds = load_dataset("agents-course/unit4-students-scores")
# Convert the dataset to a Pandas DataFrame
df = ds["train"].to_pandas()
# Initialize DuckDB connection
con = duckdb.connect(":memory:")
# Register the DataFrame as a table
con.register("train", df)
# Example SQL query: Get average score by subject
query = """
WITH FilteredTrains AS (
SELECT *
FROM train
WHERE (code LIKE '%' || username || '%') or username = 'martinsu'
),
RankedTrains AS (
SELECT
code,
username,
score,
RANK() OVER (ORDER BY score DESC) AS rank
FROM
FilteredTrains
)
SELECT
rank
FROM
RankedTrains
WHERE
username = 'martinsu';
"""
result = con.execute(query).fetchdf()
print(result)