Spaces:
Running
Running
Jon Solow
committed on
Commit
·
2a27c15
1
Parent(s):
64f6425
Add copy loader page that loads dbt
Browse files- src/dbt_data_client.py +16 -1
- src/pages/98_Load_Data copy.py +38 -0
- src/queries/nflverse/github_data.py +2 -2
src/dbt_data_client.py
CHANGED
|
@@ -1,8 +1,23 @@
|
|
| 1 |
import duckdb
|
| 2 |
import os
|
|
|
|
|
|
|
| 3 |
|
| 4 |
-
|
|
|
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
def get_db_conn():
    """Open and return a DuckDB connection to the file at ``DUCKDB_PATH``."""
    connection = duckdb.connect(DUCKDB_PATH)
    return connection
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import duckdb
|
| 2 |
import os
|
| 3 |
+
import subprocess
|
| 4 |
+
import streamlit as st
|
| 5 |
|
| 6 |
+
|
| 7 |
+
DBT_DIR = os.path.join(os.path.dirname(__file__), "dbt_yfdash")
|
| 8 |
+
DUCKDB_PATH = os.path.join(DBT_DIR, "dev.duckdb")
|
| 9 |
|
| 10 |
|
| 11 |
def get_db_conn():
    """Open and return a DuckDB connection to the file at ``DUCKDB_PATH``."""
    connection = duckdb.connect(DUCKDB_PATH)
    return connection
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def run_dbt_build():
    """Run ``dbt build`` inside ``DBT_DIR`` and show its output on the page.

    stderr is merged into stdout so the whole log is captured as one stream.
    The first and last lines of the captured output are dropped before the
    text is rendered with ``st.text``. A non-zero exit code is additionally
    reported with ``st.error`` so a failed build is not mistaken for a
    successful refresh.
    """
    completed = subprocess.run(
        ["dbt", "build"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        cwd=DBT_DIR,
        check=False,  # the exit code is reported below instead of raising
    )
    # errors="replace" keeps a stray non-UTF-8 byte in the tool output from
    # raising UnicodeDecodeError and hiding the rest of the log.
    output = completed.stdout.decode(errors="replace")
    st.text("\n".join(output.split("\n")[1:-1]))
    if completed.returncode != 0:
        st.error(f"dbt build failed with exit code {completed.returncode}")
|
src/pages/98_Load_Data copy.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
|
| 3 |
+
from config import DEFAULT_ICON
|
| 4 |
+
from shared_page import common_page_config
|
| 5 |
+
|
| 6 |
+
from queries.nflverse.github_data import get_current_tables
|
| 7 |
+
|
| 8 |
+
from dbt_data_client import get_db_conn, run_dbt_build
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def get_page():
    """Render the "Data Loader - dbt" Streamlit page.

    Sets up the page, offers a "Refresh Data" button that calls
    ``run_dbt_build()``, then lets the user pick a table to ``DESCRIBE`` and,
    optionally, preview its first 50 rows.
    """
    page_title = "Data Loader - dbt"
    st.set_page_config(page_title=page_title, page_icon=DEFAULT_ICON, layout="wide")
    common_page_config()
    st.title(page_title)

    # Handle the refresh before listing tables so that the selectbox below
    # already reflects whatever the build created during this run.
    if st.button("Refresh Data"):
        run_dbt_build()

    with get_db_conn() as duckdb_conn:
        current_tables_list = get_current_tables(duckdb_conn)

    selected_table = st.selectbox("Describe a table:", current_tables_list, index=0)
    if not selected_table:
        # Nothing to describe or preview without a selected table.
        return

    # Quote the identifier (doubling embedded quotes) so table names that
    # contain spaces, punctuation or reserved words still form valid SQL.
    quoted_table = '"' + selected_table.replace('"', '""') + '"'

    with get_db_conn() as duckdb_conn:
        describe_df = duckdb_conn.sql(f"DESCRIBE {quoted_table}").df()
    st.dataframe(
        describe_df,
        hide_index=True,
        use_container_width=True,
    )

    if st.checkbox("Explore data"):
        with get_db_conn() as duckdb_conn:
            preview_df = duckdb_conn.sql(f"SELECT * FROM {quoted_table} LIMIT 50").df()
        st.dataframe(preview_df)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
if __name__ == "__main__":
|
| 38 |
+
get_page()
|
src/queries/nflverse/github_data.py
CHANGED
|
@@ -140,6 +140,6 @@ def load_assets():
|
|
| 140 |
st.write(f"{tag} failed to load")
|
| 141 |
|
| 142 |
|
| 143 |
-
def get_current_tables() -> list[str]:
|
| 144 |
-
current_tables_df =
|
| 145 |
return current_tables_df["name"].tolist()
|
|
|
|
| 140 |
st.write(f"{tag} failed to load")
|
| 141 |
|
| 142 |
|
| 143 |
+
def get_current_tables(duckdb_conn=duckdb) -> list[str]:
    """Return the ``name`` column of ``SHOW TABLES`` as a list.

    ``duckdb_conn`` is the object whose ``.sql()`` method runs the query; it
    defaults to the ``duckdb`` module itself.
    """
    return duckdb_conn.sql("SHOW TABLES").df()["name"].tolist()
|