n0v33n committed on
Commit
5c05919
·
1 Parent(s): 74058a7

Initial Space Setup

Browse files
Files changed (2) hide show
  1. app.py +116 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import kagglehub
2
+ import os
3
+ import polars as pl
4
+ import gradio as gr
5
+ import google.generativeai as genai
6
+ from kaggle_secrets import UserSecretsClient
7
+
8
# Download the Meta-Kaggle dataset in parquet format.
# The returned path is used below as the directory containing the parquet files.
PARQUET_PATH = kagglehub.dataset_download("bwandowando/meta-kaggle-ported-to-parquet-format")
print("✅ Downloaded Meta-Kaggle parquet data.")
print("📂 PARQUET_PATH =", PARQUET_PATH)

# === Load schema from parquet files ===
# Sorted so the schema listing (and therefore the prompt) has a stable order.
parquet_files = sorted(f for f in os.listdir(PARQUET_PATH) if f.endswith(".parquet"))
schema_dict = {}  # table name -> list of column names
file_map = {}  # table name -> full path of the parquet file

for file in parquet_files:
    # Strip only the trailing extension; str.replace would also remove an
    # interior ".parquet" from the file name.
    name = file[: -len(".parquet")]
    path = os.path.join(PARQUET_PATH, file)
    # Read only the parquet metadata -- no row data is loaded.
    schema_dict[name] = list(pl.read_parquet_schema(path))
    file_map[name] = path
24
+
25
# === Build schema as prompt context ===
# One "### <table>" heading per parquet file, followed by its comma-separated
# column names; sections are separated by a blank line.
schema_description = "\n\n".join(
    "### " + name + "\n" + ", ".join(cols) for name, cols in schema_dict.items()
)

# System-style instructions sent to the model ahead of every user question.
context_prompt = (
    "\n"
    "You are a helpful assistant that helps users understand which parts of the Meta-Kaggle dataset they need for their analysis.\n"
    "\n"
    "Below is the dataset schema (parquet files):\n"
    "\n"
    + schema_description
    + "\n"
    "\n"
    "When the user asks a question, respond with:\n"
    "1. Which parquet file(s) are needed\n"
    "2. Which column(s) are relevant\n"
    '3. If needed, describe any join keys (e.g., "CompetitionId", "UserId")\n'
    "Do not generate or run any code. Just guide the user on what parts of the dataset are needed.\n"
)
43
+
44
# === Gemini setup ===
# In Hugging Face Spaces, the API key is read from an environment variable.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# `model` stays None whenever setup fails, so guide_user() can report the
# problem instead of failing on the first request.
model = None
if not GOOGLE_API_KEY:
    # A missing key does not make genai.configure() raise, so check it here;
    # otherwise the failure would only surface on the first generate call.
    print("Error setting up Gemini API: GOOGLE_API_KEY environment variable is not set")
else:
    try:
        genai.configure(api_key=GOOGLE_API_KEY)
        model = genai.GenerativeModel("gemini-1.5-flash")
    except Exception as e:
        print(f"Error setting up Gemini API: {e}")
        model = None
53
+
54
+ # === Analysis Guide Function ===
55
def guide_user(prompt):
    """Return guidance on which parquet files/columns answer the user's question.

    Args:
        prompt: The question typed into the UI. May be empty or None.

    Returns:
        The model's answer with surrounding whitespace stripped, or a short
        human-readable message when the model is not configured, the question
        is empty, or the request fails.
    """
    if model is None:
        return "Error: Gemini API not properly configured."

    question = (prompt or "").strip()
    if not question:
        # Avoid spending an API call on an empty question.
        return "Please enter a question about the Meta-Kaggle dataset."

    full_prompt = context_prompt + f"\n\nUser question: {question}\n\nAnswer:"
    try:
        result = model.generate_content(full_prompt)
        # Accessing `.text` can itself raise (e.g. when the response has no
        # text parts), so it is kept inside the try block.
        return result.text.strip()
    except Exception as e:
        # UI boundary: show the failure in the output box rather than letting
        # the exception propagate to the Gradio event handler.
        return f"Error: could not get a response from Gemini: {e}"
61
+
62
# === Custom CSS for Gradio UI ===
# Passed to gr.Blocks below. The ".output-text" rule is applied to the
# guidance textbox through its elem_classes.
css = """
body {
    background-color: #f0f4f8;
    font-family: 'Arial', sans-serif;
}
.gradio-container {
    max-width: 800px;
    margin: auto;
    padding: 20px;
    background-color: white;
    border-radius: 10px;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
}
h1 {
    color: #1a73e8;
    text-align: center;
}
h2, p.subtitle, .subheading, .description {
    color: #555;
    text-align: center;
    margin-bottom: 20px;
}
input[type="text"] {
    border: 1px solid #ccc;
    border-radius: 5px;
    padding: 10px;
}
button {
    background-color: #1a73e8;
    color: white;
    border-radius: 5px;
    padding: 10px 20px;
}
button:hover {
    background-color: #1557b0;
}
.output-text {
    background-color: #f9f9f9;
    border: 1px solid #ddd;
    border-radius: 5px;
    padding: 15px;
}
"""

# === Launch Gradio UI ===
# Layout: title, short description, question box, read-only answer box, and a
# button that sends the question to guide_user.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# Meta-Kaggle Dataset Navigator")
    gr.Markdown("Ask which parquet files and columns you need for your analysis!")
    input_text = gr.Textbox(
        label="Your Question",
        placeholder="E.g., Which files and columns do I need to analyze competition rankings?",
    )
    # elem_classes attaches the ".output-text" CSS rule, which was previously
    # defined but never applied to any component.
    output_text = gr.Textbox(
        label="Guidance",
        interactive=False,
        elem_classes="output-text",
    )
    submit_button = gr.Button("Get Guidance")
    submit_button.click(fn=guide_user, inputs=input_text, outputs=output_text)

# Launch only when run as a script, so importing this module does not start
# a server.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ kagglehub
2
+ polars
3
+ gradio
4
+ google-generativeai
5
+ kaggle