Spaces:

claudion-ai
/

Frappe

Sleeping

HusnaManakkot committed on Feb 28, 2024

Commit

514fc02

verified ·

1 Parent(s): 887c95b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,21 +6,28 @@ from datasets import load_dataset
 spider_dataset = load_dataset("spider", split='train')  # Load a subset of the dataset
 # Extract schema information from the dataset
 table_names = set()
 column_names = set()
 for item in spider_dataset:
-    for table in item['db']['table_names_original']:
-        table_names.add(table)
-    for column in item['db']['column_names_original']:
-        column_names.add(column[1])
 # Load tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-wikiSQL")  # Update this to a model fine-tuned on Spider if available
-model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-wikiSQL")  # Update this to a model fine-tuned on Spider if available
 def post_process_sql_query(sql_query):
     # Modify the SQL query to match the dataset's schema
     # This is just an example and might need to be adapted based on the dataset and model output
     for table_name in table_names:
         if "TABLE" in sql_query:
             sql_query = sql_query.replace("TABLE", table_name)

 spider_dataset = load_dataset("spider", split='train')  # Load a subset of the dataset
 # Extract schema information from the dataset
+db_ids = set()
 table_names = set()
 column_names = set()
 for item in spider_dataset:
+    db_ids.add(item['db_id'])
+    for table in item['sql']['from']['table_units']:
+        if isinstance(table, list):
+            table_names.add(table[1])
+    for column in item['sql']['select'][1]:
+        column_names.add(column[1][1])
 # Load tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-wikiSQL")
+model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-wikiSQL")
 def post_process_sql_query(sql_query):
     # Modify the SQL query to match the dataset's schema
     # This is just an example and might need to be adapted based on the dataset and model output
+    for db_id in db_ids:
+        if "DB_ID" in sql_query:
+            sql_query = sql_query.replace("DB_ID", db_id)
+            break  # Assuming only one database is referenced in the query
     for table_name in table_names:
         if "TABLE" in sql_query:
             sql_query = sql_query.replace("TABLE", table_name)