Mustehson committed on
Commit
e44c00b
·
1 Parent(s): 0eb72dc

Gemma Model

Browse files
Files changed (2) hide show
  1. app.py +15 -17
  2. visualization_prompt.py +4 -1
app.py CHANGED
@@ -31,12 +31,16 @@ else:
31
 
32
  print('Loading Model...')
33
 
34
- tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
 
35
 
36
  quantization_config = BitsAndBytesConfig(
37
- load_in_4bit=True)
 
 
 
38
 
39
- model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3", quantization_config=quantization_config,
40
  device_map="auto", torch_dtype=torch.bfloat16)
41
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, return_full_text=False, max_new_tokens=512)
42
  llm = HuggingFacePipeline(pipeline=pipe)
@@ -45,17 +49,6 @@ llm = HuggingFacePipeline(pipeline=pipe)
45
  print('Model Loaded...')
46
  print(f'Model Device: {model.device}')
47
 
48
-
49
- # def generate_output(schema, prompt, generated_sql_query, result_output, result_visulaized):
50
- # return {
51
- # table_schema: schema,
52
- # input_prompt: prompt,
53
- # generated_query: generated_sql_query,
54
- # result_output: result_output,
55
- # result_visulaized: result_visulaized
56
- # }
57
-
58
- # Get Databases
59
  def get_schemas():
60
  schemas = conn.execute("""
61
  SELECT DISTINCT schema_name
@@ -148,25 +141,29 @@ Recommend a visualization:
148
  final_prompt = prompt.format_prompt(question=text_query,
149
  sql_query=sql_query, results=sql_result)
150
  response = run_llm(final_prompt)
151
-
152
  lines = response.strip().split('\n')
 
153
  visualization = lines[0].split(': ')[1]
154
  reason = lines[1].split(': ')[1]
155
 
156
  return visualization, reason
157
 
 
 
 
158
  def format_data(text_query, sql_query, sql_result, visualization_type):
159
  instruction = graph_instructions[visualization_type]
160
 
161
  template = ChatPromptTemplate.from_messages([
162
  ("system", "You are a Data expert who formats data according to the required needs. You are given the question asked by the user, it's sql query, the result of the query and the format you need to format it in."),
163
- ("human", "For the given question: {question}\n\nSQL query: {sql_query}\n\Result: {results}\n\nUse the following example to structure the data: {instructions}. If there is None in Result please change it to '0'. Just give the json string. Do not format it or add any label or text."),
164
  ])
165
 
166
 
167
  prompt = template.format_prompt(question=text_query, sql_query=sql_query,
168
  results=sql_result, instructions=instruction)
169
-
170
  formatted_data = run_llm(prompt)
171
  print(f'Formatted Data {formatted_data}')
172
  return json.loads(formatted_data.replace('.', '').strip())
@@ -205,6 +202,7 @@ def visualize_result(text_query, visualization_type, sql_query,
205
  return fig
206
 
207
 
 
208
  def main(table, text_query):
209
  if table is None:
210
  return ["", "", "", pd.DataFrame([{"error": "❌ Table is None."}])]
 
31
 
32
  print('Loading Model...')
33
 
34
+
35
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
36
 
37
  quantization_config = BitsAndBytesConfig(
38
+ load_in_4bit=True,
39
+ bnb_4bit_compute_dtype=torch.bfloat16,
40
+ bnb_4bit_use_double_quant=True,
41
+ bnb_4bit_quant_type= "nf4")
42
 
43
+ model = AutoModelForCausalLM.from_pretrained("google/gemma-2-9b-it", quantization_config=quantization_config,
44
  device_map="auto", torch_dtype=torch.bfloat16)
45
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, return_full_text=False, max_new_tokens=512)
46
  llm = HuggingFacePipeline(pipeline=pipe)
 
49
  print('Model Loaded...')
50
  print(f'Model Device: {model.device}')
51
 
 
 
 
 
 
 
 
 
 
 
 
52
  def get_schemas():
53
  schemas = conn.execute("""
54
  SELECT DISTINCT schema_name
 
141
  final_prompt = prompt.format_prompt(question=text_query,
142
  sql_query=sql_query, results=sql_result)
143
  response = run_llm(final_prompt)
144
+ response = response.replace('```', '')
145
  lines = response.strip().split('\n')
146
+ print(lines)
147
  visualization = lines[0].split(': ')[1]
148
  reason = lines[1].split(': ')[1]
149
 
150
  return visualization, reason
151
 
152
+
153
+
154
+
155
  def format_data(text_query, sql_query, sql_result, visualization_type):
156
  instruction = graph_instructions[visualization_type]
157
 
158
  template = ChatPromptTemplate.from_messages([
159
  ("system", "You are a Data expert who formats data according to the required needs. You are given the question asked by the user, it's sql query, the result of the query and the format you need to format it in."),
160
+ ("human", "For the given question: {question}\n\nSQL query: {sql_query}\n\Result: {results}\n\nUse the following example to structure the data: {instructions}. If there is None in Result please change it to '0'. Just give the json string. Do not format it. Do not give backticks."),
161
  ])
162
 
163
 
164
  prompt = template.format_prompt(question=text_query, sql_query=sql_query,
165
  results=sql_result, instructions=instruction)
166
+ print(prompt)
167
  formatted_data = run_llm(prompt)
168
  print(f'Formatted Data {formatted_data}')
169
  return json.loads(formatted_data.replace('.', '').strip())
 
202
  return fig
203
 
204
 
205
+
206
  def main(table, text_query):
207
  if table is None:
208
  return ["", "", "", pd.DataFrame([{"error": "❌ Table is None."}])]
visualization_prompt.py CHANGED
@@ -4,7 +4,8 @@ barGraphIntstruction = '''
4
  labels: string[]
5
  values: {\data: number[], label: string}[]
6
  }
7
-
 
8
  // Examples of usage:
9
  Each label represents a column on the x axis.
10
  Each array in values represents a different entity.
@@ -20,6 +21,8 @@ Here we are looking at the performance of american and european players for each
20
  labels: ['series A', 'series B', 'series C'],
21
  values: [{data:[10, 15, 20], label: 'American'}, {data:[20, 25, 30], label: 'European'}],
22
  }
 
 
23
  '''
24
 
25
  horizontalBarGraphIntstruction = '''
 
4
  labels: string[]
5
  values: {\data: number[], label: string}[]
6
  }
7
+
8
+ The output must follow this format strictly, even if the input data differs from the examples below.
9
  // Examples of usage:
10
  Each label represents a column on the x axis.
11
  Each array in values represents a different entity.
 
21
  labels: ['series A', 'series B', 'series C'],
22
  values: [{data:[10, 15, 20], label: 'American'}, {data:[20, 25, 30], label: 'European'}],
23
  }
24
+
25
+ The output format must be consistent with this structure, regardless of the specific input data.
26
  '''
27
 
28
  horizontalBarGraphIntstruction = '''