# Spaces: nolanzandi / virtual-data-analyst (Running)
#
# File size: 17,197 Bytes

from utils import message_dict

from haystack.dataclasses import ChatMessage
from haystack.components.generators.chat import OpenAIChatGenerator

# Shared OpenAI chat client (model "gpt-4o") used by the functions below to run completions.
chat_generator = OpenAIChatGenerator(model="gpt-4o")
# Module-level placeholder; not read at module level in the visible code
# (chatbot_func binds its own local `response`).
response = None

def example_question_message(data_source, name, titles, schema):
    """Build the prompt pair used to ask the model for example questions.

    Args:
        data_source: Which kind of data source is connected. One of
            'file_upload', 'sql', 'doc_db' or 'graphql'.
        name: Name of the database / endpoint, interpolated into the
            'sql', 'doc_db' and 'graphql' system prompts.
        titles: Columns, tables, collections or types (depending on
            ``data_source``) interpolated into the user prompt.
        schema: Collection schema; only interpolated for 'doc_db'.

    Returns:
        A two-element list ``[system_prompt, user_prompt]``.

    Raises:
        KeyError: If ``data_source`` is not one of the supported keys.
    """

    example_message_dict = {
        'file_upload' : ["You are a helpful and knowledgeable agent who has access to an SQLite database which has a table called 'data_source'.",
                         f"""We have a SQLite database with the following {titles}. 

                        We also have an AI agent with access to the same database that will be performing data analysis.

                        Please return an array of seven strings, each one being a question for our data analysis agent

                        that we can suggest that you believe will be insightful or helpful to a data analyst looking for

                        data insights. Return nothing more than the array of questions because I need that specific data structure

                        to process your response. No other response type or data structure will work."""],

        # The system prompt must describe the same backend (PostgreSQL) as the
        # user prompt below; it previously claimed a MongoDB document database.
        'sql' : [f"You are a helpful and knowledgeable agent who has access to a PostgreSQL database called {name}.",
                 f"""We have a PostgreSQL database with the following tables: {titles}. 

                        We also have an AI agent with access to the same database that will be performing data analysis.

                        Please return an array of seven strings, each one being a question for our data analysis agent

                        that we can suggest that you believe will be insightful or helpful to a data analyst looking for

                        data insights. Return nothing more than the array of questions because I need that specific data structure

                        to process your response. No other response type or data structure will work."""],

        'doc_db' : [f"You are a helpful and knowledgeable agent who has access to an MongoDB NoSQL document database called {name}.",
                    f"""We have a MongoDB NoSQL document database with the following collections: {titles}.

                        The schema of these collections is: {schema}. 

                        We also have an AI agent with access to the same database that will be performing data analysis.

                        Please return an array of seven strings, each one being a question for our data analysis agent

                        that we can suggest that you believe will be insightful or helpful to a data analyst looking for

                        data insights. Return nothing more than the array of questions because I need that specific data structure

                        to process your response. No other response type or data structure will work."""],

        'graphql' : [f"You are a helpful and knowledgeable agent who has access to an GraphQL API endpoint called {name}.",
                     f"""We have a GraphQL API endpoint with the following types: {titles}.

                        We also have an AI agent with access to the same GraphQL API endpoint that will be performing data analysis.

                        Please return an array of seven strings, each one being a question for our data analysis agent

                        that we can suggest that you believe will be insightful or helpful to a data analyst looking for

                        data insights. Return nothing more than the array of questions because I need that specific data structure

                        to process your response. No other response type or data structure will work."""]

    }

    return example_message_dict[data_source]

def _extract_bracketed_array(response_text):
    """Return the first bracketed array in *response_text*, brackets included.

    The text is first parsed as JSON starting at the first ``[`` so that
    brackets inside the quoted questions do not cut the array short. If that
    span is not valid JSON (e.g. a single-quoted list), fall back to slicing
    up to the first ``]`` that follows the opening bracket.

    Raises:
        ValueError: If no ``[`` ... ``]`` pair can be found.
    """
    import json

    start = response_text.find("[")
    if start == -1:
        raise ValueError("Model response does not contain an array of questions.")

    try:
        _, end = json.JSONDecoder().raw_decode(response_text, start)
    except ValueError:
        # Not valid JSON from here; use the nearest closing bracket instead.
        closing = response_text.find("]", start)
        if closing == -1:
            raise ValueError("Model response contains an unterminated array of questions.") from None
        end = closing + 1

    return response_text[start:end]


def example_question_generator(session_hash, data_source, name, titles, schema):
    """Ask the model for example questions and return them as an array string.

    Args:
        session_hash: Accepted for interface compatibility; not used here.
        data_source: Data source key forwarded to ``example_question_message``.
        name: Database / endpoint name forwarded to the prompt builder.
        titles: Columns, tables, collections or types forwarded to the prompt builder.
        schema: Collection schema forwarded to the prompt builder.

    Returns:
        The bracketed array portion of the model's reply, as a string
        (surrounding text such as code fences is stripped).

    Raises:
        ValueError: If the reply contains no bracketed array.
        KeyError: If ``data_source`` is not supported by the prompt builder.
    """
    system_prompt, user_prompt = example_question_message(data_source, name, titles, schema)
    example_messages = [
        ChatMessage.from_system(system_prompt),
        ChatMessage.from_user(text=user_prompt),
    ]

    example_response = chat_generator.run(messages=example_messages)

    # A reply without text is treated as empty so the failure below is explicit.
    response_text = example_response["replies"][0].text or ""
    response_list = _extract_bracketed_array(response_text)
    print(response_list)

    return response_list

def system_message(data_source, titles, schema=""):
    """Return the system prompt for the analysis agent of a given data source.

    Args:
        data_source: One of 'file_upload', 'sql', 'doc_db' or 'graphql'.
        titles: Columns, tables, collections or types (depending on
            ``data_source``) interpolated into the prompt.
        schema: Collection schema; only interpolated for 'doc_db'.

    Returns:
        The system prompt string describing the data source and the
        table/chart/regression functions available to the agent.

    Raises:
        KeyError: If ``data_source`` is not one of the supported keys.
    """
    print("TITLES")
    print(titles)
    # NOTE: the bar graph function is bar_chart_generation_func; every prompt
    # previously repeated line_chart_generation_func for it.
    system_message_dict = {
        'file_upload' : f"""You are a helpful and knowledgeable agent who has access to an SQLite database which has a table called 'data_source' that contains the following columns: {titles}. 

                    You also have access to a function, called table_generation_func, that can take a query.csv file generated from our sql query and returns an iframe that we should display in our chat window.

                    You also have access to a scatter plot function, called scatter_chart_generation_func, that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate a scatter plot and returns an iframe that we should display in our chat window.

                    You also have access to a line chart function, called line_chart_generation_func, that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate a line chart and returns an iframe that we should display in our chat window.

                    You also have access to a bar graph function, called bar_chart_generation_func, that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate a bar graph and returns an iframe that we should display in our chat window.

                    You also have access to a pie chart function, called pie_chart_generation_func, that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate a pie chart and returns an iframe that we should display in our chat window.

                    You also have access to a histogram function, called histogram_generation_func, that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate a histogram and returns an iframe that we should display in our chat window.

                    You also have access to a linear regression function, called regression_func, that can take a query.csv file generated from our sql query and a list of column names for our independent and dependent variables and return a regression data string and a regression chart which is returned as an iframe.

                    Could you please always display the generated charts, tables, and visualizations as part of your output?""",

        'sql' : f"""You are a helpful and knowledgeable agent who has access to an PostgreSQL database which has a series of tables called {titles}. 

                    You also have access to a function, called table_generation_func, that can take a query.csv file generated from our sql query and returns an iframe that we should display in our chat window.

                    You also have access to a scatter plot function, called scatter_chart_generation_func, that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate a scatter plot and returns an iframe that we should display in our chat window.

                    You also have access to a line chart function, called line_chart_generation_func, that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate a line chart and returns an iframe that we should display in our chat window.

                    You also have access to a bar graph function, called bar_chart_generation_func, that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate a bar graph and returns an iframe that we should display in our chat window.

                    You also have access to a pie chart function, called pie_chart_generation_func, that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate a pie chart and returns an iframe that we should display in our chat window.

                    You also have access to a histogram function, called histogram_generation_func, that can take a query.csv file generated from our sql query and uses plotly dictionaries to generate a histogram and returns an iframe that we should display in our chat window.

                    You also have access to a linear regression function, called regression_func, that can take a query.csv file generated from our sql query and a list of column names for our independent and dependent variables and return a regression data string and a regression chart which is returned as an iframe.

                    Could you please always display the generated charts, tables, and visualizations as part of your output?""",

        'doc_db' : f"""You are a helpful and knowledgeable agent who has access to a NoSQL MongoDB Document database which has a series of collections called {titles}.

                    The schema of these collections is: {schema}.

                    You also have access to a function, called table_generation_func, that can take a query.csv file generated from our MongoDB query and returns an iframe that we should display in our chat window.

                    You also have access to a scatter plot function, called scatter_chart_generation_func, that can take a query.csv file generated from our MongoDB query and uses plotly dictionaries to generate a scatter plot and returns an iframe that we should display in our chat window.

                    You also have access to a line chart function, called line_chart_generation_func, that can take a query.csv file generated from our MongoDB query and uses plotly dictionaries to generate a line chart and returns an iframe that we should display in our chat window.

                    You also have access to a bar graph function, called bar_chart_generation_func, that can take a query.csv file generated from our MongoDB query and uses plotly dictionaries to generate a bar graph and returns an iframe that we should display in our chat window.

                    You also have access to a pie chart function, called pie_chart_generation_func, that can take a query.csv file generated from our MongoDB query and uses plotly dictionaries to generate a pie chart and returns an iframe that we should display in our chat window.

                    You also have access to a histogram function, called histogram_generation_func, that can take a query.csv file generated from our MongoDB query and uses plotly dictionaries to generate a histogram and returns an iframe that we should display in our chat window.

                    You also have access to a linear regression function, called regression_func, that can take a query.csv file generated from our MongoDB query and a list of column names for our independent and dependent variables and return a regression data string and a regression chart which is returned as an iframe.

                    Could you please always display the generated charts, tables, and visualizations as part of your output?""",

        'graphql' : f"""You are a helpful and knowledgeable agent who has access to a GraphQL API which has the following types: {titles}.

                    We have also saved a schema.json file that contains the entire introspection query that we can use to find out more about each type before making a query.

                    You also have access to a function, called table_generation_func, that can take a query.csv file generated from our GraphQL API query and returns an iframe that we should display in our chat window.

                    You also have access to a scatter plot function, called scatter_chart_generation_func, that can take a query.csv file generated from our GraphQL API query and uses plotly dictionaries to generate a scatter plot and returns an iframe that we should display in our chat window.

                    You also have access to a line chart function, called line_chart_generation_func, that can take a query.csv file generated from our GraphQL API query and uses plotly dictionaries to generate a line chart and returns an iframe that we should display in our chat window.

                    You also have access to a bar graph function, called bar_chart_generation_func, that can take a query.csv file generated from our GraphQL API query and uses plotly dictionaries to generate a bar graph and returns an iframe that we should display in our chat window.

                    You also have access to a pie chart function, called pie_chart_generation_func, that can take a query.csv file generated from our GraphQL API query and uses plotly dictionaries to generate a pie chart and returns an iframe that we should display in our chat window.

                    You also have access to a histogram function, called histogram_generation_func, that can take a query.csv file generated from our GraphQL API query and uses plotly dictionaries to generate a histogram and returns an iframe that we should display in our chat window.

                    You also have access to a linear regression function, called regression_func, that can take a query.csv file generated from our GraphQL API query and a list of column names for our independent and dependent variables and return a regression data string and a regression chart which is returned as an iframe.

                    Could you please always display the generated charts, tables, and visualizations as part of your output?"""

    }

    return system_message_dict[data_source]

def chatbot_func(message, history, session_hash, data_source, titles, schema, *args):
    """Run one chat turn, executing requested tool calls until the model replies in text.

    The user message is appended to the conversation stored in
    ``message_dict[session_hash][data_source]`` (the conversation is created
    with a system prompt when that entry is ``None``). The model is then
    queried; while its reply requests tool calls, each call is executed, its
    result is appended to the conversation, and the model is queried again.

    Args:
        message: The user's message text.
        history: Not used by this function.
        session_hash: Key of the current session in ``message_dict``; also
            forwarded to every tool function.
        data_source: Data source key; selects the conversation and system
            prompt and is forwarded to tool functions as ``session_folder``.
        titles: Forwarded to the system prompt and to ``tools.tools_call``.
        schema: Forwarded to the system prompt.
        *args: Extra positional values forwarded to tool functions as ``args``.

    Returns:
        The text of the model's final (non tool-call) reply.
    """
    from functions import table_generation_func, regression_func, scatter_chart_generation_func, \
        query_func, graphql_schema_query, graphql_csv_query, \
        line_chart_generation_func,bar_chart_generation_func,pie_chart_generation_func,histogram_generation_func
    import tools.tools as tools

    available_functions = {"query_func":query_func,"graphql_schema_query": graphql_schema_query,"graphql_csv_query": graphql_csv_query,
                           "table_generation_func":table_generation_func,
                           "line_chart_generation_func":line_chart_generation_func,"bar_chart_generation_func":bar_chart_generation_func,
                           "scatter_chart_generation_func":scatter_chart_generation_func, "pie_chart_generation_func":pie_chart_generation_func,
                           "histogram_generation_func":histogram_generation_func,
                           "regression_func":regression_func }

    conversation = message_dict[session_hash][data_source]
    if conversation is not None:
        conversation.append(ChatMessage.from_user(message))
    else:
        # First message for this data source: start with the system prompt.
        conversation = [
            ChatMessage.from_system(system_message(data_source, titles, schema)),
            ChatMessage.from_user(message),
        ]
        message_dict[session_hash][data_source] = conversation

    response = chat_generator.run(messages=conversation, generation_kwargs={"tools": tools.tools_call(session_hash, data_source, titles)})

    while True:
        reply = response["replies"][0]
        function_calls = reply.tool_calls

        # Regular conversation: no tool calls left to execute. Deciding on the
        # tool-call list itself (not finish_reason) guarantees the loop cannot
        # spin on a reply that reports "tool_calls" but carries none.
        if not function_calls:
            conversation.append(reply)
            break

        for function_call in function_calls:
            conversation.append(ChatMessage.from_assistant(tool_calls=[function_call]))
            ## Parse function calling information
            function_name = function_call.tool_name
            function_args = function_call.arguments

            ## Find the corresponding function and call it with the given arguments
            function_to_call = available_functions.get(function_name)
            if function_to_call is None:
                # Report an unknown tool back to the model instead of aborting the turn.
                tool_result = f"Error: '{function_name}' is not an available function."
            else:
                function_response = function_to_call(**function_args, session_hash=session_hash, session_folder=data_source, args=args)
                tool_result = function_response['reply']
            print(function_name)
            ## Append function response to the messages list using `ChatMessage.from_tool`
            conversation.append(ChatMessage.from_tool(tool_result=tool_result, origin=function_call))

        # Query the model once after ALL tool results of this round are recorded,
        # rather than after each one (which discarded the intermediate replies).
        response = chat_generator.run(messages=conversation, generation_kwargs={"tools": tools.tools_call(session_hash, data_source, titles)})

    return response["replies"][0].text