surveyia / main.py
datacipen's picture
Update main.py
ac83ae2 verified
raw
history blame
6 kB
import os
import json
import bcrypt
import pandas as pd
import numpy as np
from typing import List
from pathlib import Path
from langchain_huggingface import HuggingFaceEndpoint
from langchain.schema.runnable.config import RunnableConfig
from langchain.schema import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.agents import AgentExecutor
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent, create_csv_agent
import chainlit as cl
from chainlit.input_widget import TextInput, Select, Switch, Slider
from deep_translator import GoogleTranslator
@cl.password_auth_callback
def auth_callback(username: str, password: str):
    """Authenticate a login against the accounts listed in CHAINLIT_AUTH_LOGIN.

    The environment variable is parsed as a JSON list of objects, each read
    through its ``ident``, ``pwd`` and ``role`` keys.

    Args:
        username: Login typed by the user.
        password: Password typed by the user.

    Returns:
        A ``cl.User`` flagged as admin or regular user when the credentials
        match an account whose role is recognised, otherwise ``None``.

    Raises:
        KeyError: If CHAINLIT_AUTH_LOGIN is not set or an entry lacks a key.
    """
    auth = json.loads(os.environ['CHAINLIT_AUTH_LOGIN'])

    # Look the account up once; an unknown login is a rejected login,
    # not an unhandled StopIteration.
    account = next((d for d in auth if d['ident'] == username), None)
    if account is None:
        return None

    ident = account['ident']
    pwd = account['pwd']
    role = account['role']

    # NOTE(review): the configured ``pwd`` is hashed here, at check time, so
    # the environment variable holds it un-hashed. Storing a bcrypt hash and
    # calling ``bcrypt.checkpw(password, stored_hash)`` would be preferable.
    password_ok = bcrypt.checkpw(
        password.encode('utf-8'),
        bcrypt.hashpw(pwd.encode('utf-8'), bcrypt.gensalt()),
    )
    if not password_ok:
        return None

    if role == "admindatapcc":
        return cl.User(
            identifier=ident + " : 🧑‍💼 Admin Datapcc", metadata={"role": "admin", "provider": "credentials"}
        )
    if role == "userdatapcc":
        return cl.User(
            identifier=ident + " : 🧑‍🎓 User Datapcc", metadata={"role": "user", "provider": "credentials"}
        )
    # Valid credentials but an unrecognised role: deny access.
    return None
def create_agent(filename: str):
    """
    Create a pandas DataFrame agent backed by a Hugging Face hosted LLM.

    Args:
        filename: The path to the CSV file that contains the data.

    Returns:
        The agent built by ``create_pandas_dataframe_agent`` over the CSV data.

    Raises:
        KeyError: If the HUGGINGFACEHUB_API_TOKEN environment variable is unset.
    """
    # Fail early, with the same KeyError as before, when the token is missing.
    if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
        raise KeyError('HUGGINGFACEHUB_API_TOKEN')

    # Create the Hugging Face endpoint client for the Mixtral instruct model.
    repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    llm = HuggingFaceEndpoint(
        repo_id=repo_id, max_new_tokens=5300, temperature=0.1, task="text2text-generation", streaming=True
    )

    # Read the CSV file into a Pandas DataFrame.
    df = pd.read_csv(filename)

    # Create a Pandas DataFrame agent.
    # allow_dangerous_code=True is passed through unchanged from the original.
    return create_pandas_dataframe_agent(llm, df, verbose=False, allow_dangerous_code=True)
def query_agent(agent, query):
    """
    Query an agent and return the response as a string.

    The query is appended to a fixed instruction prompt asking the model to
    reply with a JSON object (``table``, ``bar``, ``line`` or ``answer``).

    Args:
        agent: The agent to query; only its ``invoke`` method is used.
        query: The query to ask the agent.

    Returns:
        The agent's textual answer. When ``invoke`` returns a mapping that has
        an ``"output"`` entry, that entry is returned; any other result is
        converted with ``str()``.
    """
    prompt = (
        """
For the following query, if it requires drawing a table, reply as follows:
{"table": {"columns": ["column1", "column2", ...], "data": [[value1, value2, ...], [value1, value2, ...], ...]}}
If the query requires creating a bar chart, reply as follows:
{"bar": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}
If the query requires creating a line chart, reply as follows:
{"line": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}
There can only be two types of chart, "bar" and "line".
If it is just asking a question that requires neither, reply as follows:
{"answer": "answer"}
Example:
{"answer": "The title with the highest rating is 'Gilead'"}
If you do not know the answer, reply as follows:
{"answer": "I do not know."}
Return all output as a string.
All strings in "columns" list and data list, should be in double quotes,
For example: {"columns": ["title", "ratings_count"], "data": [["Gilead", 361], ["Spider's Web", 5164]]}
Lets think step by step.
Below is the query.
Query:
"""
        + query
    )

    # Run the prompt through the agent.
    response = agent.invoke(prompt)

    # A mapping result carries the model text under "output"; stringifying the
    # whole mapping would yield a Python repr, which is not parseable as JSON.
    if isinstance(response, dict) and "output" in response:
        return str(response["output"])

    # Anything else is converted to a string as before.
    return str(response)
def decode_response(response: str) -> dict:
    """Convert the string response from the model to a dictionary object.

    The text is parsed inside a JSON array so that several comma-separated
    objects are accepted. A single decoded object is unwrapped and returned
    as the dictionary itself, matching the declared return type.

    Args:
        response (str): response from the model

    Returns:
        dict: dictionary with response data when the text holds exactly one
        JSON object. For zero or several top-level values the decoded list
        is returned unchanged, as before.

    Raises:
        json.JSONDecodeError: If the text is not valid JSON.
    """
    decoded = json.loads("[" + response + "]")
    # Unwrap the common single-object case so callers can index by key.
    if len(decoded) == 1 and isinstance(decoded[0], dict):
        return decoded[0]
    return decoded
def write_response(response_dict: dict):
    """
    Extract the textual answer from a decoded agent response.

    Args:
        response_dict: The decoded response from the agent. A one-element
            list wrapping the dictionary (the shape obtained when the JSON
            text is parsed inside ``[...]``) is accepted as well.

    Returns:
        The value stored under the ``"answer"`` key.

    Raises:
        KeyError: If the response has no ``"answer"`` entry.
        TypeError: If the response is a list that does not hold exactly one item.
    """
    payload = response_dict
    # Tolerate the list wrapper so a single decoded object can still be read.
    if isinstance(payload, list) and len(payload) == 1:
        payload = payload[0]
    # Only plain answers are handled here.
    return payload["answer"]
@cl.set_chat_profiles
async def chat_profile():
    """Return the list of chat profiles offered by the app (a single one)."""
    survey_profile = cl.ChatProfile(
        name="Traitement des données d'enquête : «Expé CFA : questionnaire auprès des professionnels de la branche de l'agencement»",
        markdown_description="Vidéo exploratoire autour de l'événement",
        icon="/public/logo-ofipe.png",
    )
    return [survey_profile]
@cl.set_starters
async def set_starters():
    """Return the starter prompts offered to the user (a single one)."""
    caa_starter = cl.Starter(
        label="Répartition du nombre de CAA dans les entreprises",
        message="Quel est le nombre de chargé.e d'affaires en agencement dans chaque type d'entreprises?",
        icon="/public/request-theme.svg",
    )
    return [caa_starter]
@cl.on_message
async def on_message(message: cl.Message):
    """Answer an incoming chat message using the survey-data agent.

    Sends a banner message, builds the agent over the survey CSV, queries it
    with the user's text, extracts the answer, translates it to French and
    sends it back under the "COPILOT" author.

    Args:
        message: The incoming Chainlit message; its ``content`` is the query.
    """
    await cl.Message("> SURVEYIA").send()
    agent = create_agent("./public/ExpeCFA_LP_CAA.csv")

    # Query the agent. The call is synchronous and slow, so it is run through
    # cl.make_async to keep the event loop free for other sessions.
    response = await cl.make_async(query_agent)(agent=agent, query=message.content)

    # Decode the response.
    decoded_response = decode_response(response)

    # Extract the answer text to display in the chat.
    result = write_response(decoded_response)

    # The translation is a blocking call as well; offload it the same way.
    translator = GoogleTranslator(source='auto', target='fr')
    translated = await cl.make_async(translator.translate)(result)
    await cl.Message(author="COPILOT", content=translated).send()