CyranoB committed on
Commit
8d1e83e
·
1 Parent(s): a63f98f

Trying multi query retrieve

Files changed (3)
  1. messages.py +114 -85
  2. requirements.txt +1 -0
  3. search_agent.py +77 -87
messages.py CHANGED
@@ -1,92 +1,121 @@
- import json
- from langchain.schema import SystemMessage, HumanMessage
-
- def get_optimized_search_messages(query):
-     messages = [
-         SystemMessage(
-             content="""
-                 You are a serach query optimizer specialist.
-                 Provide a better search query for web search engine to answer the given question, end the queries with ’**’
-                 Tips:
-                     Identify the key concepts in the question
-                     Remove filler words like "how to", "what is", "I want to"
-                     Removed style such as "in the style of", "engaging", "short", "long"
-                     Remove lenght instruction (example: essay, article, letter, blog, post, blogpost, etc)
-                     Keep it short, around 3-7 words total
-                     Put the most important keywords first
-                     Remove formatting instructions
-                     Remove style instructions (exmaple: in the style of, engaging, short, long)
-                     Remove lenght instruction (example: essay, article, letter, etc)
-                 Example:
-                     Question: How do I bake chocolate chip cookies from scratch?
-                     Search query: chocolate chip cookies recipe from scratch**
-                 Example:
-                     Question: I would like you to show me a time line of Marie Curie life. Show results as a markdown table
-                     Search query: Marie Curie timeline**
-                 Example:
-                     Question: I would like you to write a long article on nato vs russia. Use know geopolical frameworks.
-                     Search query: geopolitics nato russia**
-                 Example:
-                     Question: Write a engaging linkedin post about Andrew Ng
-                     Search query: Andrew Ng**
-                 Example:
-                     Question: Write a short artible about the solar system in the style of Carl Sagan
-                     Search query: solar system**
-                 Example:
-                     Question: Should I use Kubernetes? Answer in the style of Gilfoyde from the TV show Silicon Valley
-                     Search query: Kubernetes decision**
-                 Example:
-                     Question: biography of napoleon. include a table with the major events.
-                     Search query: napoleon biography events**
-             """
-         ),
-         HumanMessage(
-             content=f"""
-                 Provide a better search query for web search engine to answer the given question, provide only one search query and nothing else, end the queries with ’**’.
-                 Question: {query}
-                 Search query:
-             """
-         ),
-     ]
-     return messages
 
- def get_query_with_sources_messages(query, relevant_docs):
-     messages = [
-         SystemMessage(
-             content="""
-                 You are an expert research assistant.
-                 You are provided with a Context in JSON format and a Question.
-
-                 Use RAG to answer the Question, providing references and links to the Context material you retrieve and use in your answer:
-                 When generating your answer, follow these steps:
-                 - Retrieve the most relevant context material from your knowledge base to help answer the question
-                 - Cite the references you use by including the title, author, publication, and a link to each source
-                 - Synthesize the retrieved information into a clear, informative answer to the question
-                 - Format your answer in Markdown, using heading levels 2-3 as needed
-                 - Include a "References" section at the end with the full citations and link for each source you used
-
-                 Example of Context JSON entry:
-                 {
-                     "page_content": "This provides access to material related to ...",
-                     "metadata": {
-                         "title": "Introduction - Marie Curie: Topics in Chronicling America",
-                         "link": "https://guides.loc.gov/chronicling-america-marie-curie"
-                     }
-                 }
  """
-         ),
-         HumanMessage(
-             content= f"""
-                 Context information is below.
-                 Context:
-                 ---------------------
-                 {json.dumps(relevant_docs, indent=2, ensure_ascii=False)}
-                 ---------------------
-                 Question: {query}
-                 Answer:
  """
-         ),
-     ]
-     return messages
+ """
+ This module provides functions for generating optimized search messages, RAG prompt templates,
+ and messages for queries with relevant source documents using the LangChain library.
+ """
+
+ from langchain.schema import SystemMessage, HumanMessage
+ from langchain.prompts.chat import (
+     HumanMessagePromptTemplate,
+     SystemMessagePromptTemplate,
+     ChatPromptTemplate
+ )
+ from langchain.prompts.prompt import PromptTemplate
 
+ def get_optimized_search_messages(query):
+     """
+     Generate optimized search messages for a given query.
+
+     Args:
+         query (str): The user's query.
+
+     Returns:
+         list: A list containing the system message and human message for optimized search.
+     """
+     system_message = SystemMessage(
+         content="""
+             I want you to act as a prompt optimizer for web search. I will provide you with a chat prompt, and your goal is to optimize it into a search string that will yield the most relevant and useful information from a search engine like Google.
+             To optimize the prompt:
+                 Identify the key information being requested
+                 Arrange the keywords into a concise search string
+                 Keep it short, around 1 to 5 words total
+                 Put the most important keywords first
+
+             Some tips and things to be sure to remove:
+             - Remove any conversational or instructional phrases
+             - Remove style such as "in the style of", "engaging", "short", "long"
+             - Remove length instruction (example: essay, article, letter, blog, post, blogpost, etc)
+             - Remove style instructions (example: "in the style of", engaging, short, long)
+             - Remove length instruction (example: essay, article, letter, etc)
+
+             Add "**" to the end of the search string to indicate the end of the query
+             Provide your output in this format: optimized search string**
+
+             Example:
+                 Question: How do I bake chocolate chip cookies from scratch?
+                 Search query: chocolate chip cookies recipe from scratch**
+             Example:
+                 Question: I would like you to show me a timeline of Marie Curie's life. Show results as a markdown table
+                 Search query: Marie Curie timeline**
+             Example:
+                 Question: I would like you to write a long article on NATO vs Russia. Use known geopolitical frameworks.
+                 Search query: geopolitics nato russia**
+             Example:
+                 Question: Write an engaging LinkedIn post about Andrew Ng
+                 Search query: Andrew Ng**
+             Example:
+                 Question: Write a short article about the solar system in the style of Carl Sagan
+                 Search query: solar system**
+             Example:
+                 Question: Should I use Kubernetes? Answer in the style of Gilfoyle from the TV show Silicon Valley
+                 Search query: Kubernetes decision**
+             Example:
+                 Question: Biography of Napoleon. Include a table with the major events.
+                 Search query: napoleon biography events**
+             Example:
+                 Question: Write a short article on the history of the United States. Include a table with the major events.
+                 Search query: united states history events**
+             Example:
+                 Question: Write a short article about the solar system in the style of donald trump
+                 Search query: solar system**
+         """
+     )
+     human_message = HumanMessage(
+         content=f"""
+             Question: {query}
+             Search query:
+         """
+     )
+     return [system_message, human_message]
 
+ def get_rag_prompt_template():
  """
+     Get the prompt template for Retrieval-Augmented Generation (RAG).
+
+     Returns:
+         ChatPromptTemplate: The prompt template for RAG.
  """
+     system_prompt = SystemMessagePromptTemplate(
+         prompt=PromptTemplate(
+             input_variables=[],
+             template="""
+                 You are an expert research assistant.
+                 You are provided with a Context in JSON format and a Question.
+                 Each JSON entry contains: content, title, link
+
+                 Use RAG to answer the Question, providing references and links to the Context material you retrieve and use in your answer:
+                 When generating your answer, follow these steps:
+                 - Retrieve the most relevant context material from your knowledge base to help answer the question
+                 - Cite the references you use by including the title, author, publication, and a link to each source
+                 - Synthesize the retrieved information into a clear, informative answer to the question
+                 - Format your answer in Markdown, using heading levels 2-3 as needed
+                 - Include a "References" section at the end with the full citations and link for each source you used
+             """
+         )
+     )
+     human_prompt = HumanMessagePromptTemplate(
+         prompt=PromptTemplate(
+             input_variables=["context", "query"],
+             template="""
+                 Context:
+                 ---------------------
+                 {context}
+                 ---------------------
+                 Question: {query}
+                 Answer:
+             """
+         )
+     )
+     return ChatPromptTemplate(
+         input_variables=["context", "query"],
+         messages=[system_prompt, human_prompt],
+     )
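For orientation, here is a minimal usage sketch (not part of the commit) of how the two new helpers are meant to be wired together, assuming an OpenAI chat model; the question text, context string, and variable names are illustrative only:

# Illustrative usage sketch for the new messages.py helpers (not part of the commit).
from langchain_openai import ChatOpenAI
from messages import get_optimized_search_messages, get_rag_prompt_template

chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.0)

# 1. Ask the LLM to rewrite the user's question as a short search string ending in "**".
question = "Write a short article about the solar system in the style of Carl Sagan"
response = chat.invoke(get_optimized_search_messages(question))
search_query = response.content.strip('"').split("**", 1)[0]

# 2. Render the RAG prompt with retrieved context (a JSON string of content/title/link entries).
context = '[{"content": "The solar system has eight planets...", "title": "Example", "link": "https://example.com"}]'
prompt = get_rag_prompt_template().format(query=question, context=context)
answer = chat.invoke(prompt)
print(answer.content)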
requirements.txt CHANGED
@@ -4,6 +4,7 @@ docopt
  faiss-cpu
  python-dotenv
  langchain
+ langchain_core
  langchain_community
  langchain_openai
  langchain_groq
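The added langchain_core dependency backs the new Document import in search_agent.py; a quick sanity check that the package resolves (sketch, not from the commit):

# Sanity-check sketch for the new langchain_core dependency (illustrative only).
from langchain_core.documents.base import Document

doc = Document(page_content="example text", metadata={"title": "Example", "source": "https://example.com"})
print(doc.metadata["source"])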
search_agent.py CHANGED
@@ -32,28 +32,31 @@ from bs4 import BeautifulSoup
  from docopt import docopt
  import dotenv
 
  from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain.schema import SystemMessage, HumanMessage
  from langchain.callbacks import LangChainTracer
  from langchain_groq import ChatGroq
  from langchain_openai import ChatOpenAI
- from langchain_community.chat_models import ChatOllama
  from langchain_openai import OpenAIEmbeddings
- from langchain_community.vectorstores.faiss import FAISS
  from langchain_community.chat_models.bedrock import BedrockChat
  from langsmith import Client
 
  import requests
 
  from rich.console import Console
- from rich.rule import Rule
  from rich.markdown import Markdown
 
 
- def get_chat_llm(provider, model, temperature=0.0):
      match provider:
          case 'bedrock':
-             if(model == None):
                  model = "anthropic.claude-3-sonnet-20240229-v1:0"
              chat_llm = BedrockChat(
                  credentials_profile_name=os.getenv('CREDENTIALS_PROFILE_NAME'),
@@ -61,29 +64,28 @@ def get_chat_llm(provider, model, temperature=0.0):
                  model_kwargs={"temperature": temperature },
              )
          case 'openai':
-             if(model == None):
                  model = "gpt-3.5-turbo"
              chat_llm = ChatOpenAI(model_name=model, temperature=temperature)
          case 'groq':
-             if(model == None):
                  model = 'mixtral-8x7b-32768'
              chat_llm = ChatGroq(model_name=model, temperature=temperature)
          case 'ollama':
-             if(model == None):
-                 model = 'llam2'
              chat_llm = ChatOllama(model=model, temperature=temperature)
          case _:
              raise ValueError(f"Unknown LLM provider {provider}")
-
-     console.log(f"Using {model} on {provider} with temperature {temperature}")
      return chat_llm
 
- def optimize_search_query(query):
-     from messages import get_optimized_search_messages
      messages = get_optimized_search_messages(query)
-     response = chat.invoke(messages, config={"callbacks": callbacks})
      optimized_search_query = response.content
-     return optimized_search_query.strip('"').strip("**")
 
 
  def get_sources(query, max_pages=10, domain=None):
@@ -99,10 +101,10 @@ def get_sources(query, max_pages=10, domain=None):
      }
 
      try:
-         response = requests.get(url, headers=headers)
 
          if response.status_code != 200:
-             raise Exception(f"HTTP error! status: {response.status_code}")
 
          json_response = response.json()
 
@@ -140,8 +142,7 @@ def extract_main_content(html):
              element.extract()
          main_content = ' '.join(soup.body.get_text().split())
          return main_content
-     except Exception as error:
-         #console.log(f"Error extracting main content: {error}")
          return None
 
  def process_source(source):
@@ -159,68 +160,57 @@ def get_links_contents(sources):
      # Filter out None results
      return [result for result in results if result is not None]
 
- def process_and_vectorize_content(
-     contents,
-     query,
-     text_chunk_size=1000,
-     text_chunk_overlap=200,
-     number_of_similarity_results=5
- ):
-     """
-     Process and vectorize content using Langchain.
-
-     Args:
-         contents (list): List of dictionaries containing 'title', 'link', and 'html' keys.
-         query (str): Query string for similarity search.
-         text_chunk_size (int): Size of each text chunk.
-         text_chunk_overlap (int): Overlap between text chunks.
-         number_of_similarity_results (int): Number of most similar results to return.
-
-     Returns:
-         list: List of most similar documents.
-     """
      documents = []
-
      for content in contents:
          if content['html']:
              try:
-                 # Split text into chunks
-                 text_splitter = RecursiveCharacterTextSplitter(
-                     chunk_size=text_chunk_size,
-                     chunk_overlap=text_chunk_overlap
-                 )
-                 texts = text_splitter.split_text(content['html'])
-
-                 # Create metadata for each text chunk
-                 metadatas = [{'title': content['title'], 'link': content['link']} for _ in range(len(texts))]
-
-                 # Create vector store
-                 embeddings = OpenAIEmbeddings()
-                 docsearch = FAISS.from_texts(texts, embedding=embeddings, metadatas=metadatas)
-
-                 # Perform similarity search
-                 docs = docsearch.similarity_search(query, k=number_of_similarity_results)
-                 doc_dicts = [{'page_content': doc.page_content, 'metadata': doc.metadata} for doc in docs]
-                 documents.extend(doc_dicts)
-
              except Exception as e:
                  console.log(f"[gray]Error processing content for {content['link']}: {e}")
 
-
-     return documents
 
 
- def answer_query_with_sources(query, relevant_docs):
-     from messages import get_query_with_sources_messages
-     messages = get_query_with_sources_messages(query, relevant_docs)
-     response = chat.invoke(messages, config={"callbacks": callbacks})
-     return response
 
  console = Console()
  dotenv.load_dotenv()
 
  callbacks = []
- if(os.getenv("LANGCHAIN_API_KEY")):
      callbacks.append(
          LangChainTracer(
              project_name="search agent",
@@ -230,44 +220,44 @@ if(os.getenv("LANGCHAIN_API_KEY")):
          )
      )
 
- if __name__ == '__main__':
      arguments = docopt(__doc__, version='Search Agent 0.1')
 
      provider = arguments["--provider"]
      model = arguments["--model"]
      temperature = float(arguments["--temperature"])
-     domain=arguments["--domain"]
      max_pages=arguments["--max_pages"]
      output=arguments["--output"]
      query = arguments["SEARCH_QUERY"]
-
      chat = get_chat_llm(provider, model, temperature)
-
      with console.status(f"[bold green]Optimizing query for search: {query}"):
-         optimize_search_query = optimize_search_query(query)
-         console.log(f"Optimized search query: [bold blue]{optimize_search_query}")
-
-     with console.status(f"[bold green]Searching sources using the optimized query: {optimize_search_query}"):
          sources = get_sources(optimize_search_query, max_pages=max_pages, domain=domain)
          console.log(f"Found {len(sources)} sources {'on ' + domain if domain else ''}")
 
-     with console.status(f"[bold green]Fetching content for {len(sources)} sources", spinner="growVertical"):
          contents = get_links_contents(sources)
          console.log(f"Managed to extract content from {len(contents)} sources")
 
-     with console.status(
-         f"[bold green]Processing {len(contents)} contents and finding relevant extracts",
-         spinner="dots8Bit"
-     ):
-         relevant_docs = process_and_vectorize_content(contents, query)
-         console.log(f"Filtered {len(relevant_docs)} relevant content extracts")
 
-     with console.status(f"[bold green]Querying LLM with {len(relevant_docs)} relevant extracts", spinner='dots8Bit'):
-         respomse = answer_query_with_sources(query, relevant_docs)
 
      console.rule(f"[bold green]Response from {provider}")
      if output == "text":
-         console.print(respomse.content)
      else:
-         console.print(Markdown(respomse.content))
      console.rule("[bold green]")
@@ -32,28 +32,31 @@ from bs4 import BeautifulSoup
  from docopt import docopt
  import dotenv
 
+ from langchain_core.documents.base import Document
  from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.retrievers.multi_query import MultiQueryRetriever
  from langchain.callbacks import LangChainTracer
  from langchain_groq import ChatGroq
  from langchain_openai import ChatOpenAI
  from langchain_openai import OpenAIEmbeddings
  from langchain_community.chat_models.bedrock import BedrockChat
+ from langchain_community.chat_models.ollama import ChatOllama
+ from langchain_community.vectorstores.faiss import FAISS
+
  from langsmith import Client
 
  import requests
 
  from rich.console import Console
  from rich.markdown import Markdown
 
+ from messages import get_rag_prompt_template, get_optimized_search_messages
 
+
+ def get_chat_llm(provider, model=None, temperature=0.0):
      match provider:
          case 'bedrock':
+             if model is None:
                  model = "anthropic.claude-3-sonnet-20240229-v1:0"
              chat_llm = BedrockChat(
                  credentials_profile_name=os.getenv('CREDENTIALS_PROFILE_NAME'),
@@ -61,29 +64,28 @@ def get_chat_llm(provider, model, temperature=0.0):
                  model_kwargs={"temperature": temperature },
              )
          case 'openai':
+             if model is None:
                  model = "gpt-3.5-turbo"
              chat_llm = ChatOpenAI(model_name=model, temperature=temperature)
          case 'groq':
+             if model is None:
                  model = 'mixtral-8x7b-32768'
              chat_llm = ChatGroq(model_name=model, temperature=temperature)
          case 'ollama':
+             if model is None:
+                 model = 'llama2'
              chat_llm = ChatOllama(model=model, temperature=temperature)
          case _:
              raise ValueError(f"Unknown LLM provider {provider}")
+
+     console.log(f"Using {model} on {provider} with temperature {temperature}")
      return chat_llm
 
+ def optimize_search_query(chat_llm, query):
      messages = get_optimized_search_messages(query)
+     response = chat_llm.invoke(messages, config={"callbacks": callbacks})
      optimized_search_query = response.content
+     return optimized_search_query.strip('"').split("**", 1)[0]
 
 
  def get_sources(query, max_pages=10, domain=None):
@@ -99,10 +101,10 @@ def get_sources(query, max_pages=10, domain=None):
      }
 
      try:
+         response = requests.get(url, headers=headers, timeout=30)
 
          if response.status_code != 200:
+             return []
 
          json_response = response.json()
 
@@ -140,8 +142,7 @@ def extract_main_content(html):
              element.extract()
          main_content = ' '.join(soup.body.get_text().split())
          return main_content
+     except Exception:
          return None
 
  def process_source(source):
@@ -159,68 +160,57 @@ def get_links_contents(sources):
      # Filter out None results
      return [result for result in results if result is not None]
 
+ def vectorize(contents, text_chunk_size=1000, text_chunk_overlap=200,):
      documents = []
      for content in contents:
          if content['html']:
              try:
+                 page_content = content['html']
+                 metadata = {'title': content['title'], 'source': content['link']}
+                 doc = Document(page_content=page_content, metadata=metadata)
+                 documents.append(doc)
              except Exception as e:
                  console.log(f"[gray]Error processing content for {content['link']}: {e}")
 
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=text_chunk_size,
+         chunk_overlap=text_chunk_overlap
+     )
+     docs = text_splitter.split_documents(documents)
+     embeddings = OpenAIEmbeddings()
+     store = FAISS.from_documents(docs, embeddings)
+     return store
+
+ def format_docs(docs):
+     formatted_docs = []
+     for d in docs:
+         content = d.page_content
+         title = d.metadata['title']
+         source = d.metadata['source']
+         doc = {"content": content, "title": title, "link": source}
+         formatted_docs.append(doc)
+     docs_as_json = json.dumps(formatted_docs, indent=2, ensure_ascii=False)
+     return docs_as_json
+
+
+ def query_rag(chat_llm, question, search_query, vectorstore):
+     retriever_from_llm = MultiQueryRetriever.from_llm(
+         retriever=vectorstore.as_retriever(), llm=chat_llm,
+     )
+     unique_docs = retriever_from_llm.get_relevant_documents(query=search_query, config={"callbacks": callbacks})
+     context = format_docs(unique_docs)
+     prompt = get_rag_prompt_template().format(query=question, context=context)
+     response = chat_llm.invoke(prompt, config={"callbacks": callbacks})
+     return response.content
+
 
 
  console = Console()
  dotenv.load_dotenv()
 
  callbacks = []
+ if os.getenv("LANGCHAIN_API_KEY"):
      callbacks.append(
          LangChainTracer(
              project_name="search agent",
@@ -230,44 +220,44 @@ if(os.getenv("LANGCHAIN_API_KEY")):
          )
      )
 
+ if __name__ == '__main__':
      arguments = docopt(__doc__, version='Search Agent 0.1')
 
      provider = arguments["--provider"]
      model = arguments["--model"]
      temperature = float(arguments["--temperature"])
+     domain=arguments["--domain"]
      max_pages=arguments["--max_pages"]
      output=arguments["--output"]
      query = arguments["SEARCH_QUERY"]
+
      chat = get_chat_llm(provider, model, temperature)
+
      with console.status(f"[bold green]Optimizing query for search: {query}"):
+         optimize_search_query = optimize_search_query(chat, query)
+         console.log(f"Optimized search query: [bold blue]{optimize_search_query}")
+
+     with console.status(
+         f"[bold green]Searching sources using the optimized query: {optimize_search_query}"
+     ):
          sources = get_sources(optimize_search_query, max_pages=max_pages, domain=domain)
          console.log(f"Found {len(sources)} sources {'on ' + domain if domain else ''}")
 
+     with console.status(
+         f"[bold green]Fetching content for {len(sources)} sources", spinner="growVertical"
+     ):
          contents = get_links_contents(sources)
          console.log(f"Managed to extract content from {len(contents)} sources")
 
+     with console.status(f"[bold green]Embedding {len(sources)} sources", spinner="growVertical"):
+         vector_store = vectorize(contents)
 
+     with console.status("[bold green]Querying LLM relevant context", spinner='dots8Bit'):
+         response = query_rag(chat, query, optimize_search_query, vector_store)
 
      console.rule(f"[bold green]Response from {provider}")
      if output == "text":
+         console.print(response)
      else:
+         console.print(Markdown(response))
      console.rule("[bold green]")
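Taken together, the new flow is: optimize the question into a short search string, fetch and embed the result pages into FAISS, then let MultiQueryRetriever generate alternative phrasings of the search query and merge the retrieved chunks before the final RAG answer. Below is a condensed sketch of that flow with the search and fetch steps stubbed out (illustrative only; the stub document, its text, and the example question are invented for the sketch and are not part of the commit):

# Condensed sketch of the new multi-query RAG flow (stubbed inputs, illustrative only).
# Requires OPENAI_API_KEY in the environment for the chat model and embeddings.
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_core.documents.base import Document
from messages import get_rag_prompt_template

chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.0)

# Stub: in search_agent.py these documents come from get_sources() + get_links_contents() + vectorize().
docs = [Document(page_content="Mixtral 8x7B is a sparse mixture-of-experts language model...",
                 metadata={"title": "Example source", "source": "https://example.com/mixtral"})]
store = FAISS.from_documents(docs, OpenAIEmbeddings())

# MultiQueryRetriever asks the LLM for alternative phrasings of the search query
# and returns the unique documents retrieved across all of them.
retriever = MultiQueryRetriever.from_llm(retriever=store.as_retriever(), llm=chat)
unique_docs = retriever.get_relevant_documents("mixtral 8x7b details")

# search_agent.py serializes the docs to JSON via format_docs(); plain text is enough for a sketch.
context = "\n\n".join(d.page_content for d in unique_docs)
prompt = get_rag_prompt_template().format(query="What is Mixtral 8x7B?", context=context)
print(chat.invoke(prompt).content)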