Spaces:

hanchraizedai
/

semsearch

Running

App Files Files

hanoch.rahimi@gmail committed on Nov 15, 2023

Commit

aac3522

1 Parent(s): e54b3e0

moved organization to secrets

Browse files

Files changed (4) hide show

app.py +27 -26
openai_utils.py +18 -0
requirements.txt +1 -1
utils.py +15 -20

app.py CHANGED Viewed

@@ -13,12 +13,13 @@ from sentence_transformers import SentenceTransformer
 import streamlit.components.v1 as components
-import utils
 PINECONE_KEY = st.secrets["PINECONE_API_KEY"]  # app.pinecone.io
 OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]  # app.pinecone.io
 PINE_CONE_ENVIRONMENT = st.secrets["PINE_CONE_ENVIRONMENT"]  # app.pinecone.io
 model_name = 'text-embedding-ada-002'
 embed = OpenAIEmbeddings(
@@ -43,7 +44,6 @@ def init_models():
     #reader = pipeline(tokenizer=model_name, model=model_name, task='question-answering')
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     #vectorstore = Pinecone(st.session_state.index, embed.embed_query, text_field)
-    st.session_state.openai_client = openai.OpenAI(api_key = OPENAI_API_KEY,organization='org-EEpryZYLlh0mZJOGxVko32qP')
     # client.beta.assistants.create(
     #     instructions=utils.assistant_instructions,
     #     model="gpt-4-1106-preview",
@@ -51,6 +51,7 @@ def init_models():
     return retriever, tokenizer#, vectorstore
 retriever, tokenizer = init_models()
 #st.session_state.messages = [{"role":"system", "content":"You are an assistant who helps users find startups to invest in."}]
@@ -115,7 +116,7 @@ def index_query(xq, top_k, regions=[], countries=[], index_namespace="websummari
     return xc
-def run_query(query, prompt, scrape_boost, top_k , regions, countries, is_debug, index_namespace, openai_model):
     xq = retriever.encode([query]).tolist()
     try:
         xc = index_query(xq, top_k, regions, countries)
@@ -129,8 +130,8 @@ def run_query(query, prompt, scrape_boost, top_k , regions, countries, is_debug,
     for match in xc['matches']:
         #answer = reader(question=query, context=match["metadata"]['context'])
         score = match['score']
-        if 'type' in match['metadata'] and match['metadata']['type']!='description-webcontent' and scrape_boost>0:
-            score = score / scrape_boost
         answer = {'score': score, 'metadata': match['metadata']}
         if match['id'].endswith("_description"):
             answer['id'] = match['id'][:-12]
@@ -158,7 +159,8 @@ def run_query(query, prompt, scrape_boost, top_k , regions, countries, is_debug,
     # Create a summarized report focusing on the top3 companies.
     # For every company find its uniqueness over the other companies. Use only information from the descriptions.
     # """
-    if prompt!="":
         descriptions = "\n".join([f"Description of company \"{res['name']}\":  {res['data']['Summary']}.\n" for res in results[:20] if 'Summary' in res['data']])
         ntokens = len(descriptions.split(" "))
@@ -172,11 +174,12 @@ def run_query(query, prompt, scrape_boost, top_k , regions, countries, is_debug,
         prompt = prompt_template.format(descriptions = descriptions, query = query)
         print(f"==============================\nPrompt:\n{prompt}\n==============================\n")
-        new_message = {"role": "user", "content": prompt}
-        m_text = utils.call_openai(prompt, engine=openai_model, temp=0, top_p=1.0)
         m_text
     else:
         new_message = {"role": "user", "content": query}
@@ -238,39 +241,36 @@ def render_history():
     with st.session_state.history_container:
         s = f"""
-            <div style='overflow: hidden;'>
-                <div id="chat_history" style='overflow-y: scroll;height: 100px;'>
         """
         for m in st.session_state.messages:
             #print(f"Printing message\t {m['role']}: {m['content']}")
-            s = s + f"<div>{m['role']}: {m['content']}</div>"
         s = s + f"""</div>
             </div>
             <script>
                 var el = document.getElementById("chat_history");
-                console.log(el.scrollTop, el.scrollHeight);
                 el.scrollTop = el.scrollHeight;
-                console.log(el.scrollTop, el.scrollHeight);
             </script>
         """
-        components.html(s, height=140)
         #st.markdown(s, unsafe_allow_html=True)
 if utils.check_password():
     st.markdown("<script language='javascript'>console.log('scrolling');</script>", unsafe_allow_html=True)
-    if "messages" not in st.session_state:
-        st.session_state.messages = [{"role":"system", "content":"You are an assistant who helps users find startups to invest in."}]
-    st.title("Raized")
-    st.write("""
-    Search for a company in free text. Describe the type of company you are looking for, the problem they solve and the solution they provide. You can also copy in the description of a similar company to kick off the search.
-    """)
     st.markdown("""
     <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.0.0/dist/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
@@ -278,6 +278,7 @@ if utils.check_password():
     with open("data/countries.json", "r") as f:
         countries = json.load(f)['countries']
     header = st.sidebar.markdown("Filters")
     countries_selectbox = st.sidebar.multiselect("Country", countries, default=[])
     all_regions = ('Africa', 'Europe', 'Asia & Pacific', 'North America', 'South/Latin America')
     region_selectbox = st.sidebar.multiselect("Region", all_regions, default=all_regions)
@@ -314,7 +315,6 @@ if utils.check_password():
         ''',
         unsafe_allow_html=True
     )
-    st.session_state.history_container = st.container()
     tab_search, tab_advanced = st.tabs(["Search", "Settings"])
@@ -322,12 +322,13 @@ if utils.check_password():
     with tab_advanced:
         #prompt_title = st.selectbox("Report Type", index = 0, options = utils.get_prompts(), on_change=on_prompt_selected, key="advanced_prompts_select", )
         #prompt_title_editable = st.text_input("Title", key="prompt_title_editable")
         default_prompt = st.text_area("Default Prompt", value = utils.default_prompt, height=400, key="advanced_default_prompt_content")
         clustering_prompt = st.text_area("Clustering Prompt", value = utils.clustering_prompt, height=400, key="advanced_clustering_prompt_content")
         #prompt_new = st.button("New", on_click = _prompt(prompt_title, prompt))
         #prompt_delete = st.button("Del", on_click = utils.del_prompt(prompt_title_editable))
         #prompt_save = st.button("Save", on_click = utils.save_prompt(prompt_title_editable, prompt))
-        scrape_boost = st.number_input('Web to API content ratio', value=1.)
         top_k = st.number_input('# Top Results', value=20)
         is_debug = st.checkbox("Debug output", value = False, key="debug")
         openai_model = st.selectbox(label="Model", options=["gpt-4-1106-preview", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-16k"], index=0, key="openai_model")
@@ -339,9 +340,9 @@ if utils.check_password():
     with tab_search:
         #report_type = st.multiselect("Report Type", utils.get_prompts(), key="search_prompts_multiselect")
         query = st.text_input("Search!", "")
         #cluster = st.checkbox("Cluster the results", value = False, key = "cluster")
-        report_type = st.selectbox(label="Response Type", options=["company_list", "standard", "clustered"], index=0)
         #prompt_new = st.button("New", on_click = _prompt(prompt_title, prompt))
     if query != "":
@@ -351,5 +352,5 @@ if utils.check_password():
             prompt = clustering_prompt
         else:
             prompt = ""
-        run_query(query, prompt, scrape_boost, top_k, region_selectbox, countries_selectbox, is_debug, index_namespace, openai_model)

 import streamlit.components.v1 as components
+import utils
+import openai_utils as oai
 PINECONE_KEY = st.secrets["PINECONE_API_KEY"]  # app.pinecone.io
 OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]  # app.pinecone.io
 PINE_CONE_ENVIRONMENT = st.secrets["PINE_CONE_ENVIRONMENT"]  # app.pinecone.io
+OPENAI_ORGANIZATION_ID = st.secrets["OPENAI_ORGANIZATION_ID"]
 model_name = 'text-embedding-ada-002'
 embed = OpenAIEmbeddings(
     #reader = pipeline(tokenizer=model_name, model=model_name, task='question-answering')
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     #vectorstore = Pinecone(st.session_state.index, embed.embed_query, text_field)
     # client.beta.assistants.create(
     #     instructions=utils.assistant_instructions,
     #     model="gpt-4-1106-preview",
     return retriever, tokenizer#, vectorstore
+st.session_state.openai_client = openai.OpenAI(api_key = OPENAI_API_KEY,organization=OPENAI_ORGANIZATION_ID)
 retriever, tokenizer = init_models()
 #st.session_state.messages = [{"role":"system", "content":"You are an assistant who helps users find startups to invest in."}]
     return xc
+def run_query(query, prompt, top_k , regions, countries, is_debug, index_namespace, openai_model):
     xq = retriever.encode([query]).tolist()
     try:
         xc = index_query(xq, top_k, regions, countries)
     for match in xc['matches']:
         #answer = reader(question=query, context=match["metadata"]['context'])
         score = match['score']
+        # if 'type' in match['metadata'] and match['metadata']['type']!='description-webcontent' and scrape_boost>0:
+        #     score = score / scrape_boost
         answer = {'score': score, 'metadata': match['metadata']}
         if match['id'].endswith("_description"):
             answer['id'] = match['id'][:-12]
     # Create a summarized report focusing on the top3 companies.
     # For every company find its uniqueness over the other companies. Use only information from the descriptions.
     # """
+    if prompt!="" or st.session_state.new_conversation:
+        st.session_state.new_conversation = False
         descriptions = "\n".join([f"Description of company \"{res['name']}\":  {res['data']['Summary']}.\n" for res in results[:20] if 'Summary' in res['data']])
         ntokens = len(descriptions.split(" "))
         prompt = prompt_template.format(descriptions = descriptions, query = query)
         print(f"==============================\nPrompt:\n{prompt}\n==============================\n")
+        m_text = oai.call_openai(prompt, engine=openai_model, temp=0, top_p=1.0)
         m_text
+        new_message = {"role": "user", "content": query}
     else:
         new_message = {"role": "user", "content": query}
     with st.session_state.history_container:
         s = f"""
+            <div style='overflow: hidden; padding:10px 0px;'>
+                <div id="chat_history" style='overflow-y: scroll;height: 200px;'>
         """
         for m in st.session_state.messages:
             #print(f"Printing message\t {m['role']}: {m['content']}")
+            s = s + f"<div class='chat_message'><b>{m['role']}</b>: {m['content']}</div>"
         s = s + f"""</div>
             </div>
             <script>
                 var el = document.getElementById("chat_history");
                 el.scrollTop = el.scrollHeight;
             </script>
         """
+        components.html(s, height=220)
         #st.markdown(s, unsafe_allow_html=True)
 if utils.check_password():
     st.markdown("<script language='javascript'>console.log('scrolling');</script>", unsafe_allow_html=True)
+    if st.sidebar.button("New Conversation") or "messages" not in st.session_state:
+        st.session_state.new_conversation = True
+        st.session_state.messages = [{"role":"system", "content":"Hello. I'm your startups discovery assistant."}]
+    st.title("Raized- Startups discovery demo")
+    #st.write("Search for a company in free text. Describe the type of company you are looking for, the problem they solve and the solution they provide. You can also copy in the description of a similar company to kick off the search.")
     st.markdown("""
     <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.0.0/dist/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
     with open("data/countries.json", "r") as f:
         countries = json.load(f)['countries']
     header = st.sidebar.markdown("Filters")
+    #new_conversation = st.sidebar.button("New Conversation", key="new_conversation")
     countries_selectbox = st.sidebar.multiselect("Country", countries, default=[])
     all_regions = ('Africa', 'Europe', 'Asia & Pacific', 'North America', 'South/Latin America')
     region_selectbox = st.sidebar.multiselect("Region", all_regions, default=all_regions)
         ''',
         unsafe_allow_html=True
     )
     tab_search, tab_advanced = st.tabs(["Search", "Settings"])
     with tab_advanced:
         #prompt_title = st.selectbox("Report Type", index = 0, options = utils.get_prompts(), on_change=on_prompt_selected, key="advanced_prompts_select", )
         #prompt_title_editable = st.text_input("Title", key="prompt_title_editable")
+        report_type = st.selectbox(label="Response Type", options=["company_list", "standard", "clustered"], index=0)
         default_prompt = st.text_area("Default Prompt", value = utils.default_prompt, height=400, key="advanced_default_prompt_content")
         clustering_prompt = st.text_area("Clustering Prompt", value = utils.clustering_prompt, height=400, key="advanced_clustering_prompt_content")
         #prompt_new = st.button("New", on_click = _prompt(prompt_title, prompt))
         #prompt_delete = st.button("Del", on_click = utils.del_prompt(prompt_title_editable))
         #prompt_save = st.button("Save", on_click = utils.save_prompt(prompt_title_editable, prompt))
+        #scrape_boost = st.number_input('Web to API content ratio', value=1.)
         top_k = st.number_input('# Top Results', value=20)
         is_debug = st.checkbox("Debug output", value = False, key="debug")
         openai_model = st.selectbox(label="Model", options=["gpt-4-1106-preview", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-16k"], index=0, key="openai_model")
     with tab_search:
         #report_type = st.multiselect("Report Type", utils.get_prompts(), key="search_prompts_multiselect")
+        st.session_state.history_container = st.container()
         query = st.text_input("Search!", "")
         #cluster = st.checkbox("Cluster the results", value = False, key = "cluster")
         #prompt_new = st.button("New", on_click = _prompt(prompt_title, prompt))
     if query != "":
             prompt = clustering_prompt
         else:
             prompt = ""
+        run_query(query, prompt, top_k, region_selectbox, countries_selectbox, is_debug, index_namespace, openai_model)

openai_utils.py CHANGED Viewed

@@ -2,6 +2,24 @@ import time
 import streamlit as st
 def send_message(role, content):
     message = st.session_state.openai_client.beta.threads.messages.create(
         thread_id=st.session_state.assistant_thread.id,

 import streamlit as st
+def call_openai(prompt, engine="gpt-3.5-turbo", temp=0, top_p=1.0, max_tokens=4048):
+    try:
+        response = st.session_state.openai_client.chat.completions.create(
+            model=engine,
+            messages=st.session_state.messages,
+            temperature=temp,
+            max_tokens=max_tokens
+        )
+        print(f"====================\nOpen AI response\n {response}\n====================\n")
+        text = response.choices[0].message.content.strip()
+        st.session_state.messages.append({"role": "system", "content": text})
+        return text
+    except Exception as e:
+    #except openai.error.OpenAIError as e:
+        print(f"An error occurred: {str(e)}")
+    return "Failed to generate a response."
 def send_message(role, content):
     message = st.session_state.openai_client.beta.threads.messages.create(
         thread_id=st.session_state.assistant_thread.id,

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 langchain
-openai
 pinecone-client
 psycopg2-binary==2.8.6
 sentence_transformers

 langchain
+openai==1.2.4
 pinecone-client
 psycopg2-binary==2.8.6
 sentence_transformers

utils.py CHANGED Viewed

@@ -19,23 +19,6 @@ import openai
 ###
-def call_openai(prompt, engine="gpt-3.5-turbo", temp=0, top_p=1.0, max_tokens=4048):
-    try:
-        response = st.session_state.openai_client.chat.completions.create(
-            model=engine,
-            messages=st.session_state.messages,
-            temperature=temp,
-            max_tokens=max_tokens
-        )
-        print(f"Open AI response\n {response}")
-        text = response.choices[0].message.content.strip()
-        st.session_state.messages.append({"role": "system", "content": text})
-        return text
-    except Exception as e:
-    #except openai.error.OpenAIError as e:
-        print(f"An error occurred: {str(e)}")
-    return "Failed to generate a response."
 def check_password():
     """Returns `True` if the user had the correct password."""
@@ -113,14 +96,26 @@ Also name the ranking criteria and suggest how to combine them to best meet the
 # - name customers and technology if they are mentioned
 # - compare them to each other and point out what they do differently or what is their unique selling proposition
 # ----"""
 default_prompt = """
-You are an invesment assistant. Below is a user query followed by a list of company descriptions that match the user query.
-the report should mention the most important companies and how they compare to each other and contain the following sections
 - summarize what those companies they are doing
 - name customers and technology if they are mentioned
 - compare the companies to each other and point out what they do differently or what is their unique selling proposition
 ----"""
 clustering_prompt = """Please create a document with the following headings:
 H2: Recap of your question
 H2: Clusters of relevant companies
@@ -159,7 +154,7 @@ H1: How you could improve your search
 def on_prompt_selected():
     title = st.session_state.advanced_prompts_select
-    new_prompt = utils.get_prompt(title)
     if len(new_prompt)>0 and len(new_prompt[0])>0:
         print(f"Got a prompt for title {title}\n {new_prompt[0]}")
         st.session_state.prompt_title_editable = st.session_state.advanced_prompts_select

 ###
 def check_password():
     """Returns `True` if the user had the correct password."""
 # - name customers and technology if they are mentioned
 # - compare them to each other and point out what they do differently or what is their unique selling proposition
 # ----"""
 default_prompt = """
+You are an assistant and your job is to help the user discover and analyze startups companies. You first need to understand what type of startups the user is looking and then create a report with an analysis of companies relevant to the user's query.
+Use only information from the explicit list of companies provided!
+Below is the user query followed by a list of company descriptions that match the user query. If you don't have enough information in the user query, offer the user ways to improve the query.
+Don't teach the user about investment though.
+The report should mention the most important companies and how they compare to each other and contain the following sections
 - summarize what those companies they are doing
 - name customers and technology if they are mentioned
 - compare the companies to each other and point out what they do differently or what is their unique selling proposition
 ----"""
+query_finetune_prompt = """
+You are an assistant and your job is to help the user discover and analyze startups companies.
+You first need to understand what type of startups the user is looking and then create a report with an analysis of companies relevant to the user's query.
+"""
+summarization_prompt = """
+"""
 clustering_prompt = """Please create a document with the following headings:
 H2: Recap of your question
 H2: Clusters of relevant companies
 def on_prompt_selected():
     title = st.session_state.advanced_prompts_select
+    new_prompt = get_prompt(title)
     if len(new_prompt)>0 and len(new_prompt[0])>0:
         print(f"Got a prompt for title {title}\n {new_prompt[0]}")
         st.session_state.prompt_title_editable = st.session_state.advanced_prompts_select