Founder_Name_Extraction_v3

Sleeping

App Files Community

dygoo committed on Jun 6

Commit

6bc66b1

verified ·

1 Parent(s): 68aa122

Update model.py

Browse files

Files changed (1) hide show

model.py +9 -28

model.py CHANGED Viewed

@@ -8,67 +8,51 @@ def search_articles(name: str) -> str:
     """Search for 3 newspaper articles containing the name and keywords using DuckDuckGo"""
     keywords = ['founders', 'partners', 'funders', 'owners']
     search_query = f'"{name}" ({" OR ".join(keywords)}) site:news'
     try:
         with DDGS() as ddgs:
             results = list(ddgs.text(search_query, max_results=3))
         if not results:
             return f"No articles found for {name}"
         articles = []
         for i, result in enumerate(results, 1):
             article = f"**{i}. {result['title']}**\n"
             article += f"Source: {result['href']}\n"
             article += f"{result['body']}\n"
             articles.append(article)
         return "\n\n".join(articles)
     except Exception as e:
         return f"Search failed: {str(e)}"
 def extract_entities(search_results: str) -> str:
-    """Extract entities using Modal Labs Llama API"""
-    modal_endpoint = os.getenv("MODAL_ENDPOINT")
-    modal_token = os.getenv("MODAL_TOKEN")
-    if not modal_endpoint or not modal_token:
-        return "Modal Labs credentials not configured"
-    prompt = f"""Extract all person names and organization names from the following text.
 Format as:
 PERSON: [name]
 ORG: [organization name]
 Text: {search_results}"""
     try:
         response = requests.post(
             modal_endpoint,
-            headers={"Authorization": f"Bearer {modal_token}"},
             json={
                 "prompt": prompt,
                 "max_tokens": 500,
                 "temperature": 0.1
             }
         )
         if response.status_code == 200:
-            return response.json().get("text", "No entities extracted")
         else:
             return f"API Error: {response.status_code}"
     except Exception as e:
         return f"Extraction failed: {str(e)}"
 def find_full_names(search_results: str, entities: str) -> str:
-    """Find full names using Modal Labs Llama API"""
-    modal_endpoint = os.getenv("MODAL_ENDPOINT")
-    modal_token = os.getenv("MODAL_TOKEN")
-    if not modal_endpoint or not modal_token:
-        return "Modal Labs credentials not configured"
     prompt = f"""Based on the search results, find the full names and titles/roles for these entities:
@@ -77,22 +61,19 @@ Entities: {entities}
 Search Results: {search_results}
 Provide full names with their roles/titles where mentioned."""
     try:
         response = requests.post(
             modal_endpoint,
-            headers={"Authorization": f"Bearer {modal_token}"},
             json={
                 "prompt": prompt,
                 "max_tokens": 300,
                 "temperature": 0.1
             }
         )
         if response.status_code == 200:
-            return response.json().get("text", "No full names found")
         else:
             return f"API Error: {response.status_code}"
     except Exception as e:
         return f"Full name extraction failed: {str(e)}"

     """Search for 3 newspaper articles containing the name and keywords using DuckDuckGo"""
     keywords = ['founders', 'partners', 'funders', 'owners']
     search_query = f'"{name}" ({" OR ".join(keywords)}) site:news'
     try:
         with DDGS() as ddgs:
             results = list(ddgs.text(search_query, max_results=3))
         if not results:
             return f"No articles found for {name}"
         articles = []
         for i, result in enumerate(results, 1):
             article = f"**{i}. {result['title']}**\n"
             article += f"Source: {result['href']}\n"
             article += f"{result['body']}\n"
             articles.append(article)
         return "\n\n".join(articles)
     except Exception as e:
         return f"Search failed: {str(e)}"
 def extract_entities(search_results: str) -> str:
+    """Extract entities using Mistral 7B endpoint"""
+    modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
+    prompt = f"""Extract all person names and organization names from the following text.
 Format as:
 PERSON: [name]
 ORG: [organization name]
 Text: {search_results}"""
     try:
         response = requests.post(
             modal_endpoint,
             json={
                 "prompt": prompt,
                 "max_tokens": 500,
                 "temperature": 0.1
             }
         )
         if response.status_code == 200:
+            return response.json().get("response", "No entities extracted")
         else:
             return f"API Error: {response.status_code}"
     except Exception as e:
         return f"Extraction failed: {str(e)}"
 def find_full_names(search_results: str, entities: str) -> str:
+    """Find full names using Mistral 7B endpoint"""
+    modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
     prompt = f"""Based on the search results, find the full names and titles/roles for these entities:
 Search Results: {search_results}
 Provide full names with their roles/titles where mentioned."""
     try:
         response = requests.post(
             modal_endpoint,
             json={
                 "prompt": prompt,
                 "max_tokens": 300,
                 "temperature": 0.1
             }
         )
         if response.status_code == 200:
+            return response.json().get("response", "No full names found")
         else:
             return f"API Error: {response.status_code}"
     except Exception as e:
         return f"Full name extraction failed: {str(e)}"