Founder_Name_Extraction_v3

Sleeping

App Files Community

dygoo committed on Jun 9

Commit

41c9f53

verified ·

1 Parent(s): 4821f3c

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -2

app.py CHANGED Viewed

@@ -127,7 +127,14 @@ def search_articles(name: str, max_articles: int = 2) -> str:
 def extract_entities(search_results: str) -> str:
     modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
     prompt = f"""Extract all person names and organization names from the following text. Do not extract products and service names. Only individuals and organizations. Bring the full details of the name in the newspaper article. For example, if only ACME is mentioned as company name, bring only ACME. IF ACME Inc is mentioned as company name, then you have to extract ACME Inc. In addition, define the relationship between the entity and the company that is being searched. For example, is ACME Inc an owner of the company being searched? Then write 'owner'. Is ACME Inc. a funder of the company being searched? Then write 'funder'
 Format as:
 PERSON: [name] - [relationship]
@@ -137,8 +144,8 @@ Text: {search_results}"""
     try:
         response = requests.post(
             modal_endpoint,
-            json={"prompt": prompt, "max_tokens": 10000, "temperature": 0.15},
-            timeout=60
         )
         if response.status_code == 200:
             return response.json().get("response", "No entities extracted")
@@ -149,6 +156,7 @@ Text: {search_results}"""
     except Exception as e:
         return f"[ERROR] Extraction failed: {str(e)}"
 # === Gradio interface functions ===
 def search_only(name: str, article_count: int):

 def extract_entities(search_results: str) -> str:
+    """Extract entities using Mistral 7B endpoint"""
     modal_endpoint = "https://msoaresdiego--mistral-llm-endpoint-fastapi-app.modal.run/generate"
+    # Truncate input to avoid excessive model load
+    MAX_CHARS = 8000
+    if len(search_results) > MAX_CHARS:
+        search_results = search_results[:MAX_CHARS]
     prompt = f"""Extract all person names and organization names from the following text. Do not extract products and service names. Only individuals and organizations. Bring the full details of the name in the newspaper article. For example, if only ACME is mentioned as company name, bring only ACME. IF ACME Inc is mentioned as company name, then you have to extract ACME Inc. In addition, define the relationship between the entity and the company that is being searched. For example, is ACME Inc an owner of the company being searched? Then write 'owner'. Is ACME Inc. a funder of the company being searched? Then write 'funder'
 Format as:
 PERSON: [name] - [relationship]
     try:
         response = requests.post(
             modal_endpoint,
+            json={"prompt": prompt, "max_tokens": 1000, "temperature": 0.15},
+            timeout=90  # Increased timeout
         )
         if response.status_code == 200:
             return response.json().get("response", "No entities extracted")
     except Exception as e:
         return f"[ERROR] Extraction failed: {str(e)}"
 # === Gradio interface functions ===
 def search_only(name: str, article_count: int):