Spaces:

raznis
/

OSINT_Investigator

Running

App Files Community

raznis committed 18 days ago

Commit

71aa48c

verified ·

1 Parent(s): f3e1565

Upload folder using huggingface_hub

Browse files

Files changed (11) hide show

.DS_Store +0 -0
.gitattributes +2 -0
.gitignore +2 -1
app.py +56 -13
images/logo1.png +3 -0
images/logo2.png +3 -0
investigators/src/investigators/config/agents.yaml +2 -0
investigators/src/investigators/config/tasks.yaml +6 -3
investigators/src/investigators/crew.py +30 -2
pyproject.toml +2 -0
uv.lock +4 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+images/logo1.png filter=lfs diff=lfs merge=lfs -text
+images/logo2.png filter=lfs diff=lfs merge=lfs -text

.gitignore CHANGED Viewed

@@ -175,4 +175,5 @@ cython_debug/
 # outputs
 output/*
-investigators/output/*

 # outputs
 output/*
+investigators/output/*
+search_results_2025*

app.py CHANGED Viewed

@@ -15,16 +15,59 @@ def investigate(target_name, affiliations):
     return crew_output.raw
-view = gr.Interface(
-    fn=investigate,
-    inputs=[
-        gr.Textbox(label="Target name:"),
-        gr.Textbox(label="Target Affiliations (comma separated):")],
-    outputs=[gr.Markdown(label="Risk Assessment Report:")],
-    flagging_mode="never",
-    examples=[
-        ["Raz Nissim", "Ben Gurion University, General Motors"],
-    ],
-    title="OSINT Investigator",
-    description="Enter the name of your target and their affiliations (to make search easier), and get a AML Risk assessment based on their public information.",)
-view.launch(inbrowser=True)

     return crew_output.raw
+# view = gr.Interface(
+#     fn=investigate,
+#     inputs=[
+#         gr.Textbox(label="Target name:"),
+#         gr.Textbox(label="Target Affiliations (comma separated):")],
+#     outputs=[gr.Markdown(label="Risk Assessment Report:")],
+#     flagging_mode="never",
+#     examples=[
+#         ["Raz Nissim", "Ben Gurion University, General Motors"],
+#     ],
+#     title="OSINT Investigator",
+#     description="Enter the name of your target and their affiliations (to make search easier), and get a AML Risk assessment based on their public information.",
+#     layout="vertical")
+#
+# view.launch(inbrowser=True)
+    # Clear button functionality
+def clear_inputs():
+    return "", ""
+with gr.Blocks() as view:
+    gr.Markdown("# OSINT Investigator")
+    gr.Markdown("#### Enter the name of your target and their affiliations (to make search easier), and get a AML Risk assessment based on their public information.")
+    with gr.Row(equal_height=True):
+        with gr.Column(scale=3):
+            name_input = gr.Textbox(label="Target name:")
+            affiliation_input = gr.Textbox(label="Target Affiliations (comma separated):")
+            with gr.Row():
+                clear_btn = gr.Button("Clear")
+                submit_btn = gr.Button("Investigate")
+        with gr.Column(scale=1):
+            img1 = gr.Image("images/logo1.png", show_download_button=False, show_fullscreen_button=False, show_label=False, show_share_button=False)
+    with gr.Row():
+        output = gr.Markdown(label="Risk Assessment Report:", container=True, show_copy_button=True)
+    submit_btn.click(
+        fn=investigate,
+        inputs=[name_input, affiliation_input],
+        outputs=output
+    )
+    clear_btn.click(
+        fn=clear_inputs,
+        inputs=[],
+        outputs=[name_input, affiliation_input]
+    )
+    gr.Examples(
+        examples=[["Raz Nissim", "Ben Gurion University, General Motors"]],
+        inputs=[name_input, affiliation_input]
+    )
+view.launch(inbrowser=True)

images/logo1.png ADDED Viewed

Git LFS Details

SHA256: 1d3d86208c6b36cf1cc5ad36dbbb76e617bff9a7e6f1a75cfc51d5f57a77944f
Pointer size: 131 Bytes
Size of remote file: 210 kB

images/logo2.png ADDED Viewed

Git LFS Details

SHA256: 1169321c173811e0ef8628a099df70273a8e96c865701dcf27def8001daad9a3
Pointer size: 131 Bytes
Size of remote file: 432 kB

investigators/src/investigators/config/agents.yaml CHANGED Viewed

@@ -3,10 +3,12 @@ researcher:
     Research Specialist
   goal: >
     Gather comprehensive information from public sources about the target: {target}, that has the following affiliations: {affiliations}.
   backstory: >
     You are an expert OSINT researcher with deep experience in
     finding and connecting information about people and businesses. You know how to
     follow information trails and identify valuable data sources.
   llm: openai/gpt-4o
 fincrime_analyst:

     Research Specialist
   goal: >
     Gather comprehensive information from public sources about the target: {target}, that has the following affiliations: {affiliations}.
+    If no information is found, just say "No information found.".
   backstory: >
     You are an expert OSINT researcher with deep experience in
     finding and connecting information about people and businesses. You know how to
     follow information trails and identify valuable data sources.
+    You have the ability to search the web using search tools.
   llm: openai/gpt-4o
 fincrime_analyst:

investigators/src/investigators/config/tasks.yaml CHANGED Viewed

@@ -10,11 +10,13 @@ research_target:
     3. Public records and news mentions
     4. Legal or regulatory issues
-    Run multiple searches until your findings are adequate.
     Organize your findings clearly and be sure to track your sources.
   expected_output: >
     Comprehensive research findings on {target}
   agent: researcher
 analyze_target:
   description: >
@@ -53,12 +55,12 @@ reporting_task:
     Your report should include:
     1. Executive Summary
-       - Brief overview of the target
        - Summary of key findings and risk assessment
     2. Target Profile
        - Background information
-       - Known associates and relationships
        - Business activities and corporate structure (if applicable)
     3. Red Flag Analysis
@@ -81,4 +83,5 @@ reporting_task:
   agent: osint_reporter
   context:
     - analyze_target
   output_file: output/osint_report_{target}.md

     3. Public records and news mentions
     4. Legal or regulatory issues
+    Run multiple searches until your findings are adequate.
     Organize your findings clearly and be sure to track your sources.
+    If no information is found, just say "No information found.".
   expected_output: >
     Comprehensive research findings on {target}
   agent: researcher
+  output_file: output/research_{target}
 analyze_target:
   description: >
     Your report should include:
     1. Executive Summary
+       - Overall risk assessment score [between 0.0 (no risk) to 1.0 (high risk)]
        - Summary of key findings and risk assessment
     2. Target Profile
        - Background information
+       - Known associates and relationships (if applicable)
        - Business activities and corporate structure (if applicable)
     3. Red Flag Analysis
   agent: osint_reporter
   context:
     - analyze_target
+    - research_target
   output_file: output/osint_report_{target}.md

investigators/src/investigators/crew.py CHANGED Viewed

@@ -1,8 +1,31 @@
 from crewai import Agent, Crew, Process, Task
 from crewai.project import CrewBase, agent, crew, task
 from crewai_tools import SerperDevTool
 @CrewBase
 class Investigators():
@@ -16,9 +39,9 @@ class Investigators():
         return Agent(
             config=self.agents_config['researcher'],
             verbose=True,
-            tools=[SerperDevTool()],
             retry_on_fail=True,  # Enable retry
-            max_retries=3  # Set maximum retries
         )
     @agent
@@ -38,10 +61,15 @@ class Investigators():
     # Tasks
     tasks_config = 'config/tasks.yaml'
     @task
     def research_target(self) -> Task:
         return Task(
             config=self.tasks_config['research_target'],
         )
     @task

+from typing import Tuple, Any
 from crewai import Agent, Crew, Process, Task
 from crewai.project import CrewBase, agent, crew, task
+from crewai import TaskOutput
 from crewai_tools import SerperDevTool
+from crewai_tools import ScrapeWebsiteTool
+from crewai_tools import BraveSearchTool
+NO_INFORMATION_FOUND="no information found"
+def validate_researcher_content(result: TaskOutput) -> Tuple[bool, Any]:
+    """Validate research content meets requirements."""
+    try:
+        # print("In validate_researcher_content...")
+        # Check word count
+        word_count = len(result.raw.split())
+        # print("Word Count:", word_count)
+        # print(result.raw)
+        if word_count < 100:
+            return (False, "Not enough content")
+        if NO_INFORMATION_FOUND in result.raw.lower():
+            return (False, "Model says that no information was found")
+        return (True, result)
+    except Exception as e:
+        print("The exception was:", e)
+        return (False, "Unexpected error during validation")
 @CrewBase
 class Investigators():
         return Agent(
             config=self.agents_config['researcher'],
             verbose=True,
+            tools=[SerperDevTool(save_file=True)],
             retry_on_fail=True,  # Enable retry
+            max_retries=3,  # Set maximum retries
         )
     @agent
     # Tasks
     tasks_config = 'config/tasks.yaml'
     @task
     def research_target(self) -> Task:
         return Task(
             config=self.tasks_config['research_target'],
+            guardrail=validate_researcher_content,
         )
     @task

pyproject.toml CHANGED Viewed

@@ -10,6 +10,8 @@ dependencies = [
     "openai-agents>=0.0.6",
     "python-dotenv>=1.0.1",
     "crewai[tools]>=0.119.0,<1.0.0",
 ]
 [dependency-groups]

     "openai-agents>=0.0.6",
     "python-dotenv>=1.0.1",
     "crewai[tools]>=0.119.0,<1.0.0",
+    "requests>=2.32.3",
+    "beautifulsoup4>=4.13.4",
 ]
 [dependency-groups]

uv.lock CHANGED Viewed

@@ -2308,11 +2308,13 @@ name = "osint-agent"
 version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
     { name = "crewai", extra = ["tools"] },
     { name = "gradio" },
     { name = "openai" },
     { name = "openai-agents" },
     { name = "python-dotenv" },
 ]
 [package.dev-dependencies]
@@ -2322,11 +2324,13 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "crewai", extras = ["tools"], specifier = ">=0.119.0,<1.0.0" },
     { name = "gradio", specifier = ">=5.22.0" },
     { name = "openai", specifier = ">=1.68.2" },
     { name = "openai-agents", specifier = ">=0.0.6" },
     { name = "python-dotenv", specifier = ">=1.0.1" },
 ]
 [package.metadata.requires-dev]

 version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
+    { name = "beautifulsoup4" },
     { name = "crewai", extra = ["tools"] },
     { name = "gradio" },
     { name = "openai" },
     { name = "openai-agents" },
     { name = "python-dotenv" },
+    { name = "requests" },
 ]
 [package.dev-dependencies]
 [package.metadata]
 requires-dist = [
+    { name = "beautifulsoup4", specifier = ">=4.13.4" },
     { name = "crewai", extras = ["tools"], specifier = ">=0.119.0,<1.0.0" },
     { name = "gradio", specifier = ">=5.22.0" },
     { name = "openai", specifier = ">=1.68.2" },
     { name = "openai-agents", specifier = ">=0.0.6" },
     { name = "python-dotenv", specifier = ">=1.0.1" },
+    { name = "requests", specifier = ">=2.32.3" },
 ]
 [package.metadata.requires-dev]