Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- .DS_Store +0 -0
- .gitattributes +2 -0
- .gitignore +2 -1
- app.py +56 -13
- images/logo1.png +3 -0
- images/logo2.png +3 -0
- investigators/src/investigators/config/agents.yaml +2 -0
- investigators/src/investigators/config/tasks.yaml +6 -3
- investigators/src/investigators/crew.py +30 -2
- pyproject.toml +2 -0
- uv.lock +4 -0
.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
images/logo1.png filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
images/logo2.png filter=lfs diff=lfs merge=lfs -text
|
.gitignore
CHANGED
|
@@ -175,4 +175,5 @@ cython_debug/
|
|
| 175 |
|
| 176 |
# outputs
|
| 177 |
output/*
|
| 178 |
-
investigators/output/*
|
|
|
|
|
|
| 175 |
|
| 176 |
# outputs
|
| 177 |
output/*
|
| 178 |
+
investigators/output/*
|
| 179 |
+
search_results_2025*
|
app.py
CHANGED
|
@@ -15,16 +15,59 @@ def investigate(target_name, affiliations):
|
|
| 15 |
|
| 16 |
return crew_output.raw
|
| 17 |
|
| 18 |
-
view = gr.Interface(
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
return crew_output.raw
|
| 17 |
|
| 18 |
+
# view = gr.Interface(
|
| 19 |
+
# fn=investigate,
|
| 20 |
+
# inputs=[
|
| 21 |
+
# gr.Textbox(label="Target name:"),
|
| 22 |
+
# gr.Textbox(label="Target Affiliations (comma separated):")],
|
| 23 |
+
# outputs=[gr.Markdown(label="Risk Assessment Report:")],
|
| 24 |
+
# flagging_mode="never",
|
| 25 |
+
# examples=[
|
| 26 |
+
# ["Raz Nissim", "Ben Gurion University, General Motors"],
|
| 27 |
+
# ],
|
| 28 |
+
# title="OSINT Investigator",
|
| 29 |
+
# description="Enter the name of your target and their affiliations (to make search easier), and get an AML Risk assessment based on their public information.",
|
| 30 |
+
# layout="vertical")
|
| 31 |
+
#
|
| 32 |
+
# view.launch(inbrowser=True)
|
| 33 |
+
|
| 34 |
+
# Clear button functionality
def clear_inputs() -> tuple[str, str]:
    """Return one empty string per input textbox.

    Wired to the Clear button below; the two values map, in order, onto
    ``name_input`` and ``affiliation_input``.
    """
    return "", ""


# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation had been flattened -- confirm that the button row belongs inside
# the wide input column and that the report row sits directly under the page.
with gr.Blocks() as view:
    gr.Markdown("# OSINT Investigator")
    gr.Markdown(
        "#### Enter the name of your target and their affiliations "
        "(to make search easier), and get an AML Risk assessment based on "
        "their public information."
    )

    with gr.Row(equal_height=True):
        # Wide column: the two free-text inputs and the action buttons.
        with gr.Column(scale=3):
            name_input = gr.Textbox(label="Target name:")
            affiliation_input = gr.Textbox(
                label="Target Affiliations (comma separated):"
            )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Investigate")
        # Narrow column: static logo with every toolbar control switched off.
        with gr.Column(scale=1):
            gr.Image(
                "images/logo1.png",
                show_download_button=False,
                show_fullscreen_button=False,
                show_label=False,
                show_share_button=False,
            )

    with gr.Row():
        output = gr.Markdown(
            label="Risk Assessment Report:",
            container=True,
            show_copy_button=True,
        )

    # Run the investigation and render the returned report as Markdown.
    submit_btn.click(
        fn=investigate,
        inputs=[name_input, affiliation_input],
        outputs=output,
    )

    # Reset both textboxes; the rendered report is left untouched.
    clear_btn.click(
        fn=clear_inputs,
        inputs=[],
        outputs=[name_input, affiliation_input],
    )

    # Clicking the example row fills in both inputs.
    gr.Examples(
        examples=[["Raz Nissim", "Ben Gurion University, General Motors"]],
        inputs=[name_input, affiliation_input],
    )

view.launch(inbrowser=True)
|
images/logo1.png
ADDED
|
Git LFS Details
|
images/logo2.png
ADDED
|
Git LFS Details
|
investigators/src/investigators/config/agents.yaml
CHANGED
|
@@ -3,10 +3,12 @@ researcher:
|
|
| 3 |
Research Specialist
|
| 4 |
goal: >
|
| 5 |
Gather comprehensive information from public sources about the target: {target}, that has the following affiliations: {affiliations}.
|
|
|
|
| 6 |
backstory: >
|
| 7 |
You are an expert OSINT researcher with deep experience in
|
| 8 |
finding and connecting information about people and businesses. You know how to
|
| 9 |
follow information trails and identify valuable data sources.
|
|
|
|
| 10 |
llm: openai/gpt-4o
|
| 11 |
|
| 12 |
fincrime_analyst:
|
|
|
|
| 3 |
Research Specialist
|
| 4 |
goal: >
|
| 5 |
Gather comprehensive information from public sources about the target: {target}, that has the following affiliations: {affiliations}.
|
| 6 |
+
If no information is found, just say "No information found.".
|
| 7 |
backstory: >
|
| 8 |
You are an expert OSINT researcher with deep experience in
|
| 9 |
finding and connecting information about people and businesses. You know how to
|
| 10 |
follow information trails and identify valuable data sources.
|
| 11 |
+
You have the ability to search the web using search tools.
|
| 12 |
llm: openai/gpt-4o
|
| 13 |
|
| 14 |
fincrime_analyst:
|
investigators/src/investigators/config/tasks.yaml
CHANGED
|
@@ -10,11 +10,13 @@ research_target:
|
|
| 10 |
3. Public records and news mentions
|
| 11 |
4. Legal or regulatory issues
|
| 12 |
|
| 13 |
-
Run multiple searches until your findings are adequate.
|
| 14 |
Organize your findings clearly and be sure to track your sources.
|
|
|
|
| 15 |
expected_output: >
|
| 16 |
Comprehensive research findings on {target}
|
| 17 |
agent: researcher
|
|
|
|
| 18 |
|
| 19 |
analyze_target:
|
| 20 |
description: >
|
|
@@ -53,12 +55,12 @@ reporting_task:
|
|
| 53 |
Your report should include:
|
| 54 |
|
| 55 |
1. Executive Summary
|
| 56 |
-
-
|
| 57 |
- Summary of key findings and risk assessment
|
| 58 |
|
| 59 |
2. Target Profile
|
| 60 |
- Background information
|
| 61 |
-
- Known associates and relationships
|
| 62 |
- Business activities and corporate structure (if applicable)
|
| 63 |
|
| 64 |
3. Red Flag Analysis
|
|
@@ -81,4 +83,5 @@ reporting_task:
|
|
| 81 |
agent: osint_reporter
|
| 82 |
context:
|
| 83 |
- analyze_target
|
|
|
|
| 84 |
output_file: output/osint_report_{target}.md
|
|
|
|
| 10 |
3. Public records and news mentions
|
| 11 |
4. Legal or regulatory issues
|
| 12 |
|
| 13 |
+
Run multiple searches until your findings are adequate.
|
| 14 |
Organize your findings clearly and be sure to track your sources.
|
| 15 |
+
If no information is found, just say "No information found.".
|
| 16 |
expected_output: >
|
| 17 |
Comprehensive research findings on {target}
|
| 18 |
agent: researcher
|
| 19 |
+
output_file: output/research_{target}
|
| 20 |
|
| 21 |
analyze_target:
|
| 22 |
description: >
|
|
|
|
| 55 |
Your report should include:
|
| 56 |
|
| 57 |
1. Executive Summary
|
| 58 |
+
- Overall risk assessment score [from 0.0 (no risk) to 1.0 (high risk)]
|
| 59 |
- Summary of key findings and risk assessment
|
| 60 |
|
| 61 |
2. Target Profile
|
| 62 |
- Background information
|
| 63 |
+
- Known associates and relationships (if applicable)
|
| 64 |
- Business activities and corporate structure (if applicable)
|
| 65 |
|
| 66 |
3. Red Flag Analysis
|
|
|
|
| 83 |
agent: osint_reporter
|
| 84 |
context:
|
| 85 |
- analyze_target
|
| 86 |
+
- research_target
|
| 87 |
output_file: output/osint_report_{target}.md
|
investigators/src/investigators/crew.py
CHANGED
|
@@ -1,8 +1,31 @@
|
|
|
|
|
| 1 |
from crewai import Agent, Crew, Process, Task
|
| 2 |
from crewai.project import CrewBase, agent, crew, task
|
|
|
|
| 3 |
from crewai_tools import SerperDevTool
|
|
|
|
|
|
|
|
|
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
@CrewBase
|
| 8 |
class Investigators():
|
|
@@ -16,9 +39,9 @@ class Investigators():
|
|
| 16 |
return Agent(
|
| 17 |
config=self.agents_config['researcher'],
|
| 18 |
verbose=True,
|
| 19 |
-
tools=[SerperDevTool()],
|
| 20 |
retry_on_fail=True, # Enable retry
|
| 21 |
-
max_retries=3 # Set maximum retries
|
| 22 |
)
|
| 23 |
|
| 24 |
@agent
|
|
@@ -38,10 +61,15 @@ class Investigators():
|
|
| 38 |
# Tasks
|
| 39 |
tasks_config = 'config/tasks.yaml'
|
| 40 |
|
|
|
|
|
|
|
| 41 |
@task
|
| 42 |
def research_target(self) -> Task:
|
|
|
|
|
|
|
| 43 |
return Task(
|
| 44 |
config=self.tasks_config['research_target'],
|
|
|
|
| 45 |
)
|
| 46 |
|
| 47 |
@task
|
|
|
|
| 1 |
+
from typing import Tuple, Any
|
| 2 |
from crewai import Agent, Crew, Process, Task
|
| 3 |
from crewai.project import CrewBase, agent, crew, task
|
| 4 |
+
from crewai import TaskOutput
|
| 5 |
from crewai_tools import SerperDevTool
|
| 6 |
+
from crewai_tools import ScrapeWebsiteTool
|
| 7 |
+
from crewai_tools import BraveSearchTool
|
| 8 |
+
# Lower-cased marker phrase; matched against the lower-cased research output.
NO_INFORMATION_FOUND = "no information found"
# Minimum number of whitespace-separated words for research output to pass.
MIN_RESEARCH_WORDS = 100


def validate_researcher_content(result: "TaskOutput") -> Tuple[bool, Any]:
    """Validate that the research output is usable before it is passed on.

    Used as the ``guardrail`` of the ``research_target`` task.

    Args:
        result: Task output whose ``raw`` attribute holds the text to check.

    Returns:
        ``(True, result)`` when the text passes both checks, otherwise
        ``(False, reason)`` where ``reason`` is a short message describing
        why the output was rejected.
    """
    try:
        text = result.raw

        # Check the marker phrase first: the bare "No information found."
        # reply is only three words long, so testing the length first would
        # always mask it behind the generic "Not enough content" message.
        if NO_INFORMATION_FOUND in text.lower():
            return (False, "Model says that no information was found")

        if len(text.split()) < MIN_RESEARCH_WORDS:
            return (False, "Not enough content")

        return (True, result)
    except Exception as e:
        # Deliberately broad: a malformed output (e.g. ``raw`` missing or not
        # a string) is reported as a failed validation instead of raising.
        print("The exception was:", e)
        return (False, "Unexpected error during validation")
|
| 29 |
|
| 30 |
@CrewBase
|
| 31 |
class Investigators():
|
|
|
|
| 39 |
return Agent(
|
| 40 |
config=self.agents_config['researcher'],
|
| 41 |
verbose=True,
|
| 42 |
+
tools=[SerperDevTool(save_file=True)],
|
| 43 |
retry_on_fail=True, # Enable retry
|
| 44 |
+
max_retries=3, # Set maximum retries
|
| 45 |
)
|
| 46 |
|
| 47 |
@agent
|
|
|
|
| 61 |
# Tasks
|
| 62 |
tasks_config = 'config/tasks.yaml'
|
| 63 |
|
| 64 |
+
|
| 65 |
+
|
| 66 |
@task
|
| 67 |
def research_target(self) -> Task:
|
| 68 |
+
|
| 69 |
+
|
| 70 |
return Task(
|
| 71 |
config=self.tasks_config['research_target'],
|
| 72 |
+
guardrail=validate_researcher_content,
|
| 73 |
)
|
| 74 |
|
| 75 |
@task
|
pyproject.toml
CHANGED
|
@@ -10,6 +10,8 @@ dependencies = [
|
|
| 10 |
"openai-agents>=0.0.6",
|
| 11 |
"python-dotenv>=1.0.1",
|
| 12 |
"crewai[tools]>=0.119.0,<1.0.0",
|
|
|
|
|
|
|
| 13 |
]
|
| 14 |
|
| 15 |
[dependency-groups]
|
|
|
|
| 10 |
"openai-agents>=0.0.6",
|
| 11 |
"python-dotenv>=1.0.1",
|
| 12 |
"crewai[tools]>=0.119.0,<1.0.0",
|
| 13 |
+
"requests>=2.32.3",
|
| 14 |
+
"beautifulsoup4>=4.13.4",
|
| 15 |
]
|
| 16 |
|
| 17 |
[dependency-groups]
|
uv.lock
CHANGED
|
@@ -2308,11 +2308,13 @@ name = "osint-agent"
|
|
| 2308 |
version = "0.1.0"
|
| 2309 |
source = { virtual = "." }
|
| 2310 |
dependencies = [
|
|
|
|
| 2311 |
{ name = "crewai", extra = ["tools"] },
|
| 2312 |
{ name = "gradio" },
|
| 2313 |
{ name = "openai" },
|
| 2314 |
{ name = "openai-agents" },
|
| 2315 |
{ name = "python-dotenv" },
|
|
|
|
| 2316 |
]
|
| 2317 |
|
| 2318 |
[package.dev-dependencies]
|
|
@@ -2322,11 +2324,13 @@ dev = [
|
|
| 2322 |
|
| 2323 |
[package.metadata]
|
| 2324 |
requires-dist = [
|
|
|
|
| 2325 |
{ name = "crewai", extras = ["tools"], specifier = ">=0.119.0,<1.0.0" },
|
| 2326 |
{ name = "gradio", specifier = ">=5.22.0" },
|
| 2327 |
{ name = "openai", specifier = ">=1.68.2" },
|
| 2328 |
{ name = "openai-agents", specifier = ">=0.0.6" },
|
| 2329 |
{ name = "python-dotenv", specifier = ">=1.0.1" },
|
|
|
|
| 2330 |
]
|
| 2331 |
|
| 2332 |
[package.metadata.requires-dev]
|
|
|
|
| 2308 |
version = "0.1.0"
|
| 2309 |
source = { virtual = "." }
|
| 2310 |
dependencies = [
|
| 2311 |
+
{ name = "beautifulsoup4" },
|
| 2312 |
{ name = "crewai", extra = ["tools"] },
|
| 2313 |
{ name = "gradio" },
|
| 2314 |
{ name = "openai" },
|
| 2315 |
{ name = "openai-agents" },
|
| 2316 |
{ name = "python-dotenv" },
|
| 2317 |
+
{ name = "requests" },
|
| 2318 |
]
|
| 2319 |
|
| 2320 |
[package.dev-dependencies]
|
|
|
|
| 2324 |
|
| 2325 |
[package.metadata]
|
| 2326 |
requires-dist = [
|
| 2327 |
+
{ name = "beautifulsoup4", specifier = ">=4.13.4" },
|
| 2328 |
{ name = "crewai", extras = ["tools"], specifier = ">=0.119.0,<1.0.0" },
|
| 2329 |
{ name = "gradio", specifier = ">=5.22.0" },
|
| 2330 |
{ name = "openai", specifier = ">=1.68.2" },
|
| 2331 |
{ name = "openai-agents", specifier = ">=0.0.6" },
|
| 2332 |
{ name = "python-dotenv", specifier = ">=1.0.1" },
|
| 2333 |
+
{ name = "requests", specifier = ">=2.32.3" },
|
| 2334 |
]
|
| 2335 |
|
| 2336 |
[package.metadata.requires-dev]
|