Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- .DS_Store +0 -0
- .gitattributes +2 -0
- .gitignore +2 -1
- app.py +56 -13
- images/logo1.png +3 -0
- images/logo2.png +3 -0
- investigators/src/investigators/config/agents.yaml +2 -0
- investigators/src/investigators/config/tasks.yaml +6 -3
- investigators/src/investigators/crew.py +30 -2
- pyproject.toml +2 -0
- uv.lock +4 -0
.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
images/logo1.png filter=lfs diff=lfs merge=lfs -text
|
37 |
+
images/logo2.png filter=lfs diff=lfs merge=lfs -text
|
.gitignore
CHANGED
@@ -175,4 +175,5 @@ cython_debug/
|
|
175 |
|
176 |
# outputs
|
177 |
output/*
|
178 |
-
investigators/output/*
|
|
|
|
175 |
|
176 |
# outputs
|
177 |
output/*
|
178 |
+
investigators/output/*
|
179 |
+
search_results_2025*
|
app.py
CHANGED
@@ -15,16 +15,59 @@ def investigate(target_name, affiliations):
|
|
15 |
|
16 |
return crew_output.raw
|
17 |
|
18 |
-
view = gr.Interface(
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
return crew_output.raw
|
17 |
|
18 |
+
# view = gr.Interface(
|
19 |
+
# fn=investigate,
|
20 |
+
# inputs=[
|
21 |
+
# gr.Textbox(label="Target name:"),
|
22 |
+
# gr.Textbox(label="Target Affiliations (comma separated):")],
|
23 |
+
# outputs=[gr.Markdown(label="Risk Assessment Report:")],
|
24 |
+
# flagging_mode="never",
|
25 |
+
# examples=[
|
26 |
+
# ["Raz Nissim", "Ben Gurion University, General Motors"],
|
27 |
+
# ],
|
28 |
+
# title="OSINT Investigator",
|
29 |
+
# description="Enter the name of your target and their affiliations (to make search easier), and get a AML Risk assessment based on their public information.",
|
30 |
+
# layout="vertical")
|
31 |
+
#
|
32 |
+
# view.launch(inbrowser=True)
|
33 |
+
|
34 |
+
def clear_inputs():
    """Return one empty string per input textbox so the Clear button resets both."""
    return ("", "")


# Build the UI: heading, a form column next to a logo column, then the report area.
with gr.Blocks() as view:
    gr.Markdown("# OSINT Investigator")
    gr.Markdown("#### Enter the name of your target and their affiliations (to make search easier), and get a AML Risk assessment based on their public information.")

    with gr.Row(equal_height=True):
        # Left column: the two text inputs with their action buttons underneath.
        with gr.Column(scale=3):
            target_box = gr.Textbox(label="Target name:")
            affiliations_box = gr.Textbox(label="Target Affiliations (comma separated):")
            with gr.Row():
                reset_button = gr.Button("Clear")
                run_button = gr.Button("Investigate")
        # Right column: static logo with all image toolbar controls disabled.
        with gr.Column(scale=1):
            logo = gr.Image(
                "images/logo1.png",
                show_download_button=False,
                show_fullscreen_button=False,
                show_label=False,
                show_share_button=False,
            )

    with gr.Row():
        report_area = gr.Markdown(
            label="Risk Assessment Report:",
            container=True,
            show_copy_button=True,
        )

    # "Investigate" sends both textbox values to investigate() and renders the result.
    run_button.click(
        fn=investigate,
        inputs=[target_box, affiliations_box],
        outputs=report_area,
    )

    # "Clear" takes no inputs and overwrites both textboxes with empty strings.
    reset_button.click(
        fn=clear_inputs,
        inputs=[],
        outputs=[target_box, affiliations_box],
    )

    # One example row that fills in both textboxes.
    gr.Examples(
        examples=[["Raz Nissim", "Ben Gurion University, General Motors"]],
        inputs=[target_box, affiliations_box],
    )

view.launch(inbrowser=True)
|
images/logo1.png
ADDED
![]() |
Git LFS Details
|
images/logo2.png
ADDED
![]() |
Git LFS Details
|
investigators/src/investigators/config/agents.yaml
CHANGED
@@ -3,10 +3,12 @@ researcher:
|
|
3 |
Research Specialist
|
4 |
goal: >
|
5 |
Gather comprehensive information from public sources about the target: {target}, that has the following affiliations: {affiliations}.
|
|
|
6 |
backstory: >
|
7 |
You are an expert OSINT researcher with deep experience in
|
8 |
finding and connecting information about people and businesses. You know how to
|
9 |
follow information trails and identify valuable data sources.
|
|
|
10 |
llm: openai/gpt-4o
|
11 |
|
12 |
fincrime_analyst:
|
|
|
3 |
Research Specialist
|
4 |
goal: >
|
5 |
Gather comprehensive information from public sources about the target: {target}, that has the following affiliations: {affiliations}.
|
6 |
+
If no information is found, just say "No information found.".
|
7 |
backstory: >
|
8 |
You are an expert OSINT researcher with deep experience in
|
9 |
finding and connecting information about people and businesses. You know how to
|
10 |
follow information trails and identify valuable data sources.
|
11 |
+
You have the ability to search the web using search tools.
|
12 |
llm: openai/gpt-4o
|
13 |
|
14 |
fincrime_analyst:
|
investigators/src/investigators/config/tasks.yaml
CHANGED
@@ -10,11 +10,13 @@ research_target:
|
|
10 |
3. Public records and news mentions
|
11 |
4. Legal or regulatory issues
|
12 |
|
13 |
-
Run multiple searches until your findings are adequate.
|
14 |
Organize your findings clearly and be sure to track your sources.
|
|
|
15 |
expected_output: >
|
16 |
Comprehensive research findings on {target}
|
17 |
agent: researcher
|
|
|
18 |
|
19 |
analyze_target:
|
20 |
description: >
|
@@ -53,12 +55,12 @@ reporting_task:
|
|
53 |
Your report should include:
|
54 |
|
55 |
1. Executive Summary
|
56 |
-
-
|
57 |
- Summary of key findings and risk assessment
|
58 |
|
59 |
2. Target Profile
|
60 |
- Background information
|
61 |
-
- Known associates and relationships
|
62 |
- Business activities and corporate structure (if applicable)
|
63 |
|
64 |
3. Red Flag Analysis
|
@@ -81,4 +83,5 @@ reporting_task:
|
|
81 |
agent: osint_reporter
|
82 |
context:
|
83 |
- analyze_target
|
|
|
84 |
output_file: output/osint_report_{target}.md
|
|
|
10 |
3. Public records and news mentions
|
11 |
4. Legal or regulatory issues
|
12 |
|
13 |
+
Run multiple searches until your findings are adequate.
|
14 |
Organize your findings clearly and be sure to track your sources.
|
15 |
+
If no information is found, just say "No information found.".
|
16 |
expected_output: >
|
17 |
Comprehensive research findings on {target}
|
18 |
agent: researcher
|
19 |
+
output_file: output/research_{target}
|
20 |
|
21 |
analyze_target:
|
22 |
description: >
|
|
|
55 |
Your report should include:
|
56 |
|
57 |
1. Executive Summary
|
58 |
+
- Overall risk assessment score [from 0.0 (no risk) to 1.0 (high risk)]
|
59 |
- Summary of key findings and risk assessment
|
60 |
|
61 |
2. Target Profile
|
62 |
- Background information
|
63 |
+
- Known associates and relationships (if applicable)
|
64 |
- Business activities and corporate structure (if applicable)
|
65 |
|
66 |
3. Red Flag Analysis
|
|
|
83 |
agent: osint_reporter
|
84 |
context:
|
85 |
- analyze_target
|
86 |
+
- research_target
|
87 |
output_file: output/osint_report_{target}.md
|
investigators/src/investigators/crew.py
CHANGED
@@ -1,8 +1,31 @@
|
|
|
|
1 |
from crewai import Agent, Crew, Process, Task
|
2 |
from crewai.project import CrewBase, agent, crew, task
|
|
|
3 |
from crewai_tools import SerperDevTool
|
|
|
|
|
|
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
@CrewBase
|
8 |
class Investigators():
|
@@ -16,9 +39,9 @@ class Investigators():
|
|
16 |
return Agent(
|
17 |
config=self.agents_config['researcher'],
|
18 |
verbose=True,
|
19 |
-
tools=[SerperDevTool()],
|
20 |
retry_on_fail=True, # Enable retry
|
21 |
-
max_retries=3 # Set maximum retries
|
22 |
)
|
23 |
|
24 |
@agent
|
@@ -38,10 +61,15 @@ class Investigators():
|
|
38 |
# Tasks
|
39 |
tasks_config = 'config/tasks.yaml'
|
40 |
|
|
|
|
|
41 |
@task
|
42 |
def research_target(self) -> Task:
|
|
|
|
|
43 |
return Task(
|
44 |
config=self.tasks_config['research_target'],
|
|
|
45 |
)
|
46 |
|
47 |
@task
|
|
|
1 |
+
from typing import Tuple, Any
|
2 |
from crewai import Agent, Crew, Process, Task
|
3 |
from crewai.project import CrewBase, agent, crew, task
|
4 |
+
from crewai import TaskOutput
|
5 |
from crewai_tools import SerperDevTool
|
6 |
+
from crewai_tools import ScrapeWebsiteTool
|
7 |
+
from crewai_tools import BraveSearchTool
|
8 |
+
# Lower-case marker looked for in the researcher's reply.
NO_INFORMATION_FOUND = "no information found"

# Minimum number of whitespace-separated words an acceptable reply must contain.
MIN_RESEARCH_WORDS = 100


def validate_researcher_content(result: "TaskOutput") -> Tuple[bool, Any]:
    """Validate that the researcher's output is substantial enough to use.

    Args:
        result: Task output object; only its ``raw`` text attribute is read.

    Returns:
        ``(True, result)`` when the text passes every check, otherwise
        ``(False, reason)`` where ``reason`` is a short explanation string.
    """
    raw_text = getattr(result, "raw", None)
    if not isinstance(raw_text, str):
        # A missing or non-text payload cannot be validated; report it with
        # the same message the validator has always used for failures.
        return (False, "Unexpected error during validation")

    # Look for the "no information" marker before counting words: such a
    # reply is normally only a few words long, so checking length first
    # would report it as "Not enough content" and hide the real reason.
    if NO_INFORMATION_FOUND in raw_text.lower():
        return (False, "Model says that no information was found")

    if len(raw_text.split()) < MIN_RESEARCH_WORDS:
        return (False, "Not enough content")

    return (True, result)
|
29 |
|
30 |
@CrewBase
|
31 |
class Investigators():
|
|
|
39 |
return Agent(
|
40 |
config=self.agents_config['researcher'],
|
41 |
verbose=True,
|
42 |
+
tools=[SerperDevTool(save_file=True)],
|
43 |
retry_on_fail=True, # Enable retry
|
44 |
+
max_retries=3, # Set maximum retries
|
45 |
)
|
46 |
|
47 |
@agent
|
|
|
61 |
# Tasks
|
62 |
tasks_config = 'config/tasks.yaml'
|
63 |
|
64 |
+
|
65 |
+
|
66 |
@task
|
67 |
def research_target(self) -> Task:
|
68 |
+
|
69 |
+
|
70 |
return Task(
|
71 |
config=self.tasks_config['research_target'],
|
72 |
+
guardrail=validate_researcher_content,
|
73 |
)
|
74 |
|
75 |
@task
|
pyproject.toml
CHANGED
@@ -10,6 +10,8 @@ dependencies = [
|
|
10 |
"openai-agents>=0.0.6",
|
11 |
"python-dotenv>=1.0.1",
|
12 |
"crewai[tools]>=0.119.0,<1.0.0",
|
|
|
|
|
13 |
]
|
14 |
|
15 |
[dependency-groups]
|
|
|
10 |
"openai-agents>=0.0.6",
|
11 |
"python-dotenv>=1.0.1",
|
12 |
"crewai[tools]>=0.119.0,<1.0.0",
|
13 |
+
"requests>=2.32.3",
|
14 |
+
"beautifulsoup4>=4.13.4",
|
15 |
]
|
16 |
|
17 |
[dependency-groups]
|
uv.lock
CHANGED
@@ -2308,11 +2308,13 @@ name = "osint-agent"
|
|
2308 |
version = "0.1.0"
|
2309 |
source = { virtual = "." }
|
2310 |
dependencies = [
|
|
|
2311 |
{ name = "crewai", extra = ["tools"] },
|
2312 |
{ name = "gradio" },
|
2313 |
{ name = "openai" },
|
2314 |
{ name = "openai-agents" },
|
2315 |
{ name = "python-dotenv" },
|
|
|
2316 |
]
|
2317 |
|
2318 |
[package.dev-dependencies]
|
@@ -2322,11 +2324,13 @@ dev = [
|
|
2322 |
|
2323 |
[package.metadata]
|
2324 |
requires-dist = [
|
|
|
2325 |
{ name = "crewai", extras = ["tools"], specifier = ">=0.119.0,<1.0.0" },
|
2326 |
{ name = "gradio", specifier = ">=5.22.0" },
|
2327 |
{ name = "openai", specifier = ">=1.68.2" },
|
2328 |
{ name = "openai-agents", specifier = ">=0.0.6" },
|
2329 |
{ name = "python-dotenv", specifier = ">=1.0.1" },
|
|
|
2330 |
]
|
2331 |
|
2332 |
[package.metadata.requires-dev]
|
|
|
2308 |
version = "0.1.0"
|
2309 |
source = { virtual = "." }
|
2310 |
dependencies = [
|
2311 |
+
{ name = "beautifulsoup4" },
|
2312 |
{ name = "crewai", extra = ["tools"] },
|
2313 |
{ name = "gradio" },
|
2314 |
{ name = "openai" },
|
2315 |
{ name = "openai-agents" },
|
2316 |
{ name = "python-dotenv" },
|
2317 |
+
{ name = "requests" },
|
2318 |
]
|
2319 |
|
2320 |
[package.dev-dependencies]
|
|
|
2324 |
|
2325 |
[package.metadata]
|
2326 |
requires-dist = [
|
2327 |
+
{ name = "beautifulsoup4", specifier = ">=4.13.4" },
|
2328 |
{ name = "crewai", extras = ["tools"], specifier = ">=0.119.0,<1.0.0" },
|
2329 |
{ name = "gradio", specifier = ">=5.22.0" },
|
2330 |
{ name = "openai", specifier = ">=1.68.2" },
|
2331 |
{ name = "openai-agents", specifier = ">=0.0.6" },
|
2332 |
{ name = "python-dotenv", specifier = ">=1.0.1" },
|
2333 |
+
{ name = "requests", specifier = ">=2.32.3" },
|
2334 |
]
|
2335 |
|
2336 |
[package.metadata.requires-dev]
|