Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- .gitignore +178 -0
- README.md +3 -8
- app.py +30 -0
- investigators/.gitignore +3 -0
- investigators/README.md +54 -0
- investigators/knowledge/user_preference.txt +4 -0
- investigators/pyproject.toml +23 -0
- investigators/src/investigators/__init__.py +0 -0
- investigators/src/investigators/config/agents.yaml +34 -0
- investigators/src/investigators/config/tasks.yaml +85 -0
- investigators/src/investigators/crew.py +68 -0
- investigators/src/investigators/main.py +60 -0
- investigators/src/investigators/tools/__init__.py +0 -0
- investigators/src/investigators/tools/custom_tool.py +19 -0
- investigators/uv.lock +0 -0
- pyproject.toml +18 -0
- requirements-doc.md +313 -0
- requirements.txt +9 -0
- test_serper.py +20 -0
- uv.lock +0 -0
.gitignore
ADDED
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# UV
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
#uv.lock
|
102 |
+
|
103 |
+
# poetry
|
104 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
105 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
106 |
+
# commonly ignored for libraries.
|
107 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
108 |
+
#poetry.lock
|
109 |
+
|
110 |
+
# pdm
|
111 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
112 |
+
#pdm.lock
|
113 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
114 |
+
# in version control.
|
115 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
116 |
+
.pdm.toml
|
117 |
+
.pdm-python
|
118 |
+
.pdm-build/
|
119 |
+
|
120 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
121 |
+
__pypackages__/
|
122 |
+
|
123 |
+
# Celery stuff
|
124 |
+
celerybeat-schedule
|
125 |
+
celerybeat.pid
|
126 |
+
|
127 |
+
# SageMath parsed files
|
128 |
+
*.sage.py
|
129 |
+
|
130 |
+
# Environments
|
131 |
+
.env
|
132 |
+
.venv
|
133 |
+
env/
|
134 |
+
venv/
|
135 |
+
ENV/
|
136 |
+
env.bak/
|
137 |
+
venv.bak/
|
138 |
+
|
139 |
+
# Spyder project settings
|
140 |
+
.spyderproject
|
141 |
+
.spyproject
|
142 |
+
|
143 |
+
# Rope project settings
|
144 |
+
.ropeproject
|
145 |
+
|
146 |
+
# mkdocs documentation
|
147 |
+
/site
|
148 |
+
|
149 |
+
# mypy
|
150 |
+
.mypy_cache/
|
151 |
+
.dmypy.json
|
152 |
+
dmypy.json
|
153 |
+
|
154 |
+
# Pyre type checker
|
155 |
+
.pyre/
|
156 |
+
|
157 |
+
# pytype static type analyzer
|
158 |
+
.pytype/
|
159 |
+
|
160 |
+
# Cython debug symbols
|
161 |
+
cython_debug/
|
162 |
+
|
163 |
+
# PyCharm
|
164 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
165 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
166 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
167 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
168 |
+
#.idea/
|
169 |
+
|
170 |
+
# Ruff stuff:
|
171 |
+
.ruff_cache/
|
172 |
+
|
173 |
+
# PyPI configuration file
|
174 |
+
.pypirc
|
175 |
+
|
176 |
+
# outputs
|
177 |
+
output/*
|
178 |
+
investigators/output/*
|
README.md
CHANGED
@@ -1,12 +1,7 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
|
4 |
-
colorFrom: indigo
|
5 |
-
colorTo: red
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.30.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: OSINT_Investigator
|
3 |
+
app_file: app.py
|
|
|
|
|
4 |
sdk: gradio
|
5 |
sdk_version: 5.30.0
|
|
|
|
|
6 |
---
|
7 |
+
# OSINT_Agent
|
|
app.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from investigators.src.investigators.crew import Investigators
|
3 |
+
|
4 |
+
def investigate(target_name, affiliations):
|
5 |
+
inputs = {
|
6 |
+
'target': target_name,
|
7 |
+
'affiliations': affiliations,
|
8 |
+
# 'current_year': str(datetime.now().year)
|
9 |
+
}
|
10 |
+
|
11 |
+
try:
|
12 |
+
crew_output = Investigators().crew().kickoff(inputs=inputs)
|
13 |
+
except Exception as e:
|
14 |
+
raise Exception(f"An error occurred while running the crew: {e}")
|
15 |
+
|
16 |
+
return crew_output.raw
|
17 |
+
|
18 |
+
view = gr.Interface(
|
19 |
+
fn=investigate,
|
20 |
+
inputs=[
|
21 |
+
gr.Textbox(label="Target name:"),
|
22 |
+
gr.Textbox(label="Target Affiliations (comma separated):")],
|
23 |
+
outputs=[gr.Markdown(label="Risk Assessment Report:")],
|
24 |
+
flagging_mode="never",
|
25 |
+
examples=[
|
26 |
+
["Raz Nissim", "Ben Gurion University, General Motors"],
|
27 |
+
],
|
28 |
+
title="OSINT Investigator",
|
29 |
+
description="Enter the name of your target and their affiliations (to make search easier), and get a AML Risk assessment based on their public information.",)
|
30 |
+
view.launch(inbrowser=True)
|
investigators/.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
.env
|
2 |
+
__pycache__/
|
3 |
+
.DS_Store
|
investigators/README.md
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Investigators Crew
|
2 |
+
|
3 |
+
Welcome to the Investigators Crew project, powered by [crewAI](https://crewai.com). This template is designed to help you set up a multi-agent AI system with ease, leveraging the powerful and flexible framework provided by crewAI. Our goal is to enable your agents to collaborate effectively on complex tasks, maximizing their collective intelligence and capabilities.
|
4 |
+
|
5 |
+
## Installation
|
6 |
+
|
7 |
+
Ensure you have Python >=3.10 <3.13 installed on your system. This project uses [UV](https://docs.astral.sh/uv/) for dependency management and package handling, offering a seamless setup and execution experience.
|
8 |
+
|
9 |
+
First, if you haven't already, install uv:
|
10 |
+
|
11 |
+
```bash
|
12 |
+
pip install uv
|
13 |
+
```
|
14 |
+
|
15 |
+
Next, navigate to your project directory and install the dependencies:
|
16 |
+
|
17 |
+
(Optional) Lock the dependencies and install them by using the CLI command:
|
18 |
+
```bash
|
19 |
+
crewai install
|
20 |
+
```
|
21 |
+
### Customizing
|
22 |
+
|
23 |
+
**Add your `OPENAI_API_KEY` into the `.env` file**
|
24 |
+
|
25 |
+
- Modify `src/investigators/config/agents.yaml` to define your agents
|
26 |
+
- Modify `src/investigators/config/tasks.yaml` to define your tasks
|
27 |
+
- Modify `src/investigators/crew.py` to add your own logic, tools and specific args
|
28 |
+
- Modify `src/investigators/main.py` to add custom inputs for your agents and tasks
|
29 |
+
|
30 |
+
## Running the Project
|
31 |
+
|
32 |
+
To kickstart your crew of AI agents and begin task execution, run this from the root folder of your project:
|
33 |
+
|
34 |
+
```bash
|
35 |
+
$ crewai run
|
36 |
+
```
|
37 |
+
|
38 |
+
This command initializes the investigators Crew, assembling the agents and assigning them tasks as defined in your configuration.
|
39 |
+
|
40 |
+
This example, unmodified, will run the create a `report.md` file with the output of a research on LLMs in the root folder.
|
41 |
+
|
42 |
+
## Understanding Your Crew
|
43 |
+
|
44 |
+
The investigators Crew is composed of multiple AI agents, each with unique roles, goals, and tools. These agents collaborate on a series of tasks, defined in `config/tasks.yaml`, leveraging their collective skills to achieve complex objectives. The `config/agents.yaml` file outlines the capabilities and configurations of each agent in your crew.
|
45 |
+
|
46 |
+
## Support
|
47 |
+
|
48 |
+
For support, questions, or feedback regarding the Investigators Crew or crewAI.
|
49 |
+
- Visit our [documentation](https://docs.crewai.com)
|
50 |
+
- Reach out to us through our [GitHub repository](https://github.com/joaomdmoura/crewai)
|
51 |
+
- [Join our Discord](https://discord.com/invite/X4JWnZnxPb)
|
52 |
+
- [Chat with our docs](https://chatg.pt/DWjSBZn)
|
53 |
+
|
54 |
+
Let's create wonders together with the power and simplicity of crewAI.
|
investigators/knowledge/user_preference.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
User name is John Doe.
|
2 |
+
User is an AI Engineer.
|
3 |
+
User is interested in AI Agents.
|
4 |
+
User is based in San Francisco, California.
|
investigators/pyproject.toml
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
name = "investigators"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "investigators using crewAI"
|
5 |
+
authors = [{ name = "Raz Nissim", email = "[email protected]" }]
|
6 |
+
requires-python = ">=3.10,<3.13"
|
7 |
+
dependencies = [
|
8 |
+
"crewai[tools]>=0.119.0,<1.0.0"
|
9 |
+
]
|
10 |
+
|
11 |
+
[project.scripts]
|
12 |
+
investigators = "investigators.main:run"
|
13 |
+
run_crew = "investigators.main:run"
|
14 |
+
train = "investigators.main:train"
|
15 |
+
replay = "investigators.main:replay"
|
16 |
+
test = "investigators.main:test"
|
17 |
+
|
18 |
+
[build-system]
|
19 |
+
requires = ["hatchling"]
|
20 |
+
build-backend = "hatchling.build"
|
21 |
+
|
22 |
+
[tool.crewai]
|
23 |
+
type = "crew"
|
investigators/src/investigators/__init__.py
ADDED
File without changes
|
investigators/src/investigators/config/agents.yaml
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
researcher:
|
2 |
+
role: >
|
3 |
+
Research Specialist
|
4 |
+
goal: >
|
5 |
+
Gather comprehensive information from public sources about the target: {target}, that has the following affiliations: {affiliations}.
|
6 |
+
backstory: >
|
7 |
+
You are an expert OSINT researcher with deep experience in
|
8 |
+
finding and connecting information about people and businesses. You know how to
|
9 |
+
follow information trails and identify valuable data sources.
|
10 |
+
llm: openai/gpt-4o-mini
|
11 |
+
|
12 |
+
fincrime_analyst:
|
13 |
+
role: >
|
14 |
+
Financial Crime Analyst
|
15 |
+
goal: >
|
16 |
+
Analyze gathered information on {target} to identify potential money laundering indicators.
|
17 |
+
Focus on the person and not their affiliations.
|
18 |
+
Ignore any sources that are not relevant, especially if do not contain the full name of the target ({target}).
|
19 |
+
backstory: >
|
20 |
+
You are a seasoned financial crime expert with extensive knowledge
|
21 |
+
of money laundering techniques, red flags, and detection methods. You can spot
|
22 |
+
suspicious patterns that others might miss.
|
23 |
+
llm: openai/gpt-4o-mini
|
24 |
+
|
25 |
+
osint_reporter:
|
26 |
+
role: >
|
27 |
+
Intelligence Reporter
|
28 |
+
goal: >
|
29 |
+
Create clear, structured reports from analysis findings.
|
30 |
+
backstory: >
|
31 |
+
You are a skilled intelligence reporter who can transform complex
|
32 |
+
findings into clear, actionable reports. You know how to present information
|
33 |
+
in a way that highlights the most important elements.
|
34 |
+
llm: openai/gpt-4o-mini
|
investigators/src/investigators/config/tasks.yaml
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
research_target:
|
2 |
+
description: >
|
3 |
+
Research {target} ({affiliations}) thoroughly using available search tools:
|
4 |
+
|
5 |
+
Name: {target}
|
6 |
+
|
7 |
+
Your research should focus on:
|
8 |
+
1. Basic background information
|
9 |
+
2. Professional history and business activities
|
10 |
+
3. Corporate structures and relationships
|
11 |
+
4. Public records and news mentions
|
12 |
+
5. Legal or regulatory issues
|
13 |
+
6. Unusual business patterns or inconsistencies
|
14 |
+
|
15 |
+
Organize your findings clearly and be sure to track your sources.
|
16 |
+
expected_output: >
|
17 |
+
Comprehensive research findings on {target}
|
18 |
+
agent: researcher
|
19 |
+
|
20 |
+
analyze_target:
|
21 |
+
description: >
|
22 |
+
Analyze the research findings on {target} to identify potential money laundering red flags. Focus only on {target} and not their affiliations.
|
23 |
+
Ignore any sources that do not contain the full name of the target ({target}) at least once.
|
24 |
+
|
25 |
+
If relevant, analyze for the following reputational Red Flags:
|
26 |
+
- Negative news coverage related to financial crimes
|
27 |
+
- Past investigations or regulatory actions
|
28 |
+
- Association with politically exposed persons (PEPs)
|
29 |
+
|
30 |
+
For each identified red flag, provide:
|
31 |
+
- A clear description
|
32 |
+
- Severity assessment (on a scale of 0.0 to 1.0)
|
33 |
+
- Supporting evidence
|
34 |
+
- Your reasoning
|
35 |
+
- Sources of information
|
36 |
+
|
37 |
+
Structure your output in JSON format.
|
38 |
+
IMPORTANT: Output ONLY the raw JSON any markdown formatting, code block delimiters, or backticks.
|
39 |
+
expected_output: >
|
40 |
+
JSON-structured analysis of {target}'s potential money laundering red flags
|
41 |
+
agent: fincrime_analyst
|
42 |
+
context:
|
43 |
+
- research_target
|
44 |
+
output_file: output/analysis_output_{target}.json
|
45 |
+
|
46 |
+
|
47 |
+
reporting_task:
|
48 |
+
description: >
|
49 |
+
Create a comprehensive intelligence report based on the following analysis
|
50 |
+
of potential money laundering red flags:
|
51 |
+
|
52 |
+
Target Name: {target}
|
53 |
+
|
54 |
+
Your report should include:
|
55 |
+
|
56 |
+
1. Executive Summary
|
57 |
+
- Brief overview of the target
|
58 |
+
- Summary of key findings and risk assessment
|
59 |
+
|
60 |
+
2. Target Profile
|
61 |
+
- Background information
|
62 |
+
- Known associates and relationships
|
63 |
+
- Business activities and corporate structure (if applicable)
|
64 |
+
|
65 |
+
3. Red Flag Analysis
|
66 |
+
- Detailed examination of each identified red flag
|
67 |
+
- Supporting evidence and reasoning
|
68 |
+
- Risk severity assessment
|
69 |
+
|
70 |
+
4. Overall Risk Assessment
|
71 |
+
- Holistic evaluation of money laundering risk
|
72 |
+
- Confidence level in findings
|
73 |
+
- Potential alternative explanations
|
74 |
+
|
75 |
+
5. Recommendations
|
76 |
+
- Suggested next steps for investigation
|
77 |
+
- Additional intelligence gaps to address
|
78 |
+
|
79 |
+
Format the report in a clear, professional structure with sections and subsections.
|
80 |
+
expected_output: >
|
81 |
+
Comprehensive intelligence report on money laundering risks, in Markdown.
|
82 |
+
agent: osint_reporter
|
83 |
+
context:
|
84 |
+
- analyze_target
|
85 |
+
output_file: output/osint_report_{target}.md
|
investigators/src/investigators/crew.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from crewai import Agent, Crew, Process, Task
|
2 |
+
from crewai.project import CrewBase, agent, crew, task
|
3 |
+
from crewai_tools import SerperDevTool
|
4 |
+
|
5 |
+
|
6 |
+
|
7 |
+
@CrewBase
|
8 |
+
class Investigators():
|
9 |
+
"""Investigators crew"""
|
10 |
+
|
11 |
+
#Agents
|
12 |
+
agents_config = 'config/agents.yaml'
|
13 |
+
|
14 |
+
@agent
|
15 |
+
def researcher(self) -> Agent:
|
16 |
+
return Agent(
|
17 |
+
config=self.agents_config['researcher'],
|
18 |
+
verbose=True,
|
19 |
+
tools=[SerperDevTool()],
|
20 |
+
retry_on_fail=True, # Enable retry
|
21 |
+
max_retries=3 # Set maximum retries
|
22 |
+
)
|
23 |
+
|
24 |
+
@agent
|
25 |
+
def fincrime_analyst(self) -> Agent:
|
26 |
+
return Agent(
|
27 |
+
config=self.agents_config['fincrime_analyst'],
|
28 |
+
verbose=False,
|
29 |
+
)
|
30 |
+
|
31 |
+
@agent
|
32 |
+
def osint_reporter(self) -> Agent:
|
33 |
+
return Agent(
|
34 |
+
config=self.agents_config['osint_reporter'],
|
35 |
+
verbose=False,
|
36 |
+
)
|
37 |
+
|
38 |
+
# Tasks
|
39 |
+
tasks_config = 'config/tasks.yaml'
|
40 |
+
|
41 |
+
@task
|
42 |
+
def research_target(self) -> Task:
|
43 |
+
return Task(
|
44 |
+
config=self.tasks_config['research_target'],
|
45 |
+
)
|
46 |
+
|
47 |
+
@task
|
48 |
+
def analyze_target(self) -> Task:
|
49 |
+
return Task(
|
50 |
+
config=self.tasks_config['analyze_target'],
|
51 |
+
)
|
52 |
+
|
53 |
+
@task
|
54 |
+
def reporting_task(self) -> Task:
|
55 |
+
return Task(
|
56 |
+
config=self.tasks_config['reporting_task'],
|
57 |
+
)
|
58 |
+
|
59 |
+
@crew
|
60 |
+
def crew(self) -> Crew:
|
61 |
+
"""Creates the Investigators crew"""
|
62 |
+
|
63 |
+
return Crew(
|
64 |
+
agents=self.agents, # Automatically created by the @agent decorator
|
65 |
+
tasks=self.tasks, # Automatically created by the @task decorator
|
66 |
+
process=Process.sequential,
|
67 |
+
verbose=True,
|
68 |
+
)
|
investigators/src/investigators/main.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import warnings
|
5 |
+
|
6 |
+
from datetime import datetime
|
7 |
+
|
8 |
+
from investigators.crew import Investigators
|
9 |
+
|
10 |
+
warnings.filterwarnings("ignore", category=SyntaxWarning, module="pysbd")
|
11 |
+
|
12 |
+
# Create output directory if it doesn't exist
|
13 |
+
os.makedirs('output', exist_ok=True)
|
14 |
+
|
15 |
+
def run():
|
16 |
+
"""
|
17 |
+
Run the crew.
|
18 |
+
"""
|
19 |
+
# inputs = {
|
20 |
+
# 'target': 'Raz Nissim',
|
21 |
+
# 'affiliations': 'Ben Gurion University, General Motors',
|
22 |
+
# # 'current_year': str(datetime.now().year)
|
23 |
+
# }
|
24 |
+
|
25 |
+
# inputs = {
|
26 |
+
# 'target': 'Avraham Hirschson',
|
27 |
+
# 'affiliations': 'Israeli government, Israeli Knesset, Histadrut',
|
28 |
+
# # 'current_year': str(datetime.now().year)
|
29 |
+
# }
|
30 |
+
|
31 |
+
# inputs = {
|
32 |
+
# 'target': 'Igal Nissim',
|
33 |
+
# 'affiliations': 'Comverse, Verint',
|
34 |
+
# # 'current_year': str(datetime.now().year)
|
35 |
+
# }
|
36 |
+
|
37 |
+
# inputs = {
|
38 |
+
# 'target': 'Yeela Harel',
|
39 |
+
# 'affiliations': 'Israel Ministry of Justice, ThetaRay, Bank Of Israel',
|
40 |
+
# # 'current_year': str(datetime.now().year)
|
41 |
+
# }
|
42 |
+
|
43 |
+
# inputs = {
|
44 |
+
# 'target': 'Yehuda Harel',
|
45 |
+
# 'affiliations': 'Hapoalim Bank',
|
46 |
+
# # 'current_year': str(datetime.now().year)
|
47 |
+
# }
|
48 |
+
|
49 |
+
inputs = {
|
50 |
+
'target': 'Bar Mittelman',
|
51 |
+
'affiliations': 'Bitin, crypto',
|
52 |
+
# 'current_year': str(datetime.now().year)
|
53 |
+
}
|
54 |
+
|
55 |
+
try:
|
56 |
+
Investigators().crew().kickoff(inputs=inputs)
|
57 |
+
except Exception as e:
|
58 |
+
raise Exception(f"An error occurred while running the crew: {e}")
|
59 |
+
|
60 |
+
|
investigators/src/investigators/tools/__init__.py
ADDED
File without changes
|
investigators/src/investigators/tools/custom_tool.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from crewai.tools import BaseTool
|
2 |
+
from typing import Type
|
3 |
+
from pydantic import BaseModel, Field
|
4 |
+
|
5 |
+
|
6 |
+
class MyCustomToolInput(BaseModel):
|
7 |
+
"""Input schema for MyCustomTool."""
|
8 |
+
argument: str = Field(..., description="Description of the argument.")
|
9 |
+
|
10 |
+
class MyCustomTool(BaseTool):
|
11 |
+
name: str = "Name of my tool"
|
12 |
+
description: str = (
|
13 |
+
"Clear description for what this tool is useful for, your agent will need this information to use it."
|
14 |
+
)
|
15 |
+
args_schema: Type[BaseModel] = MyCustomToolInput
|
16 |
+
|
17 |
+
def _run(self, argument: str) -> str:
|
18 |
+
# Implementation goes here
|
19 |
+
return "this is an example of a tool output, ignore it and move along."
|
investigators/uv.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
name = "OSINT_Agent"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "Tool for AI-based investigations and KYC"
|
5 |
+
readme = "README.md"
|
6 |
+
requires-python = ">=3.12"
|
7 |
+
dependencies = [
|
8 |
+
"gradio>=5.22.0",
|
9 |
+
"openai>=1.68.2",
|
10 |
+
"openai-agents>=0.0.6",
|
11 |
+
"python-dotenv>=1.0.1",
|
12 |
+
"crewai[tools]>=0.119.0,<1.0.0",
|
13 |
+
]
|
14 |
+
|
15 |
+
[dependency-groups]
|
16 |
+
dev = [
|
17 |
+
"ipykernel>=6.29.5",
|
18 |
+
]
|
requirements-doc.md
ADDED
@@ -0,0 +1,313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# OSINT Money Laundering Detection Application
|
2 |
+
## Requirements and Implementation Plan
|
3 |
+
|
4 |
+
## 1. Executive Summary
|
5 |
+
|
6 |
+
This document outlines the requirements and implementation plan for an Open Source Intelligence (OSINT) application designed to identify potential money laundering red flags associated with individuals and businesses. The application will leverage CrewAI as the agent orchestration framework, Brave MCP for web searches, and frontier Large Language Models (LLMs) for information analysis and structured output generation. The user interface will be built using Gradio.
|
7 |
+
|
8 |
+
## 2. Project Goals
|
9 |
+
|
10 |
+
- Create an OSINT tool that gathers comprehensive information about individuals and businesses from publicly available sources
|
11 |
+
- Identify potential money laundering indicators based on analysis of the gathered information
|
12 |
+
- Present findings in a structured, actionable format to assist in financial crime investigations
|
13 |
+
- Provide an intuitive user interface that allows for easy input and clear presentation of results
|
14 |
+
|
15 |
+
## 3. Technical Architecture
|
16 |
+
|
17 |
+
### 3.1 Core Components
|
18 |
+
|
19 |
+
1. **CrewAI Framework**: Orchestrates autonomous agents to perform specialized tasks
|
20 |
+
2. **Web Search Module**: Utilizes Brave MCP for comprehensive web searches
|
21 |
+
3. **LLM Analysis Engine**: Leverages frontier LLMs to process and analyze gathered information
|
22 |
+
4. **Gradio Frontend**: Provides the user interface for interaction with the system
|
23 |
+
|
24 |
+
### 3.2 Architecture Diagram
|
25 |
+
|
26 |
+
```
|
27 |
+
βββββββββββββββββββββ ββββββββββββββββββββββββ
|
28 |
+
β β β β
|
29 |
+
β Gradio Frontend βββββββββββΊβ CrewAI Controller β
|
30 |
+
β β β β
|
31 |
+
βββββββββββββββββββββ ββββββββββββ¬ββββββββββββ
|
32 |
+
β
|
33 |
+
βΌ
|
34 |
+
ββββββββββββββββββββββββββββββββββββββββ
|
35 |
+
β β
|
36 |
+
β Agent Orchestration Layer β
|
37 |
+
β β
|
38 |
+
βββββ¬ββββββββββββββ¬ββββββββββββββ¬βββββββ
|
39 |
+
β β β
|
40 |
+
βββββββββββββΌββββ βββββββββΌββββββββ βββββΌβββββββββββββ
|
41 |
+
β β β β β β
|
42 |
+
β Search Agent β β Analysis Agentβ β Reporting Agentβ
|
43 |
+
β (Brave MCP) β β (LLM) β β (LLM) β
|
44 |
+
β β β β β β
|
45 |
+
βββββββββββββββββ βββββββββββββββββ ββββββββββββββββββ
|
46 |
+
```
|
47 |
+
|
48 |
+
## 4. Detailed Requirements
|
49 |
+
|
50 |
+
### 4.1 Data Collection Requirements
|
51 |
+
|
52 |
+
#### 4.1.1 Target Entities
|
53 |
+
- Individual profile information
|
54 |
+
- Personal identifiers (name, age, location)
|
55 |
+
- Professional history
|
56 |
+
- Social media presence
|
57 |
+
- Public records (property ownership, legal filings)
|
58 |
+
- Business profile information
|
59 |
+
- Corporate structure
|
60 |
+
- Ownership information
|
61 |
+
- Business registration details
|
62 |
+
- Financial disclosures if publicly available
|
63 |
+
- Business relationships and partnerships
|
64 |
+
- Industry reputation
|
65 |
+
|
66 |
+
#### 4.1.2 Search Parameters
|
67 |
+
- Primary identifiers (full name, business name)
|
68 |
+
- Secondary identifiers (location, industry, associates)
|
69 |
+
- Customizable search depth (standard, deep)
|
70 |
+
- Date range filtering capabilities
|
71 |
+
- Geographic focus areas
|
72 |
+
|
73 |
+
### 4.2 Analysis Requirements
|
74 |
+
|
75 |
+
#### 4.2.1 Money Laundering Indicators
|
76 |
+
The system should detect and flag the following potential indicators:
|
77 |
+
|
78 |
+
- **Structural Red Flags**
|
79 |
+
- Complex corporate structures with no clear business purpose
|
80 |
+
- Companies registered in high-risk jurisdictions
|
81 |
+
- Shell companies with minimal operational footprint
|
82 |
+
- Frequent changes in business structure or ownership
|
83 |
+
|
84 |
+
- **Transactional Red Flags**
|
85 |
+
- Inconsistencies between public business activity and apparent resources
|
86 |
+
- Involvement with industries known for money laundering risks
|
87 |
+
- Connections to entities on sanction lists or watchlists
|
88 |
+
- Unusual growth patterns or business expansions
|
89 |
+
|
90 |
+
- **Reputational Red Flags**
|
91 |
+
- Negative news coverage related to financial crimes
|
92 |
+
- Past investigations or regulatory actions
|
93 |
+
- Association with politically exposed persons (PEPs)
|
94 |
+
- Inconsistencies in public statements and actual business operations
|
95 |
+
|
96 |
+
#### 4.2.2 LLM Analysis Capabilities
|
97 |
+
- Extract and correlate information from diverse sources
|
98 |
+
- Identify patterns and anomalies in collected data
|
99 |
+
- Apply AML (Anti-Money Laundering) expertise to evaluate findings
|
100 |
+
- Generate confidence scores for identified red flags
|
101 |
+
- Explain reasoning behind flagged items
|
102 |
+
|
103 |
+
### 4.3 User Interface Requirements
|
104 |
+
|
105 |
+
#### 4.3.1 Input Interface
|
106 |
+
- Target entity input fields (individual name, business name)
|
107 |
+
- Search parameter configuration options
|
108 |
+
- Investigation depth selector
|
109 |
+
- Search history functionality
|
110 |
+
|
111 |
+
#### 4.3.2 Results Display
|
112 |
+
- Summary dashboard with key findings
|
113 |
+
- Detailed report section with evidence
|
114 |
+
- Visualization of entity relationships
|
115 |
+
- Red flag severity indicators
|
116 |
+
- Source citations for all information
|
117 |
+
- Option to export findings in various formats (PDF, CSV, JSON)
|
118 |
+
|
119 |
+
#### 4.3.3 User Experience
|
120 |
+
- Progress indicators during search and analysis
|
121 |
+
- Responsive design for desktop and tablet use
|
122 |
+
- Clear navigation between different report sections
|
123 |
+
- Ability to save and reload previous investigations
|
124 |
+
|
125 |
+
## 5. Agent Structure (CrewAI Implementation)
|
126 |
+
|
127 |
+
### 5.1 Agent Roles and Responsibilities
|
128 |
+
|
129 |
+
#### 5.1.1 Research Agent
|
130 |
+
- **Objective**: Gather comprehensive information about target entities
|
131 |
+
- **Tools**: Brave MCP search API
|
132 |
+
- **Capabilities**:
|
133 |
+
- Execute multi-faceted search queries
|
134 |
+
- Follow information trails across multiple sources
|
135 |
+
- Filter and prioritize relevant information
|
136 |
+
- Store and organize gathered data
|
137 |
+
|
138 |
+
#### 5.1.2 Analysis Agent
|
139 |
+
- **Objective**: Process gathered information to identify potential money laundering indicators
|
140 |
+
- **Tools**: Frontier LLM API
|
141 |
+
- **Capabilities**:
|
142 |
+
- Apply AML expertise to evaluate gathered information
|
143 |
+
- Cross-reference findings against known money laundering patterns
|
144 |
+
- Identify and categorize potential red flags
|
145 |
+
- Assign confidence scores to findings
|
146 |
+
|
147 |
+
#### 5.1.3 Reporting Agent
|
148 |
+
- **Objective**: Create structured, clear reports from analysis findings
|
149 |
+
- **Tools**: Frontier LLM API
|
150 |
+
- **Capabilities**:
|
151 |
+
- Organize findings in a logical structure
|
152 |
+
- Generate concise summaries of complex information
|
153 |
+
- Create visualizations of entity relationships
|
154 |
+
- Format reports for readability and impact
|
155 |
+
|
156 |
+
### 5.2 Agent Communication Workflow
|
157 |
+
|
158 |
+
1. User initiates search through Gradio interface
|
159 |
+
2. Research Agent conducts initial search based on provided parameters
|
160 |
+
3. Research Agent iteratively refines search based on initial findings
|
161 |
+
4. Analysis Agent processes gathered information from Research Agent
|
162 |
+
5. Analysis Agent identifies potential red flags and areas of concern
|
163 |
+
6. Reporting Agent structures findings into comprehensive report
|
164 |
+
7. Gradio interface displays final report to user
|
165 |
+
|
166 |
+
## 6. Implementation Plan
|
167 |
+
|
168 |
+
### 6.1 Phase 1: Core Framework Setup (Weeks 1-2)
|
169 |
+
- Set up development environment
|
170 |
+
- Implement basic CrewAI framework configuration
|
171 |
+
- Create agent templates and communication protocols
|
172 |
+
- Establish Brave MCP integration for basic searches
|
173 |
+
- Implement LLM API connections
|
174 |
+
|
175 |
+
### 6.2 Phase 2: Agent Development (Weeks 3-5)
|
176 |
+
- Develop and test Research Agent capabilities
|
177 |
+
- Implement Analysis Agent with basic AML pattern recognition
|
178 |
+
- Create Reporting Agent with standard report templates
|
179 |
+
- Test agent communication and data handoffs
|
180 |
+
|
181 |
+
### 6.3 Phase 3: Frontend Development (Weeks 6-7)
|
182 |
+
- Design and implement Gradio interface
|
183 |
+
- Create input forms and configuration options
|
184 |
+
- Develop results display components
|
185 |
+
- Implement export functionality
|
186 |
+
|
187 |
+
### 6.4 Phase 4: Integration and Testing (Weeks 8-9)
|
188 |
+
- Integrate all components into unified system
|
189 |
+
- Conduct performance testing
|
190 |
+
- Optimize search algorithms and analysis pipelines
|
191 |
+
- Perform security review
|
192 |
+
|
193 |
+
### 6.5 Phase 5: Refinement and Launch (Weeks 10-12)
|
194 |
+
- Conduct user acceptance testing
|
195 |
+
- Refine UI/UX based on feedback
|
196 |
+
- Optimize LLM prompts for improved analysis
|
197 |
+
- Prepare documentation and launch materials
|
198 |
+
|
199 |
+
## 7. Technical Requirements
|
200 |
+
|
201 |
+
### 7.1 Development Requirements
|
202 |
+
- Python 3.9+ environment
|
203 |
+
- CrewAI framework (latest version)
|
204 |
+
- Brave MCP API access credentials
|
205 |
+
- Access to frontier LLM APIs (Claude, GPT-4, etc.)
|
206 |
+
- Gradio UI framework
|
207 |
+
|
208 |
+
### 7.2 Deployment Requirements
|
209 |
+
- Server environment with Python support
|
210 |
+
- Minimum 8GB RAM, 4 CPU cores recommended
|
211 |
+
- API key management system
|
212 |
+
- Secure credential storage
|
213 |
+
- Rate limiting implementation for API calls
|
214 |
+
|
215 |
+
### 7.3 Security Requirements
|
216 |
+
- Encrypted storage of search results
|
217 |
+
- Secure API key management
|
218 |
+
- User authentication for accessing the application
|
219 |
+
- Audit logging of all searches conducted
|
220 |
+
- Compliance with relevant data protection regulations
|
221 |
+
|
222 |
+
## 8. Evaluation Metrics
|
223 |
+
|
224 |
+
### 8.1 Performance Metrics
|
225 |
+
- Search completion time
|
226 |
+
- Analysis accuracy (compared to expert review)
|
227 |
+
- System resource utilization
|
228 |
+
- API cost efficiency
|
229 |
+
|
230 |
+
### 8.2 Quality Metrics
|
231 |
+
- Red flag detection accuracy
|
232 |
+
- False positive rate
|
233 |
+
- Source diversity
|
234 |
+
- Explanation quality for identified red flags
|
235 |
+
|
236 |
+
## 9. Limitations and Ethical Considerations
|
237 |
+
|
238 |
+
### 9.1 Technical Limitations
|
239 |
+
- Reliance on publicly available information only
|
240 |
+
- API rate limits may affect search depth
|
241 |
+
- LLM hallucination risks require human verification
|
242 |
+
- Limited to text-based information analysis
|
243 |
+
|
244 |
+
### 9.2 Ethical Guidelines
|
245 |
+
- System should be used as an investigative aid, not as sole decision basis
|
246 |
+
- All findings require human verification before action
|
247 |
+
- Use limited to legitimate AML and financial crime prevention purposes
|
248 |
+
- Compliance with privacy laws and regulations required
|
249 |
+
- Application should not be used for harassment or unauthorized surveillance
|
250 |
+
|
251 |
+
## 10. Code Structure Overview
|
252 |
+
|
253 |
+
### 10.1 Main Components
|
254 |
+
|
255 |
+
```python
|
256 |
+
# Project structure
|
257 |
+
osint_aml_app/
|
258 |
+
βββ app.py # Main application entry point
|
259 |
+
βββ config/ # Configuration files
|
260 |
+
β βββ config.yaml # General configuration
|
261 |
+
β βββ agent_configs.yaml # Agent-specific configurations
|
262 |
+
βββ agents/ # CrewAI agent implementations
|
263 |
+
β βββ research_agent.py # Web search agent
|
264 |
+
β βββ analysis_agent.py # AML analysis agent
|
265 |
+
β βββ reporting_agent.py # Report generation agent
|
266 |
+
βββ tools/ # Tool implementations
|
267 |
+
β βββ brave_search.py # Brave MCP search integration
|
268 |
+
β βββ llm_interface.py # LLM API interfaces
|
269 |
+
β βββ data_processor.py # Data processing utilities
|
270 |
+
βββ ui/ # Gradio UI components
|
271 |
+
β βββ input_forms.py # Input interfaces
|
272 |
+
β βββ results_display.py # Results visualization
|
273 |
+
β βββ export_tools.py # Report export functionality
|
274 |
+
βββ models/ # Data models
|
275 |
+
β βββ entity.py # Entity representation
|
276 |
+
β βββ red_flag.py # Red flag classification
|
277 |
+
β βββ report.py # Report structure
|
278 |
+
βββ utils/ # Utility functions
|
279 |
+
βββ validators.py # Input validation
|
280 |
+
βββ parsers.py # Content parsing
|
281 |
+
βββ security.py # Security utilities
|
282 |
+
```
|
283 |
+
|
284 |
+
## 11. Budget and Resource Requirements
|
285 |
+
|
286 |
+
### 11.1 Development Resources
|
287 |
+
- Developer time: 12 weeks (1-2 developers)
|
288 |
+
- LLM API costs: Estimated $500-1000 for development and testing
|
289 |
+
- Brave MCP API costs: Based on search volume (approximately $200-500)
|
290 |
+
- Infrastructure costs: $100-200/month for development servers
|
291 |
+
|
292 |
+
### 11.2 Operational Resources
|
293 |
+
- Ongoing API costs: Dependent on usage volume
|
294 |
+
- Maintenance: 10-15 hours per month
|
295 |
+
- Infrastructure: $200-400/month depending on scale
|
296 |
+
|
297 |
+
## 12. Expansion Possibilities
|
298 |
+
|
299 |
+
- Integration with financial database APIs
|
300 |
+
- Addition of document analysis capabilities
|
301 |
+
- Implementation of temporal analysis (tracking changes over time)
|
302 |
+
- Development of collaborative investigation features
|
303 |
+
- Integration with case management systems
|
304 |
+
- Support for additional languages and jurisdictions
|
305 |
+
|
306 |
+
## 13. Success Criteria
|
307 |
+
|
308 |
+
The application will be considered successful if it:
|
309 |
+
- Accurately identifies at least 85% of known money laundering indicators in test cases
|
310 |
+
- Maintains a false positive rate below 15%
|
311 |
+
- Completes standard searches in under 5 minutes
|
312 |
+
- Receives positive usability feedback from AML professionals
|
313 |
+
- Provides clear, actionable intelligence that enhances investigation capabilities
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
crewai==0.120.1
|
2 |
+
crewai-tools==0.45.0
|
3 |
+
gradio==5.30.0
|
4 |
+
gradio-client==1.10.1
|
5 |
+
litellm==1.68.0
|
6 |
+
markdown-it-py==3.0.0
|
7 |
+
openai==1.75.0
|
8 |
+
openai-agents==0.0.12
|
9 |
+
uv==0.7.6
|
test_serper.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Smoke test for a Serper.dev API key.

Sends one minimal search request and prints the HTTP status code and raw
response body so a developer can confirm that the SERPER_API_KEY
environment variable holds a working key.
"""
import os
import sys

import requests

# Read the key from the environment; fail fast with a clear local message
# instead of sending a request carrying an "X-API-KEY: None" header, which
# would surface as a confusing remote auth error.
serper_api_key = os.getenv("SERPER_API_KEY")
if not serper_api_key:
    sys.exit("SERPER_API_KEY is not set; export it before running this test.")

headers = {
    "X-API-KEY": serper_api_key,
    "Content-Type": "application/json",
}

# Minimal query payload: "gl" = geolocation (US), "hl" = interface language.
payload = {
    "q": "test query",
    "gl": "us",
    "hl": "en",
}

# Always pass a timeout: without one, requests.post can block forever if the
# endpoint stalls, and this script has no other way to abort.
response = requests.post(
    "https://google.serper.dev/search",
    headers=headers,
    json=payload,
    timeout=30,
)
print(f"Status code: {response.status_code}")
print(f"Response: {response.text}")
|
uv.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|