raznis commited on
Commit
66a53df
Β·
verified Β·
1 Parent(s): 3ecdccf

Upload folder using huggingface_hub

Browse files
.gitignore ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
175
+
176
+ # outputs
177
+ output/*
178
+ investigators/output/*
README.md CHANGED
@@ -1,12 +1,7 @@
1
  ---
2
- title: OSINT Investigator
3
- emoji: 🌍
4
- colorFrom: indigo
5
- colorTo: red
6
  sdk: gradio
7
  sdk_version: 5.30.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: OSINT_Investigator
3
+ app_file: app.py
 
 
4
  sdk: gradio
5
  sdk_version: 5.30.0
 
 
6
  ---
7
+ # OSINT_Agent
 
app.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from investigators.src.investigators.crew import Investigators


def investigate(target_name, affiliations):
    """Run the Investigators crew against a single target and return the report.

    Args:
        target_name: Full name of the person to investigate.
        affiliations: Comma-separated affiliations, used to disambiguate search.

    Returns:
        The raw Markdown risk-assessment report produced by the crew.

    Raises:
        RuntimeError: If the crew run fails for any reason.
    """
    inputs = {
        'target': target_name,
        'affiliations': affiliations,
        # 'current_year': str(datetime.now().year)
    }

    try:
        crew_output = Investigators().crew().kickoff(inputs=inputs)
    except Exception as e:
        # Chain the original exception so the full traceback is preserved
        # instead of being replaced by a bare Exception.
        raise RuntimeError(f"An error occurred while running the crew: {e}") from e

    return crew_output.raw


# Gradio UI: two text inputs (target name + affiliations), one Markdown report output.
view = gr.Interface(
    fn=investigate,
    inputs=[
        gr.Textbox(label="Target name:"),
        gr.Textbox(label="Target Affiliations (comma separated):")],
    outputs=[gr.Markdown(label="Risk Assessment Report:")],
    flagging_mode="never",
    examples=[
        ["Raz Nissim", "Ben Gurion University, General Motors"],
    ],
    title="OSINT Investigator",
    description="Enter the name of your target and their affiliations (to make search easier), and get an AML risk assessment based on their public information.",)
view.launch(inbrowser=True)
investigators/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .env
2
+ __pycache__/
3
+ .DS_Store
investigators/README.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Investigators Crew
2
+
3
+ Welcome to the Investigators Crew project, powered by [crewAI](https://crewai.com). This template is designed to help you set up a multi-agent AI system with ease, leveraging the powerful and flexible framework provided by crewAI. Our goal is to enable your agents to collaborate effectively on complex tasks, maximizing their collective intelligence and capabilities.
4
+
5
+ ## Installation
6
+
7
+ Ensure you have Python >=3.10 <3.13 installed on your system. This project uses [UV](https://docs.astral.sh/uv/) for dependency management and package handling, offering a seamless setup and execution experience.
8
+
9
+ First, if you haven't already, install uv:
10
+
11
+ ```bash
12
+ pip install uv
13
+ ```
14
+
15
+ Next, navigate to your project directory and install the dependencies:
16
+
17
+ (Optional) Lock the dependencies and install them by using the CLI command:
18
+ ```bash
19
+ crewai install
20
+ ```
21
+ ### Customizing
22
+
23
+ **Add your `OPENAI_API_KEY` into the `.env` file**
24
+
25
+ - Modify `src/investigators/config/agents.yaml` to define your agents
26
+ - Modify `src/investigators/config/tasks.yaml` to define your tasks
27
+ - Modify `src/investigators/crew.py` to add your own logic, tools and specific args
28
+ - Modify `src/investigators/main.py` to add custom inputs for your agents and tasks
29
+
30
+ ## Running the Project
31
+
32
+ To kickstart your crew of AI agents and begin task execution, run this from the root folder of your project:
33
+
34
+ ```bash
35
+ $ crewai run
36
+ ```
37
+
38
+ This command initializes the investigators Crew, assembling the agents and assigning them tasks as defined in your configuration.
39
+
40
+ This example, unmodified, will create a `report.md` file with the output of research on LLMs in the root folder.
41
+
42
+ ## Understanding Your Crew
43
+
44
+ The investigators Crew is composed of multiple AI agents, each with unique roles, goals, and tools. These agents collaborate on a series of tasks, defined in `config/tasks.yaml`, leveraging their collective skills to achieve complex objectives. The `config/agents.yaml` file outlines the capabilities and configurations of each agent in your crew.
45
+
46
+ ## Support
47
+
48
+ For support, questions, or feedback regarding the Investigators Crew or crewAI:
49
+ - Visit our [documentation](https://docs.crewai.com)
50
+ - Reach out to us through our [GitHub repository](https://github.com/joaomdmoura/crewai)
51
+ - [Join our Discord](https://discord.com/invite/X4JWnZnxPb)
52
+ - [Chat with our docs](https://chatg.pt/DWjSBZn)
53
+
54
+ Let's create wonders together with the power and simplicity of crewAI.
investigators/knowledge/user_preference.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ User name is John Doe.
2
+ User is an AI Engineer.
3
+ User is interested in AI Agents.
4
+ User is based in San Francisco, California.
investigators/pyproject.toml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "investigators"
3
+ version = "0.1.0"
4
+ description = "investigators using crewAI"
5
+ authors = [{ name = "Raz Nissim", email = "[email protected]" }]
6
+ requires-python = ">=3.10,<3.13"
7
+ dependencies = [
8
+ "crewai[tools]>=0.119.0,<1.0.0"
9
+ ]
10
+
11
+ [project.scripts]
12
+ investigators = "investigators.main:run"
13
+ run_crew = "investigators.main:run"
14
+ train = "investigators.main:train"
15
+ replay = "investigators.main:replay"
16
+ test = "investigators.main:test"
17
+
18
+ [build-system]
19
+ requires = ["hatchling"]
20
+ build-backend = "hatchling.build"
21
+
22
+ [tool.crewai]
23
+ type = "crew"
investigators/src/investigators/__init__.py ADDED
File without changes
investigators/src/investigators/config/agents.yaml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ researcher:
2
+ role: >
3
+ Research Specialist
4
+ goal: >
5
+ Gather comprehensive information from public sources about the target: {target}, that has the following affiliations: {affiliations}.
6
+ backstory: >
7
+ You are an expert OSINT researcher with deep experience in
8
+ finding and connecting information about people and businesses. You know how to
9
+ follow information trails and identify valuable data sources.
10
+ llm: openai/gpt-4o-mini
11
+
12
+ fincrime_analyst:
13
+ role: >
14
+ Financial Crime Analyst
15
+ goal: >
16
+ Analyze gathered information on {target} to identify potential money laundering indicators.
17
+ Focus on the person and not their affiliations.
18
+ Ignore any sources that are not relevant, especially if they do not contain the full name of the target ({target}).
19
+ backstory: >
20
+ You are a seasoned financial crime expert with extensive knowledge
21
+ of money laundering techniques, red flags, and detection methods. You can spot
22
+ suspicious patterns that others might miss.
23
+ llm: openai/gpt-4o-mini
24
+
25
+ osint_reporter:
26
+ role: >
27
+ Intelligence Reporter
28
+ goal: >
29
+ Create clear, structured reports from analysis findings.
30
+ backstory: >
31
+ You are a skilled intelligence reporter who can transform complex
32
+ findings into clear, actionable reports. You know how to present information
33
+ in a way that highlights the most important elements.
34
+ llm: openai/gpt-4o-mini
investigators/src/investigators/config/tasks.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ research_target:
2
+ description: >
3
+ Research {target} ({affiliations}) thoroughly using available search tools:
4
+
5
+ Name: {target}
6
+
7
+ Your research should focus on:
8
+ 1. Basic background information
9
+ 2. Professional history and business activities
10
+ 3. Corporate structures and relationships
11
+ 4. Public records and news mentions
12
+ 5. Legal or regulatory issues
13
+ 6. Unusual business patterns or inconsistencies
14
+
15
+ Organize your findings clearly and be sure to track your sources.
16
+ expected_output: >
17
+ Comprehensive research findings on {target}
18
+ agent: researcher
19
+
20
+ analyze_target:
21
+ description: >
22
+ Analyze the research findings on {target} to identify potential money laundering red flags. Focus only on {target} and not their affiliations.
23
+ Ignore any sources that do not contain the full name of the target ({target}) at least once.
24
+
25
+ If relevant, analyze for the following reputational Red Flags:
26
+ - Negative news coverage related to financial crimes
27
+ - Past investigations or regulatory actions
28
+ - Association with politically exposed persons (PEPs)
29
+
30
+ For each identified red flag, provide:
31
+ - A clear description
32
+ - Severity assessment (on a scale of 0.0 to 1.0)
33
+ - Supporting evidence
34
+ - Your reasoning
35
+ - Sources of information
36
+
37
+ Structure your output in JSON format.
38
+ IMPORTANT: Output ONLY the raw JSON, without any markdown formatting, code block delimiters, or backticks.
39
+ expected_output: >
40
+ JSON-structured analysis of {target}'s potential money laundering red flags
41
+ agent: fincrime_analyst
42
+ context:
43
+ - research_target
44
+ output_file: output/analysis_output_{target}.json
45
+
46
+
47
+ reporting_task:
48
+ description: >
49
+ Create a comprehensive intelligence report based on the following analysis
50
+ of potential money laundering red flags:
51
+
52
+ Target Name: {target}
53
+
54
+ Your report should include:
55
+
56
+ 1. Executive Summary
57
+ - Brief overview of the target
58
+ - Summary of key findings and risk assessment
59
+
60
+ 2. Target Profile
61
+ - Background information
62
+ - Known associates and relationships
63
+ - Business activities and corporate structure (if applicable)
64
+
65
+ 3. Red Flag Analysis
66
+ - Detailed examination of each identified red flag
67
+ - Supporting evidence and reasoning
68
+ - Risk severity assessment
69
+
70
+ 4. Overall Risk Assessment
71
+ - Holistic evaluation of money laundering risk
72
+ - Confidence level in findings
73
+ - Potential alternative explanations
74
+
75
+ 5. Recommendations
76
+ - Suggested next steps for investigation
77
+ - Additional intelligence gaps to address
78
+
79
+ Format the report in a clear, professional structure with sections and subsections.
80
+ expected_output: >
81
+ Comprehensive intelligence report on money laundering risks, in Markdown.
82
+ agent: osint_reporter
83
+ context:
84
+ - analyze_target
85
+ output_file: output/osint_report_{target}.md
investigators/src/investigators/crew.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from crewai import Agent, Crew, Process, Task
from crewai.project import CrewBase, agent, crew, task
from crewai_tools import SerperDevTool



@CrewBase
class Investigators():
    """Investigators crew.

    A sequential CrewAI pipeline of three agents (researcher ->
    fincrime_analyst -> osint_reporter) configured from the YAML files
    referenced by `agents_config` / `tasks_config`. The @CrewBase
    decorator collects the @agent- and @task-decorated methods into
    `self.agents` and `self.tasks` automatically.
    """

    #Agents
    # Path is resolved by CrewBase relative to this package.
    agents_config = 'config/agents.yaml'

    @agent
    def researcher(self) -> Agent:
        """OSINT research agent; the only agent with a web-search tool."""
        return Agent(
            config=self.agents_config['researcher'],
            verbose=True,
            tools=[SerperDevTool()],
            retry_on_fail=True,  # Enable retry
            max_retries=3  # Set maximum retries
        )

    @agent
    def fincrime_analyst(self) -> Agent:
        """Financial-crime analyst; works only on the researcher's output."""
        return Agent(
            config=self.agents_config['fincrime_analyst'],
            verbose=False,
        )

    @agent
    def osint_reporter(self) -> Agent:
        """Reporting agent; turns the analysis into a Markdown report."""
        return Agent(
            config=self.agents_config['osint_reporter'],
            verbose=False,
        )

    # Tasks
    tasks_config = 'config/tasks.yaml'

    @task
    def research_target(self) -> Task:
        """Task 1: gather public information on the target."""
        return Task(
            config=self.tasks_config['research_target'],
        )

    @task
    def analyze_target(self) -> Task:
        """Task 2: extract money-laundering red flags (JSON output file)."""
        return Task(
            config=self.tasks_config['analyze_target'],
        )

    @task
    def reporting_task(self) -> Task:
        """Task 3: produce the final intelligence report (Markdown output file)."""
        return Task(
            config=self.tasks_config['reporting_task'],
        )

    @crew
    def crew(self) -> Crew:
        """Creates the Investigators crew"""

        return Crew(
            agents=self.agents,  # Automatically created by the @agent decorator
            tasks=self.tasks,  # Automatically created by the @task decorator
            process=Process.sequential,  # tasks run in declaration order
            verbose=True,
        )
investigators/src/investigators/main.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python
import os
import sys
import warnings

from datetime import datetime

from investigators.crew import Investigators

warnings.filterwarnings("ignore", category=SyntaxWarning, module="pysbd")

# Create output directory if it doesn't exist (tasks write their files here).
os.makedirs('output', exist_ok=True)

def run():
    """
    Run the crew.

    Kicks off the Investigators crew for a single hard-coded target.
    Edit `inputs` below to investigate a different person. Previously
    used examples, kept for reference:
      - 'Raz Nissim'        / 'Ben Gurion University, General Motors'
      - 'Avraham Hirschson' / 'Israeli government, Israeli Knesset, Histadrut'
      - 'Igal Nissim'       / 'Comverse, Verint'
      - 'Yeela Harel'       / 'Israel Ministry of Justice, ThetaRay, Bank Of Israel'
      - 'Yehuda Harel'      / 'Hapoalim Bank'

    Raises:
        RuntimeError: If the crew run fails for any reason.
    """
    inputs = {
        'target': 'Bar Mittelman',
        'affiliations': 'Bitin, crypto',
        # 'current_year': str(datetime.now().year)
    }

    try:
        Investigators().crew().kickoff(inputs=inputs)
    except Exception as e:
        # Chain the original exception so the underlying traceback survives.
        raise RuntimeError(f"An error occurred while running the crew: {e}") from e

# NOTE(review): investigators/pyproject.toml declares scripts main:train,
# main:replay and main:test, which are not defined here — confirm whether
# they should be added or the script entries removed.
investigators/src/investigators/tools/__init__.py ADDED
File without changes
investigators/src/investigators/tools/custom_tool.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from crewai.tools import BaseTool
from typing import Type
from pydantic import BaseModel, Field


class MyCustomToolInput(BaseModel):
    """Input schema for MyCustomTool."""
    # Single string argument; description is surfaced to the LLM agent.
    argument: str = Field(..., description="Description of the argument.")

# NOTE(review): this is the unmodified crewAI scaffold template; it is not
# referenced by crew.py and returns a placeholder string.
class MyCustomTool(BaseTool):
    name: str = "Name of my tool"
    description: str = (
        "Clear description for what this tool is useful for, your agent will need this information to use it."
    )
    # Pydantic model used by crewAI to validate/parse the tool's arguments.
    args_schema: Type[BaseModel] = MyCustomToolInput

    def _run(self, argument: str) -> str:
        # Implementation goes here
        return "this is an example of a tool output, ignore it and move along."
investigators/uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "OSINT_Agent"
3
+ version = "0.1.0"
4
+ description = "Tool for AI-based investigations and KYC"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "gradio>=5.22.0",
9
+ "openai>=1.68.2",
10
+ "openai-agents>=0.0.6",
11
+ "python-dotenv>=1.0.1",
12
+ "crewai[tools]>=0.119.0,<1.0.0",
13
+ ]
14
+
15
+ [dependency-groups]
16
+ dev = [
17
+ "ipykernel>=6.29.5",
18
+ ]
requirements-doc.md ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # OSINT Money Laundering Detection Application
2
+ ## Requirements and Implementation Plan
3
+
4
+ ## 1. Executive Summary
5
+
6
+ This document outlines the requirements and implementation plan for an Open Source Intelligence (OSINT) application designed to identify potential money laundering red flags associated with individuals and businesses. The application will leverage CrewAI as the agent orchestration framework, Brave MCP for web searches, and frontier Large Language Models (LLMs) for information analysis and structured output generation. The user interface will be built using Gradio.
7
+
8
+ ## 2. Project Goals
9
+
10
+ - Create an OSINT tool that gathers comprehensive information about individuals and businesses from publicly available sources
11
+ - Identify potential money laundering indicators based on analysis of the gathered information
12
+ - Present findings in a structured, actionable format to assist in financial crime investigations
13
+ - Provide an intuitive user interface that allows for easy input and clear presentation of results
14
+
15
+ ## 3. Technical Architecture
16
+
17
+ ### 3.1 Core Components
18
+
19
+ 1. **CrewAI Framework**: Orchestrates autonomous agents to perform specialized tasks
20
+ 2. **Web Search Module**: Utilizes Brave MCP for comprehensive web searches
21
+ 3. **LLM Analysis Engine**: Leverages frontier LLMs to process and analyze gathered information
22
+ 4. **Gradio Frontend**: Provides the user interface for interaction with the system
23
+
24
+ ### 3.2 Architecture Diagram
25
+
26
+ ```
27
+ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
28
+ β”‚ β”‚ β”‚ β”‚
29
+ β”‚ Gradio Frontend │◄────────►│ CrewAI Controller β”‚
30
+ β”‚ β”‚ β”‚ β”‚
31
+ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
32
+ β”‚
33
+ β–Ό
34
+ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
35
+ β”‚ β”‚
36
+ β”‚ Agent Orchestration Layer β”‚
37
+ β”‚ β”‚
38
+ β””β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”˜
39
+ β”‚ β”‚ β”‚
40
+ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
41
+ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚
42
+ β”‚ Search Agent β”‚ β”‚ Analysis Agentβ”‚ β”‚ Reporting Agentβ”‚
43
+ β”‚ (Brave MCP) β”‚ β”‚ (LLM) β”‚ β”‚ (LLM) β”‚
44
+ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚
45
+ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
46
+ ```
47
+
48
+ ## 4. Detailed Requirements
49
+
50
+ ### 4.1 Data Collection Requirements
51
+
52
+ #### 4.1.1 Target Entities
53
+ - Individual profile information
54
+ - Personal identifiers (name, age, location)
55
+ - Professional history
56
+ - Social media presence
57
+ - Public records (property ownership, legal filings)
58
+ - Business profile information
59
+ - Corporate structure
60
+ - Ownership information
61
+ - Business registration details
62
+ - Financial disclosures if publicly available
63
+ - Business relationships and partnerships
64
+ - Industry reputation
65
+
66
+ #### 4.1.2 Search Parameters
67
+ - Primary identifiers (full name, business name)
68
+ - Secondary identifiers (location, industry, associates)
69
+ - Customizable search depth (standard, deep)
70
+ - Date range filtering capabilities
71
+ - Geographic focus areas
72
+
73
+ ### 4.2 Analysis Requirements
74
+
75
+ #### 4.2.1 Money Laundering Indicators
76
+ The system should detect and flag the following potential indicators:
77
+
78
+ - **Structural Red Flags**
79
+ - Complex corporate structures with no clear business purpose
80
+ - Companies registered in high-risk jurisdictions
81
+ - Shell companies with minimal operational footprint
82
+ - Frequent changes in business structure or ownership
83
+
84
+ - **Transactional Red Flags**
85
+ - Inconsistencies between public business activity and apparent resources
86
+ - Involvement with industries known for money laundering risks
87
+ - Connections to entities on sanction lists or watchlists
88
+ - Unusual growth patterns or business expansions
89
+
90
+ - **Reputational Red Flags**
91
+ - Negative news coverage related to financial crimes
92
+ - Past investigations or regulatory actions
93
+ - Association with politically exposed persons (PEPs)
94
+ - Inconsistencies in public statements and actual business operations
95
+
96
+ #### 4.2.2 LLM Analysis Capabilities
97
+ - Extract and correlate information from diverse sources
98
+ - Identify patterns and anomalies in collected data
99
+ - Apply AML (Anti-Money Laundering) expertise to evaluate findings
100
+ - Generate confidence scores for identified red flags
101
+ - Explain reasoning behind flagged items
102
+
103
+ ### 4.3 User Interface Requirements
104
+
105
+ #### 4.3.1 Input Interface
106
+ - Target entity input fields (individual name, business name)
107
+ - Search parameter configuration options
108
+ - Investigation depth selector
109
+ - Search history functionality
110
+
111
+ #### 4.3.2 Results Display
112
+ - Summary dashboard with key findings
113
+ - Detailed report section with evidence
114
+ - Visualization of entity relationships
115
+ - Red flag severity indicators
116
+ - Source citations for all information
117
+ - Option to export findings in various formats (PDF, CSV, JSON)
118
+
119
+ #### 4.3.3 User Experience
120
+ - Progress indicators during search and analysis
121
+ - Responsive design for desktop and tablet use
122
+ - Clear navigation between different report sections
123
+ - Ability to save and reload previous investigations
124
+
125
+ ## 5. Agent Structure (CrewAI Implementation)
126
+
127
+ ### 5.1 Agent Roles and Responsibilities
128
+
129
+ #### 5.1.1 Research Agent
130
+ - **Objective**: Gather comprehensive information about target entities
131
+ - **Tools**: Brave MCP search API
132
+ - **Capabilities**:
133
+ - Execute multi-faceted search queries
134
+ - Follow information trails across multiple sources
135
+ - Filter and prioritize relevant information
136
+ - Store and organize gathered data
137
+
138
+ #### 5.1.2 Analysis Agent
139
+ - **Objective**: Process gathered information to identify potential money laundering indicators
140
+ - **Tools**: Frontier LLM API
141
+ - **Capabilities**:
142
+ - Apply AML expertise to evaluate gathered information
143
+ - Cross-reference findings against known money laundering patterns
144
+ - Identify and categorize potential red flags
145
+ - Assign confidence scores to findings
146
+
147
+ #### 5.1.3 Reporting Agent
148
+ - **Objective**: Create structured, clear reports from analysis findings
149
+ - **Tools**: Frontier LLM API
150
+ - **Capabilities**:
151
+ - Organize findings in a logical structure
152
+ - Generate concise summaries of complex information
153
+ - Create visualizations of entity relationships
154
+ - Format reports for readability and impact
155
+
156
+ ### 5.2 Agent Communication Workflow
157
+
158
+ 1. User initiates search through Gradio interface
159
+ 2. Research Agent conducts initial search based on provided parameters
160
+ 3. Research Agent iteratively refines search based on initial findings
161
+ 4. Analysis Agent processes gathered information from Research Agent
162
+ 5. Analysis Agent identifies potential red flags and areas of concern
163
+ 6. Reporting Agent structures findings into comprehensive report
164
+ 7. Gradio interface displays final report to user
165
+
166
+ ## 6. Implementation Plan
167
+
168
+ ### 6.1 Phase 1: Core Framework Setup (Weeks 1-2)
169
+ - Set up development environment
170
+ - Implement basic CrewAI framework configuration
171
+ - Create agent templates and communication protocols
172
+ - Establish Brave MCP integration for basic searches
173
+ - Implement LLM API connections
174
+
175
+ ### 6.2 Phase 2: Agent Development (Weeks 3-5)
176
+ - Develop and test Research Agent capabilities
177
+ - Implement Analysis Agent with basic AML pattern recognition
178
+ - Create Reporting Agent with standard report templates
179
+ - Test agent communication and data handoffs
180
+
181
+ ### 6.3 Phase 3: Frontend Development (Weeks 6-7)
182
+ - Design and implement Gradio interface
183
+ - Create input forms and configuration options
184
+ - Develop results display components
185
+ - Implement export functionality
186
+
187
+ ### 6.4 Phase 4: Integration and Testing (Weeks 8-9)
188
+ - Integrate all components into unified system
189
+ - Conduct performance testing
190
+ - Optimize search algorithms and analysis pipelines
191
+ - Perform security review
192
+
193
+ ### 6.5 Phase 5: Refinement and Launch (Weeks 10-12)
194
+ - Conduct user acceptance testing
195
+ - Refine UI/UX based on feedback
196
+ - Optimize LLM prompts for improved analysis
197
+ - Prepare documentation and launch materials
198
+
199
+ ## 7. Technical Requirements
200
+
201
+ ### 7.1 Development Requirements
202
+ - Python 3.9+ environment
203
+ - CrewAI framework (latest version)
204
+ - Brave MCP API access credentials
205
+ - Access to frontier LLM APIs (Claude, GPT-4, etc.)
206
+ - Gradio UI framework
207
+
208
+ ### 7.2 Deployment Requirements
209
+ - Server environment with Python support
210
+ - Minimum 8GB RAM, 4 CPU cores recommended
211
+ - API key management system
212
+ - Secure credential storage
213
+ - Rate limiting implementation for API calls
214
+
215
+ ### 7.3 Security Requirements
216
+ - Encrypted storage of search results
217
+ - Secure API key management
218
+ - User authentication for accessing the application
219
+ - Audit logging of all searches conducted
220
+ - Compliance with relevant data protection regulations
221
+
222
+ ## 8. Evaluation Metrics
223
+
224
+ ### 8.1 Performance Metrics
225
+ - Search completion time
226
+ - Analysis accuracy (compared to expert review)
227
+ - System resource utilization
228
+ - API cost efficiency
229
+
230
+ ### 8.2 Quality Metrics
231
+ - Red flag detection accuracy
232
+ - False positive rate
233
+ - Source diversity
234
+ - Explanation quality for identified red flags
235
+
236
+ ## 9. Limitations and Ethical Considerations
237
+
238
+ ### 9.1 Technical Limitations
239
+ - Reliance on publicly available information only
240
+ - API rate limits may affect search depth
241
+ - LLM hallucination risks require human verification
242
+ - Limited to text-based information analysis
243
+
244
+ ### 9.2 Ethical Guidelines
245
+ - System should be used as an investigative aid, not as sole decision basis
246
+ - All findings require human verification before action
247
+ - Use limited to legitimate AML and financial crime prevention purposes
248
+ - Compliance with privacy laws and regulations required
249
+ - Application should not be used for harassment or unauthorized surveillance
250
+
251
+ ## 10. Code Structure Overview
252
+
253
+ ### 10.1 Main Components
254
+
255
+ ```python
256
+ # Project structure
257
+ osint_aml_app/
258
+ β”œβ”€β”€ app.py # Main application entry point
259
+ β”œβ”€β”€ config/ # Configuration files
260
+ β”‚ β”œβ”€β”€ config.yaml # General configuration
261
+ β”‚ └── agent_configs.yaml # Agent-specific configurations
262
+ β”œβ”€β”€ agents/ # CrewAI agent implementations
263
+ β”‚ β”œβ”€β”€ research_agent.py # Web search agent
264
+ β”‚ β”œβ”€β”€ analysis_agent.py # AML analysis agent
265
+ β”‚ └── reporting_agent.py # Report generation agent
266
+ β”œβ”€β”€ tools/ # Tool implementations
267
+ β”‚ β”œβ”€β”€ brave_search.py # Brave MCP search integration
268
+ β”‚ β”œβ”€β”€ llm_interface.py # LLM API interfaces
269
+ β”‚ └── data_processor.py # Data processing utilities
270
+ β”œβ”€β”€ ui/ # Gradio UI components
271
+ β”‚ β”œβ”€β”€ input_forms.py # Input interfaces
272
+ β”‚ β”œβ”€β”€ results_display.py # Results visualization
273
+ β”‚ └── export_tools.py # Report export functionality
274
+ β”œβ”€β”€ models/ # Data models
275
+ β”‚ β”œβ”€β”€ entity.py # Entity representation
276
+ β”‚ β”œβ”€β”€ red_flag.py # Red flag classification
277
+ β”‚ └── report.py # Report structure
278
+ └── utils/ # Utility functions
279
+ β”œβ”€β”€ validators.py # Input validation
280
+ β”œβ”€β”€ parsers.py # Content parsing
281
+ └── security.py # Security utilities
282
+ ```
283
+
284
+ ## 11. Budget and Resource Requirements
285
+
286
+ ### 11.1 Development Resources
287
+ - Developer time: 12 weeks (1-2 developers)
288
+ - LLM API costs: Estimated $500-1000 for development and testing
289
+ - Brave MCP API costs: Based on search volume (approximately $200-500)
290
+ - Infrastructure costs: $100-200/month for development servers
291
+
292
+ ### 11.2 Operational Resources
293
+ - Ongoing API costs: Dependent on usage volume
294
+ - Maintenance: 10-15 hours per month
295
+ - Infrastructure: $200-400/month depending on scale
296
+
297
+ ## 12. Expansion Possibilities
298
+
299
+ - Integration with financial database APIs
300
+ - Addition of document analysis capabilities
301
+ - Implementation of temporal analysis (tracking changes over time)
302
+ - Development of collaborative investigation features
303
+ - Integration with case management systems
304
+ - Support for additional languages and jurisdictions
305
+
306
+ ## 13. Success Criteria
307
+
308
+ The application will be considered successful if it:
309
+ - Accurately identifies at least 85% of known money laundering indicators in test cases
310
+ - Maintains a false positive rate below 15%
311
+ - Completes standard searches in under 5 minutes
312
+ - Receives positive usability feedback from AML professionals
313
+ - Provides clear, actionable intelligence that enhances investigation capabilities
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ crewai==0.120.1
2
+ crewai-tools==0.45.0
3
+ gradio==5.30.0
4
+ gradio-client==1.10.1
5
+ litellm==1.68.0
6
+ markdown-it-py==3.0.0
7
+ openai==1.75.0
8
+ openai-agents==0.0.12
9
+ uv==0.7.6
test_serper.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys

import requests

# Smoke-test the Serper.dev API key with a minimal search request.
serper_api_key = os.getenv("SERPER_API_KEY")  # Or however you're storing it

if not serper_api_key:
    # Fail fast with a clear message instead of a confusing 401/403 from the API.
    sys.exit("SERPER_API_KEY is not set; export it before running this check.")

headers = {
    "X-API-KEY": serper_api_key,
    "Content-Type": "application/json"
}

payload = {
    "q": "test query",
    "gl": "us",
    "hl": "en"
}

# timeout keeps the script from hanging indefinitely on network problems.
response = requests.post(
    "https://google.serper.dev/search", headers=headers, json=payload, timeout=30
)
print(f"Status code: {response.status_code}")
print(f"Response: {response.text}")
uv.lock ADDED
The diff for this file is too large to render. See raw diff