blackopsrepl commited on
Commit
3b9a6b5
Β·
1 Parent(s): 57013f8

chore: reboot project versioning

Browse files

refactor: add tabs to UI

Switched primary focus to MCP tool

.gitignore ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+
53
+ # Translations
54
+ *.mo
55
+ *.pot
56
+
57
+ # Django stuff:
58
+ *.log
59
+ local_settings.py
60
+ db.sqlite3
61
+ db.sqlite3-journal
62
+
63
+ # Flask stuff:
64
+ instance/
65
+ .webassets-cache
66
+
67
+ # Scrapy stuff:
68
+ .scrapy
69
+
70
+ # Sphinx documentation
71
+ docs/_build/
72
+
73
+ # PyBuilder
74
+ target/
75
+
76
+ # Jupyter Notebook
77
+ .ipynb_checkpoints
78
+
79
+ # IPython
80
+ profile_default/
81
+ ipython_config.py
82
+
83
+ # pyenv
84
+ .python-version
85
+
86
+ # pipenv
87
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
88
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
89
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
90
+ # install all needed dependencies.
91
+ #Pipfile.lock
92
+
93
+ # celery beat schedule file
94
+ celerybeat-schedule
95
+
96
+ # SageMath parsed files
97
+ *.sage.py
98
+
99
+ # Environments
100
+ .env
101
+ .venv
102
+ env/
103
+ venv/
104
+ ENV/
105
+ env.bak/
106
+ venv.bak/
107
+
108
+ # Spyder project settings
109
+ .spyderproject
110
+ .spyproject
111
+
112
+ # Rope project settings
113
+ .ropeproject
114
+
115
+ # mkdocs documentation
116
+ /site
117
+
118
+ # mypy
119
+ .mypy_cache/
120
+ .dmypy.json
121
+ dmypy.json
122
+
123
+ # Pyre type checker
124
+ .pyre/
125
+
126
+ *.code-workspace
127
+
128
+ tests/secrets/nebius_secrets.py
129
+
130
+ tests/secrets/creds.py
.pre-commit-config.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v2.3.0
4
+ hooks:
5
+ - id: check-yaml
6
+ - id: end-of-file-fixer
7
+ - id: trailing-whitespace
8
+
9
+ - repo: https://github.com/gitleaks/gitleaks
10
+ rev: v8.18.0
11
+ hooks:
12
+ - id: gitleaks
13
+
14
+ - repo: https://github.com/psf/black
15
+ rev: 22.10.0
16
+ hooks:
17
+ - id: black
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10
2
+
3
+ RUN apt-get update && \
4
+ apt-get install -y wget gnupg2 && \
5
+ wget -O- https://packages.adoptium.net/artifactory/api/gpg/key/public | gpg --dearmor > /usr/share/keyrings/adoptium-archive-keyring.gpg && \
6
+ echo "deb [signed-by=/usr/share/keyrings/adoptium-archive-keyring.gpg] https://packages.adoptium.net/artifactory/deb bookworm main" > /etc/apt/sources.list.d/adoptium.list && \
7
+ apt-get update && \
8
+ apt-get install -y temurin-21-jdk && \
9
+ apt-get clean && \
10
+ rm -rf /var/lib/apt/lists/*
11
+
12
+ ENV JAVA_HOME=/usr/lib/jvm/temurin-21-jdk-amd64
13
+ ENV PATH="$JAVA_HOME/bin:$PATH"
14
+
15
+ COPY requirements.txt .
16
+ RUN pip install --no-cache-dir -r requirements.txt
17
+
18
+ COPY . .
19
+
20
+ CMD ["python", "src/app.py"]
LICENSE.txt ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [2025] [https://github.com/blackopsrepl]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
Makefile ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .PHONY: help venv install run test lint format clean setup-secrets
2
+
3
+ PYTHON=python
4
+ PIP=pip
5
+ VENV=.venv
6
+ ACTIVATE=. $(VENV)/bin/activate
7
+
8
+ help:
9
+ @echo "Yuga Planner Makefile"
10
+ @echo "Available targets:"
11
+ @echo " venv Create a Python virtual environment"
12
+ @echo " install Install all Python dependencies"
13
+ @echo " run Run the Gradio app locally"
14
+ @echo " test Run all tests with pytest"
15
+ @echo " lint Run pre-commit hooks (includes black, yaml, gitleaks)"
16
+ @echo " format Format code with black"
17
+ @echo " setup-secrets Copy and edit secrets template for local dev"
18
+ @echo " clean Remove Python cache and virtual environment"
19
+
20
+ venv:
21
+ $(PYTHON) -m venv $(VENV)
22
+
23
+ install: venv
24
+ $(ACTIVATE); $(PIP) install --upgrade pip
25
+ $(ACTIVATE); $(PIP) install -r requirements.txt
26
+ $(ACTIVATE); $(PIP) install pre-commit black
27
+
28
+ run:
29
+ $(ACTIVATE); $(PYTHON) src/app.py
30
+
31
+ test:
32
+ $(ACTIVATE); pytest
33
+
34
+ lint:
35
+ $(ACTIVATE); pre-commit run --all-files
36
+
37
+ format:
38
+ $(ACTIVATE); black src tests
39
+
40
+ setup-secrets:
41
+ cp -n tests/secrets/nebius_secrets.py.template tests/secrets/creds.py; \
42
+ echo "Edit tests/secrets/creds.py to add your own API credentials."
43
+
44
+ clean:
45
+ rm -rf $(VENV) __pycache__ */__pycache__ .pytest_cache .mypy_cache .coverage .hypothesis
46
+ find . -type f -name '*.pyc' -delete
47
+ find . -type d -name '__pycache__' -exec rm -rf {} +
README.md CHANGED
@@ -1,13 +1,168 @@
1
  ---
2
  title: Yuga Planner
3
- emoji: πŸƒ
4
- colorFrom: green
5
  colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.32.1
8
- app_file: app.py
9
- pinned: false
10
- license: gpl-3.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Yuga Planner
3
+ emoji: 🐍
4
+ colorFrom: purple
5
  colorTo: gray
6
+ sdk: docker
7
+ app_port: 7860
8
+ license: apache-2.0
9
+ tags: ["agent-demo-track"]
 
10
  ---
11
 
12
+ # Yuga Planner 🐍
13
+
14
+ **This project was developed for the [Hugging Face Agents MCP Hackathon](https://huggingface.co/Agents-MCP-Hackathon)!**
15
+
16
+ Yuga Planner is a neuro-symbolic system prototype: it provides an agent-powered team scheduling and task allocation platform built on [Gradio](https://gradio.app/).
17
+
18
+ It takes a project description file such as a README.md file, breaks it down into actionable tasks through a [LlamaIndex](https://www.llamaindex.ai/) agent, then uses [Timefold](http://www.timefold.ai) to generate optimal employee schedules for complex projects.
19
+
20
+ **Demo Video:** [pCloud]()
21
+
22
+ ## πŸš€ Try It Now
23
+ **Live Demo:**
24
+ [https://huggingface.co/spaces/Agents-MCP-Hackathon/yuga-planner](https://huggingface.co/spaces/Agents-MCP-Hackathon/yuga-planner)
25
+
26
+ **Source Code on GitHub:**
27
+ [https://github.com/blackopsrepl/yuga-planner](https://github.com/blackopsrepl/yuga-planner)
28
+
29
+ ### Usage
30
+
31
+ 1. Go to [the live demo](https://huggingface.co/spaces/Agents-MCP-Hackathon/yuga-planner) or [http://localhost:7860](http://localhost:7860)
32
+
33
+ 2. Upload one or more Markdown project file(s), then click "Load Data"
34
+ - Each file will be taken as a separate project
35
+ - The app will parse, decompose, and estimate tasks
36
+ - Click "Solve" to generate an optimal schedule
37
+ - Task order is preserved within each project
38
+
39
+ 3. When the data is loaded, click "Solve" and view results interactively
40
+
41
+ ## Architecture
42
+
43
+ - **Gradio UI:** Main entry point for users
44
+ - **task_composer_agent:** Uses LLMs to decompose and estimate tasks from Markdown
45
+ - **Data Provider:** Generates synthetic employee data and availability preferences
46
+ - **Constraint Solver:** Assigns tasks to employees, optimizing for skills, availability, and fairness
47
+ - **Utils:** Markdown analysis, secret loading, and more
48
+
49
+ ---
50
+
51
+ ## 🌟 Key Features
52
+ | Feature | Description | Status |
53
+ |---------|-------------|--------|
54
+ | **Markdown Project Parsing** | Automatic extraction of tasks from Markdown docs | βœ… |
55
+ | **LLM-Powered Task Analysis** | [LlamaIndex](https://www.llamaindex.ai/) + [Nebius AI](https://nebius.ai/) for task decomposition & estimation | βœ… |
56
+ | **Constraint-Based Scheduling** | [Timefold](http://www.timefold.ai) optimization engine for schedule assignments | βœ… |
57
+ | **Skills Matching** | Detection of skills required for each task | βœ… |
58
+ | **Task Dependencies** | Sequential workflow modeling | βœ… |
59
+ | **Multiple Projects Support** | Load and schedule multiple projects simultaneously | βœ… |
60
+ | **Live Log Streaming** | Real-time solver progress and status updates in UI | βœ… |
61
+ | **Configurable Parameters** | Adjustable employee count and schedule duration | βœ… |
62
+ | **Mock Project Loading** | Pre-configured sample projects for quick testing | βœ… |
63
+ | **Calendar Parsing** | Extracts tasks from uploaded calendar files (.ics) | βœ… |
64
+ | **MCP Endpoint** | API endpoint for MCP tool integration | βœ… |
65
+
66
+ ## 🧩 MCP Tool Integration
67
+
68
+ Yuga Planner now includes an **MCP tool** endpoint, allowing integration with the Hugging Face MCP platform. The MCP tool can process uploaded calendar files (such as `.ics`) and user messages, extracting events and generating a corresponding task dataframe.
69
+
70
+ > **Note:** The current MCP tool implementation returns the *unsolved* task dataframe (not a scheduled/solved output), as full schedule solving is not yet supported for MCP requests. This allows downstream tools or users to inspect and process the extracted tasks before scheduling is implemented.
71
+
72
+ **Features:**
73
+ - Accepts calendar files and user instructions
74
+ - Parses events into actionable tasks
75
+ - Returns a structured dataframe of tasks (unsolved)
76
+ - Designed for easy integration with agent workflows
77
+
78
+ See the [CHANGELOG.md](CHANGELOG.md) for details on recent MCP-related changes.
79
+
80
+ ### Work in Progress
81
+
82
+ - **Gradio UI overhaul**
83
+ - **General optimization of the workflow**
84
+
85
+ ### Future Work
86
+
87
+ - **RAG:** validation of task decomposition and estimation against industry relevant literature
88
+ - **More granular task dependency:** representation of tasks in a tree instead of a list to allow overlap within projects, where feasible/convenient
89
+ - **Input from GitHub issues:** instead of processing markdown directly, it creates a task list by parsing issues
90
+ - **Chat interface:** detection of user intent, with on-the-fly CRUD operations on team, tasks and schedules
91
+ - **Reinforcement learning:** training the agent to improve task decomposition and estimation from GitHub history (e.g. diffs in timestamps, issue comments etc.)
92
+
93
+ ## Prerequisites (Local/GitHub)
94
+
95
+ - Python 3.10
96
+ - Java 17+
97
+ - Docker (optional, for containerized deployment)
98
+ - Nebius API credentials (for LLM-powered features)
99
+
100
+ ### Installation
101
+
102
+ 1. **Clone the repository:**
103
+ ```bash
104
+ git clone https://github.com/blackopsrepl/yuga-planner.git
105
+ cd yuga-planner
106
+ ```
107
+
108
+ 2. **Install dependencies:**
109
+ ```bash
110
+ make install
111
+ ```
112
+
113
+ 3. **Set up environment variables / secrets:**
114
+ ```bash
115
+ make setup-secrets
116
+ # Then edit tests/secrets/creds.py to add your API credentials
117
+ ```
118
+
119
+ 4. **Run the app:**
120
+ ```bash
121
+ make run
122
+ ```
123
+
124
+ #### Docker (Local/GitHub)
125
+
126
+ 1. **Build the image:**
127
+ ```bash
128
+ docker build -t yuga-planner .
129
+ ```
130
+
131
+ 2. **Run the container:**
132
+ ```bash
133
+ docker run -p 7860:7860 yuga-planner
134
+ ```
135
+
136
+ ---
137
+
138
+ ## Testing
139
+
140
+ - **Run tests:**
141
+ ```bash
142
+ make test
143
+ ```
144
+
145
+ - **Test files:**
146
+ Located in the `tests/` directory.
147
+
148
+ ---
149
+
150
+ ## Python Dependencies
151
+
152
+ See `requirements.txt` for full list.
153
+
154
+ ---
155
+
156
+ ## License
157
+
158
+ This project is licensed under the Apache 2.0 License. See [LICENSE.txt](LICENSE.txt) for details.
159
+
160
+ ---
161
+
162
+ ## Acknowledgements
163
+
164
+ - [Hugging Face](https://huggingface.co/)
165
+ - [Gradio](https://gradio.app/)
166
+ - [Nebius LLM](https://nebius.ai/)
167
+ - [llama-index](https://github.com/jerryjliu/llama_index)
168
+ - [Timefold](https://timefold.ai/)
pytest.ini ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [pytest]
2
+ pythonpath = src
3
+ testpaths = tests
4
+ python_files = test_*.py
5
+ addopts = -s -v
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Base requirements
2
+ pytest
3
+ pytest-asyncio
4
+ python-dotenv
5
+ pathlib
6
+ gradio
7
+ gradio[mcp]
8
+ llama-index-core
9
+ llama-index-utils-workflow
10
+ llama-index-llms-nebius
11
+ pandas
12
+ pydantic
13
+ timefold == 1.22.1b0
14
+ icalendar
src/agents/task_composer_agent.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, asyncio, logging
2
+ from typing import Optional, List
3
+
4
+ from llama_index.llms.nebius import NebiusLLM
5
+ from llama_index.core.prompts import RichPromptTemplate
6
+ from llama_index.core.workflow import (
7
+ StartEvent,
8
+ StopEvent,
9
+ Workflow,
10
+ step,
11
+ Event,
12
+ )
13
+
14
+ from utils.markdown_analyzer import MarkdownAnalyzer
15
+ from agents.task_processing import (
16
+ remove_markdown_code_blocks,
17
+ remove_markdown_list_elements,
18
+ unwrap_tasks_from_generated,
19
+ log_task_duration_breakdown,
20
+ log_total_time,
21
+ )
22
+
23
+ logging.basicConfig(level=logging.INFO)
24
+ logger: logging.Logger = logging.getLogger(__name__)
25
+
26
+
27
+ from domain import AgentsConfig, AGENTS_CONFIG
28
+
29
+
30
+ class TaskComposerAgent:
31
+ def __init__(self, config: AgentsConfig = AGENTS_CONFIG):
32
+ self.config = config
33
+ self.llm: Optional[NebiusLLM] = None
34
+ self.task_splitter_template: Optional[RichPromptTemplate] = None
35
+ self.task_evaluator_template: Optional[RichPromptTemplate] = None
36
+ self.task_deps_matcher_template: Optional[RichPromptTemplate] = None
37
+ self.workflow: Optional[TaskComposerWorkflow] = None
38
+
39
+ self.set_llm()
40
+ self.set_prompt_templates()
41
+ self.set_workflow()
42
+
43
+ def set_llm(self) -> None:
44
+ self.llm = NebiusLLM(
45
+ model=self.config.nebius_model,
46
+ api_key=self.config.nebius_api_key,
47
+ timeout=self.config.timeout,
48
+ max_retries=self.config.max_retries,
49
+ verify_ssl=self.config.verify_ssl,
50
+ request_timeout=self.config.request_timeout,
51
+ max_tokens=self.config.max_tokens,
52
+ temperature=self.config.temperature,
53
+ )
54
+
55
+ def set_prompt_templates(self) -> None:
56
+ self.task_splitter_template = RichPromptTemplate(
57
+ self.config.task_splitter_prompt,
58
+ template_var_mappings={"query_str": "query"},
59
+ )
60
+ self.task_evaluator_template = RichPromptTemplate(
61
+ self.config.task_evaluator_prompt,
62
+ template_var_mappings={"query_str": "query"},
63
+ )
64
+ self.task_deps_matcher_template = RichPromptTemplate(
65
+ self.config.task_deps_matcher_prompt,
66
+ template_var_mappings={
67
+ "query_str": "task",
68
+ "skills_str": "skills",
69
+ "context_str": "context",
70
+ },
71
+ )
72
+
73
+ def set_workflow(self) -> None:
74
+ self.workflow = TaskComposerWorkflow(
75
+ llm=self.llm,
76
+ task_splitter_template=self.task_splitter_template,
77
+ task_evaluator_template=self.task_evaluator_template,
78
+ task_deps_matcher_template=self.task_deps_matcher_template,
79
+ timeout=self.config.workflow_timeout,
80
+ verbose=True,
81
+ )
82
+
83
+ async def run_workflow(
84
+ self, query: str, skills: Optional[List[str]] = None, context: str = ""
85
+ ) -> str:
86
+ return await self.workflow.run(
87
+ input=query, skills=skills or [], context=context
88
+ )
89
+
90
+
91
+ class TaskSplitter(Event):
92
+ task_splitter_output: str
93
+ skills: List[str]
94
+ context: str
95
+
96
+
97
+ class TaskEvaluator(Event):
98
+ task_evaluator_output: list[tuple[str, str]]
99
+ skills: List[str]
100
+ context: str
101
+
102
+
103
+ class TaskDependencyMatcher(Event):
104
+ task_dependency_output: list[
105
+ tuple[str, str, str]
106
+ ] # (task, duration, matched_skill)
107
+
108
+
109
+ class TaskComposerWorkflow(Workflow):
110
+ def __init__(
111
+ self,
112
+ llm: NebiusLLM,
113
+ task_splitter_template: RichPromptTemplate,
114
+ task_evaluator_template: RichPromptTemplate,
115
+ task_deps_matcher_template: RichPromptTemplate,
116
+ **kwargs,
117
+ ):
118
+ super().__init__(**kwargs)
119
+ self._llm = llm
120
+ self._task_splitter_template = task_splitter_template
121
+ self._task_evaluator_template = task_evaluator_template
122
+ self._task_deps_matcher_template = task_deps_matcher_template
123
+
124
+ @step
125
+ async def split_tasks(self, event: StartEvent) -> TaskSplitter:
126
+ logger.info("=== Step 1: Task Breakdown ===")
127
+ logger.info(f"Input task: {event.input}")
128
+
129
+ formatted_prompt: str = self._task_splitter_template.format(query=event.input)
130
+
131
+ response = await asyncio.wait_for(
132
+ asyncio.to_thread(self._llm.complete, formatted_prompt), timeout=30.0
133
+ )
134
+
135
+ logger.info("Task breakdown:")
136
+ logger.info(response.text)
137
+
138
+ # Get skills and context from the event, default to empty if not provided
139
+ skills = getattr(event, "skills", [])
140
+ context = getattr(event, "context", "")
141
+
142
+ logger.info(f"Received skills: {skills}")
143
+ logger.info(f"Received context: {context}")
144
+
145
+ return TaskSplitter(
146
+ task_splitter_output=response.text, skills=skills, context=context
147
+ )
148
+
149
+ @step
150
+ async def evaluate_tasks_duration(self, event: TaskSplitter) -> TaskEvaluator:
151
+ logger.info("=== Step 2: Time Estimation ===")
152
+ logger.info("Using task breakdown from Step 1:")
153
+ logger.info(event.task_splitter_output)
154
+
155
+ content: str = remove_markdown_code_blocks(event.task_splitter_output)
156
+ analyzer: MarkdownAnalyzer = MarkdownAnalyzer(content)
157
+ result: list = analyzer.identify_lists()["Unordered list"]
158
+ tasks: list[str] = unwrap_tasks_from_generated(result)
159
+
160
+ logger.info(f"Processing {len(tasks)} tasks for time estimation...")
161
+
162
+ merged_tasks: list[tuple[str, str]] = []
163
+ for i, task in enumerate(tasks, 1):
164
+ try:
165
+ formatted_prompt: str = self._task_evaluator_template.format(query=task)
166
+
167
+ response = await asyncio.wait_for(
168
+ asyncio.to_thread(self._llm.complete, formatted_prompt),
169
+ timeout=30.0,
170
+ )
171
+ merged_tasks.append((task, response.text))
172
+ logger.info(f"Completed time estimation {i}/{len(tasks)}")
173
+
174
+ except asyncio.TimeoutError:
175
+ logger.warning(f"Time estimation timeout for task {i}: {task[:50]}...")
176
+
177
+ # Use default duration of 2 units (1 hour)
178
+ merged_tasks.append((task, "2"))
179
+
180
+ except Exception as e:
181
+ logger.error(f"Error estimating time for task {i}: {e}")
182
+
183
+ # Use default duration of 2 units (1 hour)
184
+ merged_tasks.append((task, "2"))
185
+
186
+ # remove markdown list elements wrapped in **
187
+ merged_tasks = remove_markdown_list_elements(merged_tasks)
188
+ log_task_duration_breakdown(merged_tasks)
189
+ log_total_time(merged_tasks)
190
+
191
+ return TaskEvaluator(
192
+ task_evaluator_output=merged_tasks,
193
+ skills=event.skills,
194
+ context=event.context,
195
+ )
196
+
197
+ @step
198
+ async def evaluate_tasks_dependencies(
199
+ self, event: TaskEvaluator
200
+ ) -> TaskDependencyMatcher:
201
+ logger.info("=== Step 3: Task Dependencies ===")
202
+ logger.info("Matching tasks with available skills")
203
+
204
+ # Get skills and context from the event
205
+ skills = event.skills
206
+ context = event.context
207
+
208
+ if not skills:
209
+ logger.warning("No skills provided, skipping dependency matching")
210
+ # Convert to dependency format with empty skill
211
+ task_dependencies = [
212
+ (task, duration, "") for task, duration in event.task_evaluator_output
213
+ ]
214
+ return TaskDependencyMatcher(task_dependency_output=task_dependencies)
215
+
216
+ skills_str = "\n".join([f"- {skill}" for skill in skills])
217
+ logger.info(f"Available skills: {skills}")
218
+ logger.info(f"Context: {context}")
219
+
220
+ task_dependencies: list[tuple[str, str, str]] = []
221
+ logger.info(
222
+ f"Processing {len(event.task_evaluator_output)} tasks for skill matching..."
223
+ )
224
+
225
+ for i, (task, duration) in enumerate(event.task_evaluator_output, 1):
226
+ try:
227
+ formatted_prompt: str = self._task_deps_matcher_template.format(
228
+ task=task, skills=skills_str, context=context
229
+ )
230
+
231
+ response = await asyncio.wait_for(
232
+ asyncio.to_thread(self._llm.complete, formatted_prompt),
233
+ timeout=30.0,
234
+ )
235
+
236
+ matched_skill = response.text.strip()
237
+ task_dependencies.append((task, duration, matched_skill))
238
+ logger.info(
239
+ f"Completed skill matching {i}/{len(event.task_evaluator_output)}: {task[:50]}... -> {matched_skill}"
240
+ )
241
+
242
+ except asyncio.TimeoutError:
243
+ logger.warning(f"Skill matching timeout for task {i}: {task[:50]}...")
244
+
245
+ # Use first available skill as fallback
246
+ fallback_skill = skills[0] if skills else ""
247
+ task_dependencies.append((task, duration, fallback_skill))
248
+
249
+ except Exception as e:
250
+ logger.error(f"Error matching skill for task {i}: {e}")
251
+
252
+ # Use first available skill as fallback
253
+ fallback_skill = skills[0] if skills else ""
254
+ task_dependencies.append((task, duration, fallback_skill))
255
+
256
+ return TaskDependencyMatcher(task_dependency_output=task_dependencies)
257
+
258
+ @step
259
+ async def result_output(self, event: TaskDependencyMatcher) -> StopEvent:
260
+ logger.info("=== Step 4: Final Result ===")
261
+
262
+ # Log the final breakdown with dependencies
263
+ for task, duration, skill in event.task_dependency_output:
264
+ logger.info(f"Task: {task}")
265
+ logger.info(f" Duration: {duration} units")
266
+ logger.info(f" Matched Skill: {skill}")
267
+ logger.info("-" * 50)
268
+
269
+ return StopEvent(result=event.task_dependency_output)
src/agents/task_processing.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re, logging
2
+
3
+ from utils.markdown_analyzer import MarkdownAnalyzer
4
+
5
+ logging.basicConfig(level=logging.INFO)
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
+ ### MARKDOWN UTILS ###
10
def remove_markdown_code_blocks(text: str) -> str:
    """
    Remove a surrounding markdown code fence from text.

    Strips a leading ```markdown fence (or a bare ``` fence, which the
    original implementation missed) and a trailing ``` fence, then trims
    surrounding whitespace. Unfenced text is returned stripped but
    otherwise unchanged.

    Args:
        text (str): Text that may contain markdown code block syntax

    Returns:
        str: Text with markdown code block syntax removed
    """
    content = text

    # Strip the opening fence; prefer the language-tagged form, fall back
    # to a bare fence so ```-only wrappers are also handled.
    if content.startswith("```markdown"):
        content = content[len("```markdown"):]
    elif content.startswith("```"):
        content = content[len("```"):]

    # Strip the closing fence.
    if content.endswith("```"):
        content = content[:-3]

    return content.strip()
29
+
30
+
31
def remove_markdown_list_headers(
    merged_tasks: list[tuple[str, str]]
) -> list[tuple[str, str]]:
    """
    Strip markdown formatting (e.g. **element** headers) from task descriptions.

    Each task string is run through MarkdownAnalyzer and replaced by its
    plain-text content; durations are passed through untouched.

    Args:
        merged_tasks (list[tuple[str, str]]): List of (task, duration) tuples

    Returns:
        list[tuple[str, str]]: List of (task, duration) tuples with headers removed
    """

    def _plain_text(raw_task: str) -> str:
        # MarkdownAnalyzer exposes the formatting-free content via .text
        return MarkdownAnalyzer(raw_task).text.strip()

    return [(_plain_text(task), duration) for task, duration in merged_tasks]
54
+
55
+
56
def remove_markdown_list_elements(
    merged_tasks: list[tuple[str, str]]
) -> list[tuple[str, str]]:
    """
    Drop tasks whose description starts OR ends with a ** marker.

    Such entries are markdown emphasis/header artifacts rather than real
    tasks, so the whole (task, duration) tuple is discarded.

    Args:
        merged_tasks (list[tuple[str, str]]): List of (task, duration) tuples

    Returns:
        list[tuple[str, str]]: List of (task, duration) tuples with markdown list elements removed
    """

    def _is_marker(text: str) -> bool:
        stripped = text.strip()
        return stripped.startswith("**") or stripped.endswith("**")

    return [pair for pair in merged_tasks if not _is_marker(pair[0])]
78
+
79
+
80
def unwrap_tasks_from_generated(result: list) -> list:
    """
    Extract task text from the generated markdown list structure.

    Expects ``result`` to be a list whose single element is a list of
    ``{"text": ...}`` dictionaries. Malformed input yields an empty list
    instead of raising.

    Args:
        result (list): List containing markdown list structure

    Returns:
        list: List of task text strings
    """
    tasks = []

    # Input validation: check if result is a list
    if not isinstance(result, list):
        logger.error("Error: 'Unordered list' is not a list!")
        return tasks

    # Guard against an empty outer list: the original code indexed result[0]
    # unconditionally and raised IndexError here.
    if not result:
        return tasks

    # We expect result to be a list of lists, with only one entry
    if not isinstance(result[0], list):
        logger.error("Error: The first element of the result is not a list!")
        return tasks

    # Unwrap the inner list of dictionaries
    for task in result[0]:
        if isinstance(task, dict) and "text" in task:
            tasks.append(task["text"])
        else:
            logger.warning(f"Unexpected task format: {task}")

    return tasks
110
+
111
+
112
+ ### LOGGING ###
113
def log_task_duration_breakdown(merged_tasks: list[tuple[str, str]]) -> None:
    """
    Log the duration breakdown for each task, one line per task.

    Args:
        merged_tasks (list[tuple[str, str]]): List of (task, duration) tuples
    """
    logger.info("Task duration breakdown:")

    for task_text, task_duration in merged_tasks:
        logger.info(f"- {task_text}: {task_duration} units")
124
+
125
+
126
def safe_int(val):
    """Coerce val to int; unconvertible values (bad strings, None) become 0."""
    try:
        result = int(val)
    except (ValueError, TypeError):
        result = 0
    return result
131
+
132
+
133
def log_total_time(merged_tasks: list[tuple[str, str]]) -> None:
    """
    Log the total estimated time for all tasks.

    Durations that cannot be parsed as integers count as 0 (via safe_int).

    Args:
        merged_tasks (list[tuple[str, str]]): List of (task, duration) tuples
    """
    durations = (time for _, time in merged_tasks)
    total_time = sum(safe_int(duration) for duration in durations)

    logger.info("Estimated time:")
    logger.info(f"{total_time} units (30 minutes each)")
src/app.py ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, argparse, logging
2
+ import gradio as gr
3
+
4
+ logging.basicConfig(level=logging.INFO)
5
+
6
+
7
+ from utils.load_secrets import load_secrets
8
+
9
+ if not os.getenv("NEBIUS_API_KEY") or not os.getenv("NEBIUS_MODEL"):
10
+ load_secrets("tests/secrets/creds.py")
11
+
12
+
13
+ from handlers import (
14
+ load_data,
15
+ show_solved,
16
+ start_timer,
17
+ auto_poll,
18
+ show_mock_project_content,
19
+ )
20
+
21
+ from mcp_handlers import process_message_and_attached_file
22
+
23
+ from services import MockProjectService
24
+
25
+ # Store last chat message and file in global variables (for demo purposes)
26
+ last_message_body = None
27
+ last_attached_file = None
28
+
29
+
30
+ # =========================
31
+ # APP
32
+ # =========================
33
+
34
+
35
def app(debug: bool = False):
    """Build the Yuga Planner Gradio Blocks UI.

    Args:
        debug (bool): When True, adds extra debug buttons/logging to the UI
            and propagates the flag into the data-loading/polling handlers.

    Returns:
        gr.Blocks: The assembled (not yet launched) Gradio app.
    """
    with gr.Blocks() as demo:
        gr.Markdown(
            """
        # Yuga Planner
        Yuga Planner is a neuro-symbolic system prototype: it provides an agent-powered team scheduling and task allocation platform built on [Gradio](https://gradio.app/).
        """
        )

        with gr.Tab("Information"):

            def get_server_url():
                # NOTE(review): gr.get_state() availability depends on the
                # Gradio version — confirm; the fallback keeps the page usable.
                try:
                    return gr.get_state().server_url + "/gradio_api/mcp/sse"
                except Exception:  # was a bare except: don't swallow SystemExit/KeyboardInterrupt
                    return "http://localhost:7860/gradio_api/mcp/sse"

            gr.Markdown(
                f"""
        This is a demo of the Yuga Planner system.

        To use as an MCP server:
        1. Register the MCP server with your client using the URL:
        ```
        {get_server_url()}
        ```
        2. Call the tool from your client. Example:
        ```
        use yuga planner tool @tests/data/calendar.ics
        Task Description: Create a new AWS VPC
        ```

        """
            )

        with gr.Tab("Task Scheduling"):
            gr.Markdown("### SWE Team Task Scheduling Demo")

            gr.Markdown(
                """
        ## Instructions
        1. Choose a project source - either upload your own project file(s) or select from our mock projects
        2. Click 'Load Data' to parse, decompose, and estimate tasks
        3. Click 'Solve' to generate an optimal schedule based on employee skills and availability
        4. Review the results in the tables below
        """
            )

            # Project source selector
            project_source = gr.Radio(
                choices=["Upload Project Files", "Use Mock Projects"],
                value="Upload Project Files",
                label="Project Source",
            )

            # Configuration parameters
            with gr.Row():
                employee_count = gr.Number(
                    label="Number of Employees",
                    value=12,
                    minimum=1,
                    maximum=100,
                    step=1,
                    precision=0,
                )
                days_in_schedule = gr.Number(
                    label="Days in Schedule",
                    value=365,
                    minimum=1,
                    maximum=365,
                    step=1,
                    precision=0,
                )

            # File upload component (initially visible)
            with gr.Group(visible=True) as file_upload_group:
                file_upload = gr.File(
                    label="Upload Project Files (Markdown)",
                    file_types=[".md"],
                    file_count="multiple",
                )

            # Mock projects dropdown (initially hidden)
            with gr.Group(visible=False) as mock_projects_group:
                # Get mock project names from ProjectService
                available_projects = MockProjectService.get_available_project_names()
                mock_project_dropdown = gr.Dropdown(
                    choices=available_projects,
                    label="Select Mock Projects (multiple selection allowed)",
                    value=[available_projects[0]] if available_projects else [],
                    multiselect=True,
                )

                # Accordion for viewing mock project content
                with gr.Accordion("πŸ“‹ Project Content Preview", open=False):
                    mock_project_content_accordion = gr.Textbox(
                        label="Project Content",
                        interactive=False,
                        lines=15,
                        max_lines=20,
                        show_copy_button=True,
                        placeholder="Select projects above and expand this section to view content...",
                    )

                # Auto-update content when projects change
                mock_project_dropdown.change(
                    show_mock_project_content,
                    inputs=[mock_project_dropdown],
                    outputs=[mock_project_content_accordion],
                )

            # Log Terminal - Always visible for streaming logs
            gr.Markdown("## Live Log Terminal")
            log_terminal = gr.Textbox(
                label="Processing Logs",
                interactive=False,
                lines=8,
                max_lines=15,
                show_copy_button=True,
                placeholder="Logs will appear here during data loading...",
            )

            # Toggle visibility based on project source selection
            def toggle_visibility(choice):
                if choice == "Upload Project Files":
                    return gr.update(visible=True), gr.update(visible=False)
                else:
                    return gr.update(visible=False), gr.update(visible=True)

            project_source.change(
                toggle_visibility,
                inputs=[project_source],
                outputs=[file_upload_group, mock_projects_group],
            )

            # State for LLM output, persists per session
            llm_output_state = gr.State(value=None)
            job_id_state = gr.State(value=None)
            status_text = gr.Textbox(
                label="Solver Status",
                interactive=False,
                lines=8,
                max_lines=20,
                show_copy_button=True,
            )

            with gr.Row():
                load_btn = gr.Button("Load Data")
                solve_btn = gr.Button("Solve", interactive=False)  # Initially disabled

            gr.Markdown("## Employees")
            employees_table = gr.Dataframe(label="Employees", interactive=False)

            gr.Markdown("## Tasks")
            schedule_table = gr.Dataframe(label="Tasks Table", interactive=False)

            # Outputs: always keep state as last output
            outputs = [
                employees_table,
                schedule_table,
                job_id_state,
                status_text,
                llm_output_state,
                log_terminal,
            ]

            # Outputs for load_data that also enables solve button
            load_outputs = outputs + [solve_btn]

            # Create wrapper function to pass debug flag to auto_poll
            async def auto_poll_with_debug(job_id, llm_output):
                return await auto_poll(job_id, llm_output, debug=debug)

            # Timer for polling (not related to state)
            timer = gr.Timer(2, active=False)
            timer.tick(
                auto_poll_with_debug,
                inputs=[job_id_state, llm_output_state],
                outputs=outputs,
            )

            # Create wrapper function to pass debug flag to load_data
            async def load_data_with_debug(
                project_source,
                file_obj,
                mock_projects,
                employee_count,
                days_in_schedule,
                llm_output,
                progress=gr.Progress(),
            ):
                async for result in load_data(
                    project_source,
                    file_obj,
                    mock_projects,
                    employee_count,
                    days_in_schedule,
                    llm_output,
                    debug=debug,
                    progress=progress,
                ):
                    yield result

            # Use state as both input and output
            load_btn.click(
                load_data_with_debug,
                inputs=[
                    project_source,
                    file_upload,
                    mock_project_dropdown,
                    employee_count,
                    days_in_schedule,
                    llm_output_state,
                ],
                outputs=load_outputs,
                api_name="load_data",
            )

            # Create wrapper function to pass debug flag to show_solved
            async def show_solved_with_debug(state_data, job_id):
                return await show_solved(state_data, job_id, debug=debug)

            solve_btn.click(
                show_solved_with_debug,
                inputs=[llm_output_state, job_id_state],
                outputs=outputs,
            ).then(start_timer, inputs=[job_id_state, llm_output_state], outputs=timer)

            if debug:

                def debug_set_state(state):
                    # was an f-string with no placeholders
                    logging.info("DEBUG: Setting state to test_value")
                    return "Debug: State set!", "test_value"

                def debug_show_state(state):
                    logging.info(f"DEBUG: Current state is {state}")
                    return f"Debug: Current state: {state}", gr.update()

                debug_out = gr.Textbox(label="Debug Output")
                debug_set_btn = gr.Button("Debug Set State")
                debug_show_btn = gr.Button("Debug Show State")

                debug_set_btn.click(
                    debug_set_state,
                    inputs=[llm_output_state],
                    outputs=[debug_out, llm_output_state],
                )
                debug_show_btn.click(
                    debug_show_state,
                    inputs=[llm_output_state],
                    outputs=[debug_out, gr.State()],
                )

        # Register the MCP tool as an API endpoint
        gr.api(process_message_and_attached_file)

        return demo
292
+
293
+
294
if __name__ == "__main__":
    # CLI entry point: parse flags, build the app, and serve it with the
    # MCP endpoint enabled.
    arg_parser = argparse.ArgumentParser(
        description="Yuga Planner - Team Scheduling Application"
    )
    arg_parser.add_argument(
        "--debug",
        action="store_true",
        help="Enable debug mode with additional UI controls and logging",
    )
    arg_parser.add_argument(
        "--server-name",
        default="0.0.0.0",
        help="Server name/IP to bind to (default: 0.0.0.0)",
    )
    arg_parser.add_argument(
        "--server-port",
        type=int,
        default=7860,
        help="Server port to bind to (default: 7860)",
    )

    cli_args = arg_parser.parse_args()

    demo = app(debug=cli_args.debug)
    demo.launch(
        server_name=cli_args.server_name,
        server_port=cli_args.server_port,
        mcp_server=True,
    )
src/constraint_solvers/timetable/__init__.py ADDED
File without changes
src/constraint_solvers/timetable/analysis/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Constraint violation analysis module.
3
+
4
+ This module provides tools for analyzing constraint violations in Timefold solver results.
5
+ """
6
+
7
+ from .violation_analyzer import ConstraintViolationAnalyzer
8
+
9
+ __all__ = ["ConstraintViolationAnalyzer"]
src/constraint_solvers/timetable/analysis/violation_analyzer.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Set
2
+ from ..domain import EmployeeSchedule, Task, Employee
3
+
4
+
5
class ConstraintViolationAnalyzer:
    """
    Service for analyzing constraint violations in scheduling solutions.

    This service implements automatic detection of infeasible scheduling problems.
    When the Timefold solver cannot satisfy all hard constraints, it returns a
    solution with a negative hard score. This service analyzes such solutions to
    provide users with specific, actionable feedback about why their scheduling
    problem cannot be solved.
    """

    @staticmethod
    def analyze_constraint_violations(schedule: EmployeeSchedule) -> str:
        """
        Analyze constraint violations in a schedule and provide detailed feedback.

        Args:
            schedule: The schedule to analyze

        Returns:
            Detailed string describing constraint violations and suggestions
        """
        # A missing score or non-negative hard score means the solution is feasible.
        if not schedule.score or schedule.score.hard_score >= 0:
            return "No constraint violations detected."

        violations: List[str] = []

        # Each checker returns a (possibly empty) list of human-readable findings;
        # extending with an empty list is a no-op, so no guards are needed.
        violations.extend(ConstraintViolationAnalyzer._check_skill_violations(schedule))
        violations.extend(ConstraintViolationAnalyzer._check_time_violations(schedule))
        violations.extend(
            ConstraintViolationAnalyzer._check_availability_violations(schedule)
        )
        violations.extend(
            ConstraintViolationAnalyzer._check_sequence_violations(schedule)
        )

        if not violations:
            violations.append("Unknown constraint violations detected.")

        return "\n".join(violations)

    @staticmethod
    def _check_skill_violations(schedule: EmployeeSchedule) -> List[str]:
        """Check for tasks that require skills not available in the employee pool"""
        violations: List[str] = []

        # Union of every skill anyone on the team has.
        available_skills: Set[str] = set()
        for employee in schedule.employees:
            available_skills.update(employee.skills)

        # Only unassigned tasks are inspected: an assigned task's skill was satisfiable.
        unassigned_tasks = [task for task in schedule.tasks if not task.employee]
        missing_skills: Set[str] = set()

        for task in unassigned_tasks:
            if task.required_skill not in available_skills:
                missing_skills.add(task.required_skill)

        if missing_skills:
            violations.append(
                f"β€’ Missing Skills: No employees have these required skills: {', '.join(sorted(missing_skills))}"
            )

        return violations

    @staticmethod
    def _check_time_violations(schedule: EmployeeSchedule) -> List[str]:
        """Check for insufficient time to complete all tasks"""
        violations: List[str] = []

        total_task_slots = sum(task.duration_slots for task in schedule.tasks)
        total_available_slots = (
            len(schedule.employees) * schedule.schedule_info.total_slots
        )

        if total_task_slots > total_available_slots:
            total_task_hours = total_task_slots / 2  # Convert 30-min slots to hours
            total_available_hours = total_available_slots / 2
            violations.append(
                f"β€’ Insufficient Time: Tasks require {total_task_hours:.1f} hours total, "
                f"but only {total_available_hours:.1f} hours available across all employees"
            )

        return violations

    @staticmethod
    def _check_availability_violations(schedule: EmployeeSchedule) -> List[str]:
        """Report tasks that could not be assigned to any employee.

        NOTE: detecting tasks scheduled during an employee's unavailable
        periods would require mapping start_slot to calendar dates; until
        that is implemented, this check only counts unassigned tasks.
        (A dead per-task loop that did nothing was removed here.)
        """
        violations: List[str] = []

        unassigned_count = len([task for task in schedule.tasks if not task.employee])
        if unassigned_count > 0:
            violations.append(
                f"β€’ Unassigned Tasks: {unassigned_count} task(s) could not be assigned to any employee"
            )

        return violations

    @staticmethod
    def _check_sequence_violations(schedule: EmployeeSchedule) -> List[str]:
        """Check for project sequencing constraint violations"""
        violations: List[str] = []

        # Group tasks by project (tasks with empty project_id are skipped).
        project_tasks: Dict[str, List[Task]] = {}
        for task in schedule.tasks:
            project_id = getattr(task, "project_id", "")
            if project_id:
                if project_id not in project_tasks:
                    project_tasks[project_id] = []
                project_tasks[project_id].append(task)

        # Check sequencing within each project
        for project_id, tasks in project_tasks.items():
            if len(tasks) > 1:
                # Sort by sequence number
                sorted_tasks = sorted(
                    tasks, key=lambda t: getattr(t, "sequence_number", 0)
                )

                # Check if tasks are assigned and properly sequenced
                for i in range(len(sorted_tasks) - 1):
                    current_task = sorted_tasks[i]
                    next_task = sorted_tasks[i + 1]

                    if not current_task.employee or not next_task.employee:
                        continue  # Skip unassigned tasks

                    # Check if next task starts after current task ends
                    if next_task.start_slot < (
                        current_task.start_slot + current_task.duration_slots
                    ):
                        # One finding per project is enough; stop at first violation.
                        violations.append(
                            f"β€’ Sequence Violation: In project '{project_id}', task sequence is violated"
                        )
                        break

        return violations

    @staticmethod
    def generate_suggestions(schedule: EmployeeSchedule) -> List[str]:
        """Generate actionable suggestions for fixing constraint violations"""
        suggestions: List[str] = []

        # Feasible solutions need no suggestions.
        if not schedule.score or schedule.score.hard_score >= 0:
            return suggestions

        # Basic suggestions based on common issues
        suggestions.extend(
            [
                "Add more employees with required skills",
                "Increase the scheduling time window (more days)",
                "Reduce task requirements or durations",
                "Check employee availability constraints",
                "Review project sequencing requirements",
            ]
        )

        return suggestions
src/constraint_solvers/timetable/constraints.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### GENERAL IMPORTS ###
2
+ from datetime import date, timedelta
3
+
4
+ ### DOMAIN ###
5
+ from .domain import Employee, Task, ScheduleInfo
6
+
7
+ ### TIMEFOLD ###
8
+ from timefold.solver.score import HardSoftDecimalScore
9
+ from timefold.solver.score._constraint_factory import ConstraintFactory
10
+ from timefold.solver.score._joiners import Joiners
11
+ from timefold.solver.score._group_by import ConstraintCollectors
12
+ from timefold.solver.score._annotations import constraint_provider
13
+
14
+
15
def get_slot_overlap(task1: Task, task2: Task) -> int:
    """Return how many schedule slots two tasks occupy in common.

    Args:
        task1 (Task): The first task.
        task2 (Task): The second task.

    Returns:
        int: The number of overlapping slots (0 when the tasks are disjoint).
    """
    latest_start = max(task1.start_slot, task2.start_slot)
    earliest_end = min(
        task1.start_slot + task1.duration_slots,
        task2.start_slot + task2.duration_slots,
    )
    return earliest_end - latest_start if earliest_end > latest_start else 0
30
+
31
+
32
def get_slot_date(slot: int) -> date:
    """Map a slot index to a calendar date, counting 20 slots per day.

    Args:
        slot (int): The slot index.

    Returns:
        date: The date (relative to today) corresponding to the slot.
    """
    slots_per_day = 20
    day_offset = slot // slots_per_day
    return date.today() + timedelta(days=day_offset)
42
+
43
+
44
def tasks_violate_sequence_order(task1: Task, task2: Task) -> bool:
    """Return True when task1 must precede task2 in its project yet overlaps it.

    task1 "should come first" when both tasks share a non-empty project_id
    and task1 has the strictly lower sequence_number; the pair violates
    ordering when task1's slot range runs past task2's start slot.

    Args:
        task1 (Task): The first task.
        task2 (Task): The second task.

    Returns:
        bool: True if task1 should come before task2 but overlaps with it.
    """
    # Guard clauses: same task, missing project metadata, no/mismatched
    # project, or task1 not actually earlier in sequence.
    if task1.id == task2.id:
        return False
    if not (hasattr(task1, "project_id") and hasattr(task2, "project_id")):
        return False
    if task1.project_id == "" or task1.project_id != task2.project_id:
        return False
    if task1.sequence_number >= task2.sequence_number:
        return False

    # Violation: task1 should finish before task2 starts.
    task1_end = task1.start_slot + task1.duration_slots
    return task1_end > task2.start_slot
76
+
77
+
78
@constraint_provider
def define_constraints(constraint_factory: ConstraintFactory) -> list:
    """
    Assemble every constraint for the timetable problem.

    Args:
        constraint_factory (ConstraintFactory): The constraint factory.

    Returns:
        list[Constraint]: Hard-grouped constraints first, then soft ones.
    """
    hard_constraints = [
        required_skill(constraint_factory),
        no_overlapping_tasks(constraint_factory),
        task_within_schedule(constraint_factory),
        task_fits_in_schedule(constraint_factory),
        unavailable_employee(constraint_factory),
        # Grouped with the hard constraints, though it is implemented as a
        # heavily weighted soft penalty (see maintain_project_task_order).
        maintain_project_task_order(constraint_factory),
    ]
    soft_constraints = [
        undesired_day_for_employee(constraint_factory),
        desired_day_for_employee(constraint_factory),
        balance_employee_task_assignments(constraint_factory),
    ]
    return hard_constraints + soft_constraints
102
+
103
+
104
+ ### CONSTRAINTS ###
105
def required_skill(constraint_factory: ConstraintFactory):
    """Hard constraint: an assigned employee must have the task's required skill."""

    def _lacks_required_skill(t: Task) -> bool:
        # Unassigned tasks are ignored here.
        return t.employee is not None and t.required_skill not in t.employee.skills

    return (
        constraint_factory.for_each(Task)
        .filter(_lacks_required_skill)
        .penalize(HardSoftDecimalScore.ONE_HARD)
        .as_constraint("Required skill")
    )
115
+
116
+
117
def no_overlapping_tasks(constraint_factory: ConstraintFactory):
    """Hard constraint: two tasks of the same employee must not occupy the same slots.

    Penalty is proportional to the number of overlapping slots (get_slot_overlap).
    """
    return (
        constraint_factory.for_each_unique_pair(
            Task,
            # NOTE(review): task.employee can be None for an uninitialized entity;
            # .employee.name would then raise — confirm Timefold only joins
            # initialized pairs here.
            Joiners.equal(lambda task: task.employee.name),
            Joiners.overlapping(
                lambda task: task.start_slot,
                lambda task: task.start_slot + task.duration_slots,
            ),
        )
        .penalize(HardSoftDecimalScore.ONE_HARD, get_slot_overlap)
        .as_constraint("No overlapping tasks")
    )
130
+
131
+
132
def task_within_schedule(constraint_factory: ConstraintFactory):
    """Hard constraint: a task may not start before slot 0."""

    def _starts_before_schedule(t: Task) -> bool:
        return t.start_slot < 0

    return (
        constraint_factory.for_each(Task)
        .filter(_starts_before_schedule)
        .penalize(HardSoftDecimalScore.ONE_HARD)
        .as_constraint("Task within schedule")
    )
139
+
140
+
141
def task_fits_in_schedule(constraint_factory: ConstraintFactory):
    """Hard constraint: a task must end on or before the last schedule slot."""

    def _overruns_schedule(t: Task, info: ScheduleInfo) -> bool:
        return t.start_slot + t.duration_slots > info.total_slots

    return (
        constraint_factory.for_each(Task)
        .join(ScheduleInfo)
        .filter(_overruns_schedule)
        .penalize(HardSoftDecimalScore.ONE_HARD)
        .as_constraint("Task fits in schedule")
    )
152
+
153
+
154
def unavailable_employee(constraint_factory: ConstraintFactory):
    """Hard constraint: a task must not start on a date its employee is unavailable."""

    def _on_unavailable_date(t: Task) -> bool:
        if t.employee is None:
            return False
        return get_slot_date(t.start_slot) in t.employee.unavailable_dates

    return (
        constraint_factory.for_each(Task)
        .filter(_on_unavailable_date)
        .penalize(HardSoftDecimalScore.ONE_HARD)
        .as_constraint("Unavailable employee")
    )
164
+
165
+
166
def undesired_day_for_employee(constraint_factory: ConstraintFactory):
    """Soft constraint: penalize tasks starting on a date the employee dislikes."""

    def _on_undesired_date(t: Task) -> bool:
        if t.employee is None:
            return False
        return get_slot_date(t.start_slot) in t.employee.undesired_dates

    return (
        constraint_factory.for_each(Task)
        .filter(_on_undesired_date)
        .penalize(HardSoftDecimalScore.ONE_SOFT)
        .as_constraint("Undesired day for employee")
    )
176
+
177
+
178
def desired_day_for_employee(constraint_factory: ConstraintFactory):
    """Soft constraint: reward tasks starting on a date the employee prefers."""

    def _on_desired_date(t: Task) -> bool:
        if t.employee is None:
            return False
        return get_slot_date(t.start_slot) in t.employee.desired_dates

    return (
        constraint_factory.for_each(Task)
        .filter(_on_desired_date)
        .reward(HardSoftDecimalScore.ONE_SOFT)
        .as_constraint("Desired day for employee")
    )
188
+
189
+
190
def maintain_project_task_order(constraint_factory: ConstraintFactory):
    """Ensure tasks within the same project maintain their original order.

    Implemented as a heavily weighted soft penalty (100x the overlap in
    slots) rather than a hard constraint, so the solver strongly prefers —
    but is not strictly forced into — correct sequencing.
    """
    return (
        constraint_factory.for_each(Task)
        .join(Task)  # all pairs; tasks_violate_sequence_order prunes them
        .filter(tasks_violate_sequence_order)
        .penalize(
            HardSoftDecimalScore.ONE_SOFT,
            lambda task1, task2: 100
            * (task1.start_slot + task1.duration_slots - task2.start_slot),
        )  # High penalty (100x) proportional to overlap to strongly encourage proper sequencing
        .as_constraint("Project task sequence order")
    )
203
+
204
+
205
def balance_employee_task_assignments(constraint_factory: ConstraintFactory):
    """Soft constraint: spread tasks evenly across employees.

    Uses Timefold's load_balance collector; the penalty equals the
    distribution's unfairness measure, so a perfectly even split costs 0.
    """
    return (
        constraint_factory.for_each(Task)
        .group_by(lambda task: task.employee, ConstraintCollectors.count())
        .complement(
            Employee, lambda e: 0
        )  # Include all employees which are not assigned to any task
        .group_by(
            ConstraintCollectors.load_balance(
                lambda employee, task_count: employee,
                lambda employee, task_count: task_count,
            )
        )
        .penalize_decimal(
            HardSoftDecimalScore.ONE_SOFT,
            lambda load_balance: load_balance.unfairness(),
        )
        .as_constraint("Balance employee task assignments")
    )
src/constraint_solvers/timetable/domain.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from timefold.solver import SolverStatus
2
+ from timefold.solver.domain import *
3
+ from timefold.solver.score import HardSoftDecimalScore
4
+ from datetime import datetime, date
5
+ from typing import Annotated
6
+ from dataclasses import dataclass, field
7
+
8
+
9
@dataclass
class Employee:
    """Problem fact: a schedulable employee with skills and date preferences."""

    # Unique name, used by Timefold as the planning id.
    name: Annotated[str, PlanningId]
    # NOTE(review): field(default_factory=set) inside Annotated is metadata
    # only and does NOT give `skills` a default — it stays a required
    # constructor argument; the three date sets below get real defaults
    # from their `= field(...)` assignments. Confirm this is intentional.
    skills: Annotated[set[str], field(default_factory=set)]
    # Dates the employee cannot work (hard constraint).
    unavailable_dates: Annotated[set[date], field(default_factory=set)] = field(
        default_factory=set
    )
    # Dates the employee would rather not work (soft penalty).
    undesired_dates: Annotated[set[date], field(default_factory=set)] = field(
        default_factory=set
    )
    # Dates the employee prefers to work (soft reward).
    desired_dates: Annotated[set[date], field(default_factory=set)] = field(
        default_factory=set
    )

    def to_dict(self):
        """Serialize to JSON-friendly primitives (sets -> lists, dates -> ISO strings).

        Note: set iteration order is arbitrary, so list order is not stable.
        """
        return {
            "name": self.name,
            "skills": list(self.skills),
            "unavailable_dates": [d.isoformat() for d in self.unavailable_dates],
            "undesired_dates": [d.isoformat() for d in self.undesired_dates],
            "desired_dates": [d.isoformat() for d in self.desired_dates],
        }

    @staticmethod
    def from_dict(d):
        """Inverse of to_dict: rebuild an Employee from serialized primitives."""
        return Employee(
            name=d["name"],
            skills=set(d["skills"]),
            unavailable_dates=set(
                date.fromisoformat(s) for s in d["unavailable_dates"]
            ),
            undesired_dates=set(date.fromisoformat(s) for s in d["undesired_dates"]),
            desired_dates=set(date.fromisoformat(s) for s in d["desired_dates"]),
        )
43
+
44
+
45
@planning_entity
@dataclass
class Task:
    """Planning entity: a unit of work to place on the schedule.

    Timefold assigns the two planning variables (start_slot, employee);
    every other field is fixed problem data.
    """

    # Unique identifier, used by Timefold as the planning id.
    id: Annotated[str, PlanningId]
    description: str
    duration_slots: int  # Number of 30-minute slots required
    # Planning variable: chosen from EmployeeSchedule.get_start_slot_range().
    start_slot: Annotated[
        int, PlanningVariable(value_range_provider_refs=["startSlotRange"])
    ]  # Slot index when the task starts
    required_skill: str
    # Identifier for the project this task belongs to (set by the UI when loading multiple project files)
    project_id: str = ""
    # Sequence number within the project to maintain original task order
    sequence_number: int = 0
    # Planning variable: assigned employee; None while unassigned.
    employee: Annotated[
        Employee | None, PlanningVariable(value_range_provider_refs=["employeeRange"])
    ] = None

    def to_dict(self):
        """Serialize to JSON-friendly primitives (employee serialized recursively)."""
        return {
            "id": self.id,
            "description": self.description,
            "duration_slots": self.duration_slots,
            "start_slot": self.start_slot,
            "required_skill": self.required_skill,
            "project_id": self.project_id,
            "sequence_number": self.sequence_number,
            "employee": self.employee.to_dict() if self.employee else None,
        }

    @staticmethod
    def from_dict(d):
        """Inverse of to_dict; project_id/sequence_number default for older payloads."""
        return Task(
            id=d["id"],
            description=d["description"],
            duration_slots=d["duration_slots"],
            start_slot=d["start_slot"],
            required_skill=d["required_skill"],
            project_id=d.get("project_id", ""),
            sequence_number=d.get("sequence_number", 0),
            employee=Employee.from_dict(d["employee"]) if d["employee"] else None,
        )
87
+
88
+
89
@dataclass
class ScheduleInfo:
    """Problem fact describing the schedule horizon."""

    # Total number of 30-minute slots in the schedule.
    total_slots: int

    def to_dict(self):
        """Serialize to a plain dict for state passing."""
        return {"total_slots": self.total_slots}

    @staticmethod
    def from_dict(d):
        """Rebuild a ScheduleInfo from its to_dict representation."""
        return ScheduleInfo(total_slots=d["total_slots"])
+ return ScheduleInfo(total_slots=d["total_slots"])
99
+
100
+
101
+ @planning_solution
102
+ @dataclass
103
+ class EmployeeSchedule:
104
+ employees: Annotated[
105
+ list[Employee],
106
+ ProblemFactCollectionProperty,
107
+ ValueRangeProvider(id="employeeRange"),
108
+ ]
109
+ tasks: Annotated[list[Task], PlanningEntityCollectionProperty]
110
+ schedule_info: Annotated[ScheduleInfo, ProblemFactProperty]
111
+ score: Annotated[HardSoftDecimalScore | None, PlanningScore] = None
112
+ solver_status: SolverStatus | None = None
113
+
114
+ def get_start_slot_range(
115
+ self,
116
+ ) -> Annotated[list[int], ValueRangeProvider(id="startSlotRange")]:
117
+ """Returns all possible start slots."""
118
+ return list(range(self.schedule_info.total_slots))
119
+
120
+ def to_dict(self):
121
+ return {
122
+ "employees": [e.to_dict() for e in self.employees],
123
+ "tasks": [t.to_dict() for t in self.tasks],
124
+ "schedule_info": self.schedule_info.to_dict(),
125
+ "score": str(self.score) if self.score is not None else None,
126
+ "solver_status": str(self.solver_status)
127
+ if self.solver_status is not None
128
+ else None,
129
+ }
130
+
131
+ @staticmethod
132
+ def from_dict(d):
133
+ return EmployeeSchedule(
134
+ employees=[Employee.from_dict(e) for e in d["employees"]],
135
+ tasks=[Task.from_dict(t) for t in d["tasks"]],
136
+ schedule_info=ScheduleInfo.from_dict(d["schedule_info"]),
137
+ # score and solver_status are not restored (not needed for state passing)
138
+ )
src/constraint_solvers/timetable/solver.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from timefold.solver import SolverManager, SolverFactory, SolutionManager
2
+ from timefold.solver.config import (
3
+ SolverConfig,
4
+ ScoreDirectorFactoryConfig,
5
+ TerminationConfig,
6
+ Duration,
7
+ )
8
+
9
+ from .domain import *
10
+ from .constraints import define_constraints
11
+
12
+
13
# Solver configuration: optimise EmployeeSchedule by moving Task entities,
# scored by the constraints in define_constraints.
solver_config: SolverConfig = SolverConfig(
    solution_class=EmployeeSchedule,
    entity_class_list=[Task],
    score_director_factory_config=ScoreDirectorFactoryConfig(
        constraint_provider_function=define_constraints
    ),
    # Hard stop: each solve job is capped at 30 seconds of search time.
    termination_config=TerminationConfig(spent_limit=Duration(seconds=30)),
)

# Module-level singletons shared by the app: SolverManager runs solve jobs
# asynchronously; SolutionManager scores/analyses solutions from those jobs.
solver_manager: SolverManager = SolverManager.create(
    SolverFactory.create(solver_config)
)
solution_manager: SolutionManager = SolutionManager.create(solver_manager)
src/domain.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dataclasses import dataclass
3
+
4
+ # =========================
5
+ # MOCK PROJECTS
6
+ # =========================
7
+
8
+ MOCK_PROJECTS: dict[str, str] = {
9
+ "go-rssagg": """# GO-RSSAGG
10
+
11
+ ## Project Description
12
+ RSS aggregator backend written in Go. Features REST API and basic authentication.
13
+
14
+ ## Features
15
+ - User authentication and account management
16
+ - Fetch and parse RSS feeds
17
+ - Store feed content in database
18
+ - REST API to access feeds
19
+ - Follow/unfollow feed functionality
20
+ - Mark posts as read/unread
21
+
22
+ ## Tech Stack
23
+ - Go for backend
24
+ - PostgreSQL for database
25
+ - RESTful API endpoints
26
+ - JWT for authentication
27
+ """,
28
+ "rust-chess-pipeline": """# RUST CHESS PIPELINE
29
+
30
+ ## Project Description
31
+ Data Pipeline that extracts chess match metrics and match annotations from Excel files, using AWS Lambda and Step Functions written in Rust.
32
+
33
+ ## Features
34
+ - Parse Excel files containing chess match data
35
+ - Extract player statistics, game metadata, and move annotations
36
+ - Calculate performance metrics and ELO adjustments
37
+ - Store results in data warehouse
38
+ - Generate analytical reports
39
+
40
+ ## Tech Stack
41
+ - Rust for core processing logic
42
+ - AWS Lambda for serverless compute
43
+ - AWS Step Functions for orchestration
44
+ - Amazon S3 for storage
45
+ - AWS Glue for ETL processing
46
+ """,
47
+ "python-ml-forecasting": """# PYTHON ML FORECASTING
48
+
49
+ ## Project Description
50
+ Machine learning service for time-series forecasting of inventory demands, with API endpoints for integration with existing systems.
51
+
52
+ ## Features
53
+ - Historical data ingestion and preprocessing
54
+ - Feature engineering for time-series data
55
+ - Multiple forecasting models (ARIMA, Prophet, LSTM)
56
+ - Model selection and hyperparameter optimization
57
+ - REST API for predictions and model management
58
+ - Visualization of forecasts and confidence intervals
59
+
60
+ ## Tech Stack
61
+ - Python for core functionality
62
+ - FastAPI for REST endpoints
63
+ - PyTorch and scikit-learn for ML models
64
+ - PostgreSQL for metadata storage
65
+ - Docker for containerization
66
+ """,
67
+ }
68
+
69
+ # =========================
70
+ # AGENTS CONFIG
71
+ # =========================
72
@dataclass
class AgentsConfig:
    """Global configuration for all agents"""

    # Model settings
    nebius_api_key: str
    nebius_model: str

    # Prompt templates
    task_splitter_prompt: str = "Split the following task into an accurate and concise tree of required subtasks:\n{{query}}\n\nYour output must be a markdown bullet list, with no additional comments.\n\n"
    task_evaluator_prompt: str = "Evaluate the elapsed time, in 30 minute units, for a competent human to complete the following task:\n{{query}}\n\nYour output must be a one integer, with no additional comments.\n\n"
    task_deps_matcher_prompt: str = "Given the following task:\n{{task}}\n\nAnd these available skills:\n{{skills}}\n\nIn this context:\n{{context}}\n\nSelect the most appropriate skill to complete this task. Return only the skill name as a string, with no additional comments or formatting.\n\n"

    # LLM settings
    timeout: int = 30
    max_retries: int = 3
    verify_ssl: bool = True
    request_timeout: int = 30
    max_tokens: int = 1024
    temperature: float = 0.1
    workflow_timeout: int = 300  # 5 minutes for workflow timeout

    def __post_init__(self):
        """Validate required configuration.

        Raises:
            ValueError: if either credential is missing or empty.

        Emits a warning when the development placeholders ("dev-key" /
        "dev-model") are in use.  The previous nested check made the warning
        unreachable: the placeholders are non-empty strings, so the outer
        emptiness test always skipped the development branch, and genuinely
        empty values could never equal the placeholders.
        """
        if not self.nebius_model or not self.nebius_api_key:
            raise ValueError(
                "NEBIUS_MODEL and NEBIUS_API_KEY environment variables must be set"
            )
        if self.nebius_model == "dev-model" and self.nebius_api_key == "dev-key":
            # Development mode - just warn
            import warnings

            warnings.warn(
                "Using development defaults for NEBIUS_MODEL and NEBIUS_API_KEY"
            )
108
+
109
+
110
# Global configuration instance
# For development environments where env vars might not be set, use defaults
# ("dev-key" / "dev-model"); AgentsConfig.__post_init__ performs validation.
AGENTS_CONFIG = AgentsConfig(
    nebius_api_key=os.getenv("NEBIUS_API_KEY", "dev-key"),
    nebius_model=os.getenv("NEBIUS_MODEL", "dev-model"),
)
src/factory/data_generators.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import date, timedelta
2
+ from random import Random
3
+ from itertools import product
4
+
5
+ from factory.data_models import *
6
+ from constraint_solvers.timetable.domain import *
7
+
8
+
9
+ ### EMPLOYEES ###
10
+ FIRST_NAMES = ("Amy", "Beth", "Carl", "Dan", "Elsa", "Flo", "Gus", "Hugo", "Ivy", "Jay")
11
+ LAST_NAMES = (
12
+ "Cole",
13
+ "Fox",
14
+ "Green",
15
+ "Jones",
16
+ "King",
17
+ "Li",
18
+ "Poe",
19
+ "Rye",
20
+ "Smith",
21
+ "Watt",
22
+ )
23
+
24
+
25
def generate_employees(
    parameters: TimeTableDataParameters, random: Random
) -> list[Employee]:
    """
    Generates a list of Employee objects with random names and skills.

    Each employee receives exactly one required skill plus a weighted-random
    number of optional skills drawn from the parameter skill set.
    """
    full_names = [
        f"{first} {last}" for first, last in product(FIRST_NAMES, LAST_NAMES)
    ]
    random.shuffle(full_names)

    roster: list[Employee] = []
    for index in range(parameters.employee_count):
        (optional_count,) = random.choices(
            population=counts(parameters.optional_skill_distribution),
            weights=weights(parameters.optional_skill_distribution),
        )

        # Never sample more optional skills than actually exist
        optional_count = min(optional_count, len(parameters.skill_set.optional_skills))

        chosen_skills = random.sample(
            parameters.skill_set.optional_skills, optional_count
        )
        chosen_skills += random.sample(parameters.skill_set.required_skills, 1)
        roster.append(Employee(name=full_names[index], skills=set(chosen_skills)))

    return roster
54
+
55
+
56
def generate_employee_availability(
    employees: list[Employee],
    parameters: TimeTableDataParameters,
    start_date: date,
    random: Random,
) -> None:
    """
    Sets up random availability preferences for employees proportional to schedule length.

    Mutates each employee's ``unavailable_dates``, ``undesired_dates`` and
    ``desired_dates`` sets in place; returns nothing.

    For 365 days:
    - Max 21 unavailable days per employee
    - Max 0-12 undesired days per employee
    - Desired dates remain flexible (0-12 days)

    Scales proportionally for different schedule lengths.
    """
    days_in_schedule = parameters.days_in_schedule

    # Calculate proportional limits based on 365-day baseline
    max_unavailable_per_employee = round((21 / 365) * days_in_schedule)
    max_undesired_per_employee = round((12 / 365) * days_in_schedule)
    max_desired_per_employee = round((12 / 365) * days_in_schedule)

    # Ensure minimum reasonable values (at least one unavailable day is always
    # possible; undesired/desired may legitimately be zero for short schedules)
    max_unavailable_per_employee = max(1, max_unavailable_per_employee)
    max_undesired_per_employee = max(0, max_undesired_per_employee)
    max_desired_per_employee = max(0, max_desired_per_employee)

    # Generate all possible dates in the schedule
    all_dates = [start_date + timedelta(days=i) for i in range(days_in_schedule)]

    for employee in employees:
        # Randomly assign unavailable dates (1 to max_unavailable_per_employee)
        num_unavailable = random.randint(1, max_unavailable_per_employee)
        unavailable_dates = random.sample(
            all_dates, min(num_unavailable, len(all_dates))
        )
        employee.unavailable_dates.update(unavailable_dates)

        # Remove unavailable dates from remaining pool for other preferences
        # so the three categories stay mutually exclusive per employee
        remaining_dates = [d for d in all_dates if d not in employee.unavailable_dates]

        # Randomly assign undesired dates (0 to max_undesired_per_employee)
        if max_undesired_per_employee > 0 and remaining_dates:
            num_undesired = random.randint(
                0, min(max_undesired_per_employee, len(remaining_dates))
            )
            if num_undesired > 0:
                undesired_dates = random.sample(remaining_dates, num_undesired)
                employee.undesired_dates.update(undesired_dates)
                remaining_dates = [
                    d for d in remaining_dates if d not in employee.undesired_dates
                ]

        # Randomly assign desired dates (0 to max_desired_per_employee)
        if max_desired_per_employee > 0 and remaining_dates:
            num_desired = random.randint(
                0, min(max_desired_per_employee, len(remaining_dates))
            )
            if num_desired > 0:
                desired_dates = random.sample(remaining_dates, num_desired)
                employee.desired_dates.update(desired_dates)
118
+
119
+
120
def generate_employee_availability_mcp(
    employees: list[Employee],
) -> None:
    """
    For MCP data generator: does not set any unavailable, desired, or undesired days for employees.
    All availability sets remain empty.
    """
    for person in employees:
        # Wipe every preference category so the user is always available
        for date_set in (
            person.unavailable_dates,
            person.undesired_dates,
            person.desired_dates,
        ):
            date_set.clear()
131
+
132
+
133
def generate_tasks(
    parameters: TimeTableDataParameters,
    random: Random,
    task_tuples: list[tuple[str, int]],
) -> list[Task]:
    """
    Given a list of (description, duration) tuples, generate Task objects with randomized required_skill.

    The skill is drawn 50/50 from the required vs. optional skill pools.
    """
    id_gen = generate_task_ids()
    generated: list[Task] = []

    for description, duration in task_tuples:
        # Coin flip decides which skill pool this task draws from
        if random.random() >= 0.5:
            pool = parameters.skill_set.required_skills
        else:
            pool = parameters.skill_set.optional_skills

        generated.append(
            Task(
                id=next(id_gen),
                description=description,
                duration_slots=duration,
                start_slot=0,  # This will be assigned by the solver
                required_skill=random.choice(pool),
            )
        )

    return generated
160
+
161
+
162
def generate_tasks_from_calendar(
    parameters: TimeTableDataParameters,
    random: Random,
    calendar_entries: list[dict],
) -> list[Task]:
    """
    Given a list of calendar entry dicts, generate Task objects with randomized required_skill.
    Output format matches generate_tasks.

    Each entry is read via "summary", "dtstart" and "dtend" keys; timestamps
    are parsed with datetime.fromisoformat after normalising a trailing "Z".
    Entries that raise during parsing are silently skipped (best-effort).
    """
    from datetime import datetime

    tasks: list[Task] = []
    ids = generate_task_ids()

    for entry in calendar_entries:
        try:
            summary = entry.get("summary", "Event")
            # Replace the "Z" UTC suffix so fromisoformat accepts the string
            dtstart = entry.get("dtstart", "").replace("Z", "+00:00")
            dtend = entry.get("dtend", "").replace("Z", "+00:00")
            start_dt = datetime.fromisoformat(dtstart) if dtstart else None
            end_dt = datetime.fromisoformat(dtend) if dtend else None
            if start_dt and end_dt:
                duration_minutes = int((end_dt - start_dt).total_seconds() // 60)
                # Floor to 30-minute slots, but never below one slot
                duration_slots = max(1, duration_minutes // 30)
            else:
                duration_slots = 2  # Default 1 hour
            # Randomize required_skill as in generate_tasks
            if random.random() >= 0.5:
                required_skill = random.choice(parameters.skill_set.required_skills)
            else:
                required_skill = random.choice(parameters.skill_set.optional_skills)
            tasks.append(
                Task(
                    id=next(ids),
                    description=summary,
                    duration_slots=duration_slots,
                    start_slot=0,  # This will be assigned by the solver
                    required_skill=required_skill,
                )
            )
        except Exception:
            # Best-effort import: a malformed entry is dropped, not fatal
            continue
    return tasks
205
+
206
+
207
def generate_task_ids():
    """Yield an endless sequence of string task ids: "0", "1", "2", ..."""
    next_id = 0
    while True:
        yield str(next_id)
        next_id += 1
212
+
213
+
214
+ # =========================
215
+ # UTILITY FUNCTIONS
216
+ # =========================
217
def counts(distributions: tuple[CountDistribution, ...]) -> tuple[int, ...]:
    """
    Extracts the count values from a tuple of CountDistribution objects.
    """
    return tuple(d.count for d in distributions)
222
+
223
+
224
def weights(distributions: tuple[CountDistribution, ...]) -> tuple[float, ...]:
    """
    Extracts the weight values from a tuple of CountDistribution objects.
    """
    return tuple(d.weight for d in distributions)
229
+
230
+
231
def earliest_monday_on_or_after(target_date: date) -> date:
    """
    Returns the date of the next Monday on or after the given date.
    If the date is already Monday, returns the same date.
    """
    weekday = target_date.weekday()  # Monday == 0
    if weekday == 0:
        return target_date
    return target_date + timedelta(days=7 - weekday)
238
+
239
+
240
def tasks_from_agent_output(agent_output, parameters, project_id: str = ""):
    """
    Convert task_composer_agent output (list of (description, duration, skill)) to Task objects.

    Also accepts the legacy (description, duration) format.  Entries with an
    invalid tuple shape or a non-integer duration are skipped.  A missing,
    empty, or unrecognised skill falls back to a random pick using the same
    50/50 required/optional split as generate_tasks.  (Previously a falsy
    skill in a 3-tuple bypassed validation entirely and reached Task as-is.)
    """
    import random

    from constraint_solvers.timetable.domain import Task

    ids = generate_task_ids()
    tasks = []
    # Validate agent-supplied skills against the configured skill set once
    known_skills = set(parameters.skill_set.required_skills) | set(
        parameters.skill_set.optional_skills
    )

    def _random_skill():
        # Same 50/50 required/optional split used by generate_tasks
        if random.random() >= 0.5:
            return random.choice(parameters.skill_set.required_skills)
        return random.choice(parameters.skill_set.optional_skills)

    for sequence_num, task_data in enumerate(agent_output):
        # Handle both old format (description, duration) and new format (description, duration, skill)
        if len(task_data) == 3:
            description, duration, required_skill = task_data
        elif len(task_data) == 2:
            description, duration = task_data
            required_skill = None  # legacy format carries no skill
        else:
            continue  # skip invalid task data

        try:
            duration_int = int(duration)
        except (ValueError, TypeError):
            continue  # skip this task if duration is invalid

        # Clean up skill name (remove any extra formatting) and validate it;
        # fall back to a random skill when absent or unknown.
        if required_skill:
            required_skill = required_skill.strip()
        if not required_skill or required_skill not in known_skills:
            required_skill = _random_skill()

        tasks.append(
            Task(
                id=next(ids),
                description=description,
                duration_slots=duration_int,
                start_slot=0,  # the solver assigns the real start slot
                required_skill=required_skill,
                project_id=project_id,
                sequence_number=sequence_num,
            )
        )
    return tasks
295
+
296
+
297
def skills_from_parameters(parameters: TimeTableDataParameters) -> list[str]:
    """Return every configured skill (required first, then optional) as one flat list."""
    skill_set = parameters.skill_set
    return [*skill_set.required_skills, *skill_set.optional_skills]
src/factory/data_models.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass, field
2
+
3
+
4
+ # =========================
5
+ # DATA MODELS
6
+ # =========================
7
@dataclass(frozen=True, kw_only=True)
class CountDistribution:
    """One (count, weight) pair of a discrete distribution, consumed by
    random.choices via the counts()/weights() helpers."""

    count: int  # candidate value that may be drawn
    weight: float  # relative likelihood of drawing ``count``
11
+
12
+
13
@dataclass(frozen=True, kw_only=True)
class SkillSet:
    """Immutable pool of skills used when generating employees and tasks."""

    required_skills: tuple[str, ...]  # every employee gets exactly one of these
    optional_skills: tuple[str, ...]  # employees receive a random subset of these
17
+
18
+
19
@dataclass(kw_only=True)
class TimeTableDataParameters:
    """Knobs controlling synthetic timetable data generation."""

    skill_set: SkillSet
    days_in_schedule: int  # planning horizon length in days
    employee_count: int
    # Weighted distribution of how many optional skills an employee receives
    optional_skill_distribution: tuple[CountDistribution, ...]
    # Weighted distribution of availability-preference counts
    availability_count_distribution: tuple[CountDistribution, ...]
    random_seed: int = field(default=37)  # fixed seed keeps generated data reproducible
src/factory/data_provider.py ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+
4
+ pd.set_option("display.max_columns", None)
5
+ from helpers import schedule_to_dataframe
6
+
7
+ from datetime import date
8
+ from random import Random
9
+
10
+ from domain import AGENTS_CONFIG
11
+
12
+ from factory.data_generators import *
13
+ from factory.data_models import *
14
+
15
+ from agents.task_composer_agent import TaskComposerAgent
16
+
17
+ from constraint_solvers.timetable.domain import *
18
+
19
+ import logging
20
+
21
+ logging.basicConfig(level=logging.INFO)
22
+
23
+ # =========================
24
+ # CONSTANTS
25
+ # =========================
26
+
27
+ # Each slot is 30 minutes - 20 slots = 10 hours working day
28
+ SLOTS_PER_DAY = 20
29
+
30
+
31
+ # =========================
32
+ # DEMO PARAMS
33
+ # =========================
34
# Shared skill pool used by both the demo and MCP parameter sets.
SKILL_SET = SkillSet(
    required_skills=("Frontend Engineer", "Backend Engineer", "Cloud Engineer"),
    optional_skills=(
        "Security Expert",
        "DevOps Engineer",
        "Data Engineer",
        "Network Engineer",
        "AI Engineer",
    ),
)

# Default parameters for the multi-employee demo data set (one-year horizon).
DATA_PARAMS = TimeTableDataParameters(
    skill_set=SKILL_SET,
    days_in_schedule=365,
    employee_count=12,
    optional_skill_distribution=(
        CountDistribution(count=1, weight=3),
        CountDistribution(count=2, weight=1),
    ),
    availability_count_distribution=(
        CountDistribution(count=5, weight=4),
        CountDistribution(count=10, weight=3),
        CountDistribution(count=15, weight=2),
        CountDistribution(count=20, weight=1),
    ),
    random_seed=37,
)

# Parameters for the single-user MCP flow: one employee holding every
# optional skill, fully available.
MCP_PARAMS = TimeTableDataParameters(
    skill_set=SKILL_SET,
    days_in_schedule=365,
    # In this case, we only have one user
    employee_count=1,
    optional_skill_distribution=(
        CountDistribution(count=len(SKILL_SET.optional_skills), weight=1),
    ),
    availability_count_distribution=(
        # Full availability for one user
        CountDistribution(count=20, weight=1),
    ),
    random_seed=37,
)
76
+
77
+
78
+ # =========================
79
+ # AGENT DATA
80
+ # =========================
81
async def generate_agent_data(
    file, project_id: str = "", employee_count: int = None, days_in_schedule: int = None
) -> EmployeeSchedule:
    """Build an EmployeeSchedule from project text via the task composer agent.

    ``file`` may be a file-like object, bytes, a path to an existing file, or
    raw project text.  ``employee_count``/``days_in_schedule`` optionally
    override the DATA_PARAMS defaults.  Raises ValueError for unsupported
    ``file`` types and propagates agent-workflow failures.
    """
    # Use DATA_PARAMS, but allow override
    parameters = DATA_PARAMS
    if employee_count is not None or days_in_schedule is not None:
        # TimeTableDataParameters is rebuilt rather than mutated
        parameters = TimeTableDataParameters(
            skill_set=parameters.skill_set,
            days_in_schedule=days_in_schedule
            if days_in_schedule is not None
            else parameters.days_in_schedule,
            employee_count=employee_count
            if employee_count is not None
            else parameters.employee_count,
            optional_skill_distribution=parameters.optional_skill_distribution,
            availability_count_distribution=parameters.availability_count_distribution,
            random_seed=parameters.random_seed,
        )

    start_date: date = earliest_monday_on_or_after(date.today())
    randomizer: Random = Random(parameters.random_seed)
    employees: list[Employee] = generate_employees(parameters, randomizer)
    total_slots: int = parameters.days_in_schedule * SLOTS_PER_DAY

    if os.getenv("YUGA_DEBUG", "false").lower() == "true":
        logging.info("FILE OBJECT: %s %s", file, type(file))

    # Normalise the input into a single string, whatever form it arrived in
    match file:
        case file if hasattr(file, "read"):
            input_str = file.read()

        case bytes():
            input_str = file.decode("utf-8")

        case str() if os.path.exists(file):
            with open(file, "r", encoding="utf-8") as f:
                input_str = f.read()

        case str():
            input_str = file

        case _:
            raise ValueError(f"Unsupported file type: {type(file)}")

    agent_output = await run_task_composer_agent(input_str, parameters)

    tasks = tasks_from_agent_output(agent_output, parameters, project_id)
    generate_employee_availability(employees, parameters, start_date, randomizer)

    return EmployeeSchedule(
        employees=employees,
        tasks=tasks,
        schedule_info=ScheduleInfo(total_slots=total_slots),
    )
135
+
136
+
137
def _mcp_task_frame(tasks) -> pd.DataFrame:
    """Debug view: one DataFrame row per Task with its scheduling fields."""
    return pd.DataFrame(
        [
            {
                "id": t.id,
                "description": t.description,
                "duration_slots": t.duration_slots,
                "start_slot": t.start_slot,
                "required_skill": t.required_skill,
                "sequence_number": t.sequence_number,
                "employee": t.employee.name if hasattr(t.employee, "name") else None,
                "project_id": t.project_id,
            }
            for t in tasks
        ]
    )


async def generate_mcp_data(
    calendar_entries,
    user_message: str,
    project_id: str = "PROJECT",
    employee_count: int = None,
    days_in_schedule: int = None,
):
    """Build the single-user MCP schedule DataFrame.

    Combines two task sources: ``calendar_entries`` become tasks tagged
    project_id "EXISTING"; ``user_message`` (run through the task composer
    agent, when non-empty) becomes tasks tagged "PROJECT".  All tasks are
    assigned to the single "Chatbot User" employee and numbered sequentially
    within each project group.  Returns the DataFrame produced by
    schedule_to_dataframe.  Raises ValueError when the effective parameters
    yield more than one employee.
    """
    parameters = MCP_PARAMS
    if employee_count is not None or days_in_schedule is not None:
        # Rebuild the parameter object with the requested overrides
        parameters = TimeTableDataParameters(
            skill_set=parameters.skill_set,
            days_in_schedule=days_in_schedule
            if days_in_schedule is not None
            else parameters.days_in_schedule,
            employee_count=employee_count
            if employee_count is not None
            else parameters.employee_count,
            optional_skill_distribution=parameters.optional_skill_distribution,
            availability_count_distribution=parameters.availability_count_distribution,
            random_seed=parameters.random_seed,
        )

    start_date: date = earliest_monday_on_or_after(date.today())
    randomizer: Random = Random(parameters.random_seed)
    employees: list[Employee] = generate_employees(parameters, randomizer)
    total_slots: int = parameters.days_in_schedule * SLOTS_PER_DAY

    # Set the single employee's name to 'Chatbot User'
    if len(employees) == 1:
        employees[0].name = "Chatbot User"
    else:
        raise ValueError("MCP data provider only supports one employee")

    # Ensure all date sets are empty (the chatbot user is always available)
    for emp in employees:
        emp.unavailable_dates.clear()
        emp.undesired_dates.clear()
        emp.desired_dates.clear()

    # --- CALENDAR TASKS ---
    calendar_tasks = generate_tasks_from_calendar(
        parameters, randomizer, calendar_entries
    )
    # Assign project_id 'EXISTING' to all calendar tasks
    for t in calendar_tasks:
        t.sequence_number = 0  # will be overwritten later
        t.employee = employees[0]
        t.project_id = "EXISTING"

    calendar_df = _mcp_task_frame(calendar_tasks)
    print("\nCalendar DataFrame:")
    print(calendar_df)

    # --- LLM TASKS ---
    llm_tasks = []
    if user_message:
        # run_task_composer_agent is defined in this module; the previous
        # self-import (from factory.data_provider import ...) was redundant.
        agent_output = await run_task_composer_agent(user_message, parameters)
        llm_tasks = tasks_from_agent_output(agent_output, parameters, "PROJECT")
        for t in llm_tasks:
            t.sequence_number = 0  # will be overwritten later
            t.employee = employees[0]
            t.project_id = "PROJECT"
        llm_df = _mcp_task_frame(llm_tasks)
        print("\nLLM DataFrame:")
        print(llm_df)

    # --- MERGE AND ASSIGN SEQUENCE ---
    all_tasks = calendar_tasks + llm_tasks
    # Assign sequence_number per project group
    existing_seq = 0
    project_seq = 0
    for t in all_tasks:
        if t.project_id == "EXISTING":
            t.sequence_number = existing_seq
            existing_seq += 1
        elif t.project_id == "PROJECT":
            t.sequence_number = project_seq
            project_seq += 1

    schedule = EmployeeSchedule(
        employees=employees,
        tasks=all_tasks,
        schedule_info=ScheduleInfo(total_slots=total_slots),
    )
    final_df = schedule_to_dataframe(schedule)
    print("\nFinal DataFrame (MCP-aligned):")
    print(final_df)
    return final_df
257
+
258
+
259
async def run_task_composer_agent(
    input_str: str, parameters: TimeTableDataParameters
) -> list:
    """Run the TaskComposerAgent workflow over ``input_str``.

    Returns the agent's task list; logs and re-raises whatever the workflow
    raises so callers can surface the failure.
    """
    agent = TaskComposerAgent(AGENTS_CONFIG)
    # The agent may pick from the full skill pool, required and optional alike
    available_skills = list(parameters.skill_set.required_skills) + list(
        parameters.skill_set.optional_skills
    )
    context = f"Project scheduling for {parameters.employee_count} employees over {parameters.days_in_schedule} days"

    logging.info(f"Starting workflow with timeout: {AGENTS_CONFIG.workflow_timeout}s")
    logging.info(f"Input length: {len(input_str)} characters")
    logging.info(f"Available skills: {available_skills}")

    try:
        agent_output = await agent.run_workflow(
            query=input_str, skills=available_skills, context=context
        )
        logging.info(
            f"Workflow completed successfully. Generated {len(agent_output)} tasks."
        )
        return agent_output
    except Exception as e:
        # Log here so failures are visible even when the caller swallows them
        logging.error(f"Workflow failed: {e}")
        raise
src/handlers.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from typing import Tuple, Dict, List, Optional
3
+
4
+ import pandas as pd
5
+ import gradio as gr
6
+
7
+ from state import app_state
8
+
9
+ from services import (
10
+ LoggingService,
11
+ ScheduleService,
12
+ DataService,
13
+ MockProjectService,
14
+ )
15
+
16
# Global logging service instance for UI streaming; the handlers below call
# get_streaming_logs() on it to populate the Gradio log terminal output.
logging_service = LoggingService()
18
+
19
+
20
async def show_solved(
    state_data, job_id: str, debug: bool = False
) -> Tuple[pd.DataFrame, pd.DataFrame, str, str, object, str]:
    """Handler for solving a schedule from UI state data.

    Returns a 6-tuple matching the Gradio outputs:
    (employees_table, schedule_table, job_id, status_text, state_data,
    log_terminal).  On failure the table components receive no-op
    ``gr.update()`` values and the status text carries the error message.
    (Fix: removed pointless f-prefixes from placeholder-free log strings.)
    """
    # Set up log streaming for solving process
    logging_service.setup_log_streaming()

    logging.info(
        f"πŸ”§ show_solved called with state_data type: {type(state_data)}, job_id: {job_id}"
    )

    # Check if data has been loaded
    if not state_data:
        logging.warning("❌ No data loaded - cannot solve schedule")
        return (
            gr.update(),
            gr.update(),
            job_id,
            "❌ No data loaded. Please click 'Load Data' first to load project data before solving.",
            state_data,
            logging_service.get_streaming_logs(),
        )

    logging.info("βœ… State data found, proceeding with solve...")

    try:
        # Use the schedule service to solve the schedule
        (
            emp_df,
            solved_task_df,
            new_job_id,
            status,
            state_data,
        ) = await ScheduleService.solve_schedule_from_state(
            state_data, job_id, debug=debug
        )

        logging.info("βœ… Solver completed successfully, returning results")

        return (
            emp_df,
            solved_task_df,
            new_job_id,
            status,
            state_data,
            logging_service.get_streaming_logs(),
        )
    except Exception as e:
        logging.error(f"Error in show_solved: {e}")
        return (
            gr.update(),
            gr.update(),
            job_id,
            f"❌ Error solving schedule: {str(e)}",
            state_data,
            logging_service.get_streaming_logs(),
        )
77
+
78
+
79
def show_mock_project_content(project_names) -> str:
    """Handler for displaying mock project content.

    Thin delegate: ``project_names`` (the UI's mock-project selection) is
    forwarded unchanged to MockProjectService.
    """
    return MockProjectService.show_mock_project_content(project_names)
82
+
83
+
84
async def load_data(
    project_source: str,
    file_obj,
    mock_projects,
    employee_count: int,
    days_in_schedule: int,
    llm_output,
    debug: bool = False,
    progress=gr.Progress(),
):
    """
    Handler for data loading from either file uploads or mock projects - streaming version
    Yields intermediate updates for real-time progress

    Each yield is a 7-tuple matching the Gradio outputs: (employees_table,
    schedule_table, job_id_state, status_text, llm_output_state, log_terminal,
    solve_btn).  The solve button stays disabled until loading succeeds.
    """
    # Set up log streaming and clear previous logs
    logging_service.setup_log_streaming()
    logging_service.clear_streaming_logs()

    # Initial log message
    logging.info("πŸš€ Starting data loading process...")

    # Yield initial state
    yield (
        gr.update(),  # employees_table
        gr.update(),  # schedule_table
        gr.update(),  # job_id_state
        "Starting data loading...",  # status_text
        gr.update(),  # llm_output_state
        logging_service.get_streaming_logs(),  # log_terminal
        gr.update(interactive=False),  # solve_btn - keep disabled during loading
    )

    try:
        # Use the data service to load data from sources
        (
            emp_df,
            task_df,
            job_id,
            status_message,
            state_data,
        ) = await DataService.load_data_from_sources(
            project_source,
            file_obj,
            mock_projects,
            employee_count,
            days_in_schedule,
            debug,
        )

        # Store schedule for later use
        app_state.add_solved_schedule(job_id, None)  # Will be populated when solved

        # Final yield with complete results
        yield (
            emp_df,  # employees_table
            task_df,  # schedule_table
            job_id,  # job_id_state
            status_message,  # status_text
            state_data,  # llm_output_state
            logging_service.get_streaming_logs(),  # log_terminal with accumulated logs
            gr.update(interactive=True),  # solve_btn - enable after successful loading
        )

    except Exception as e:
        logging.error(f"Error loading data: {e}")
        yield (
            gr.update(),
            gr.update(),
            gr.update(),
            f"Error loading data: {str(e)}",
            gr.update(),
            logging_service.get_streaming_logs(),  # log_terminal
            gr.update(interactive=False),  # solve_btn - keep disabled on error
        )
158
+
159
+
160
def start_timer(job_id, llm_output) -> gr.Timer:
    """Handler for starting the polling timer.

    Thin delegate to ScheduleService.start_timer; the returned gr.Timer drives
    the UI's periodic solution polling.
    """
    return ScheduleService.start_timer(job_id, llm_output)
163
+
164
+
165
def poll_solution(
    job_id: str, schedule, debug: bool = False
) -> Tuple[pd.DataFrame, pd.DataFrame, str, str, object, str]:
    """Handler for polling a solution for a given job_id.

    Delegates to ScheduleService.poll_solution and appends a no-op update for
    the log terminal; on failure the table updates become no-ops and the
    status text carries the error message.
    """
    try:
        result = ScheduleService.poll_solution(job_id, schedule, debug)
        emp_df, task_df, job_id, status_message, schedule = result
        return (
            emp_df,
            task_df,
            job_id,
            status_message,
            schedule,
            gr.update(),  # log_terminal
        )
    except Exception as e:
        logging.error(f"Error in poll_solution: {e}")
        return (
            gr.update(),
            gr.update(),
            job_id,
            f"Error polling solution: {str(e)}",
            schedule,
            gr.update(),  # log_terminal
        )
197
+
198
+
199
async def auto_poll(
    job_id: str, llm_output: dict, debug: bool = False
) -> Tuple[pd.DataFrame, pd.DataFrame, str, str, dict, str]:
    """Handler for automatic polling of updates.

    Delegates to ScheduleService.auto_poll and appends the accumulated
    streaming logs; on failure the table updates become no-ops and the status
    text carries the error message.
    """
    try:
        (
            emp_df,
            task_df,
            job_id,
            status_message,
            llm_output,
        ) = await ScheduleService.auto_poll(job_id, llm_output, debug)

        return (
            emp_df,  # employees_table
            task_df,  # schedule_table
            job_id,  # job_id_state
            status_message,  # status_text
            llm_output,  # llm_output_state
            logging_service.get_streaming_logs(),  # log_terminal
        )

    except Exception as e:
        logging.error(f"Error in auto_poll: {e}")
        return (
            gr.update(),
            gr.update(),
            job_id,
            f"Error in auto polling: {str(e)}",
            llm_output,
            logging_service.get_streaming_logs(),  # log_terminal
        )
src/helpers.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, timedelta
2
+ import pandas as pd
3
+
4
+
5
def schedule_to_dataframe(schedule) -> pd.DataFrame:
    """
    Convert an EmployeeSchedule to a pandas DataFrame of task rows.

    Args:
        schedule (EmployeeSchedule): The schedule to convert. Each task must
            expose ``employee``, ``start_slot``, ``duration_slots``,
            ``description`` and ``required_skill``.

    Returns:
        pd.DataFrame: One row per task with assignment, timing and
        availability-flag columns.
    """
    data: list[dict[str, object]] = []

    # Fix a single reference time for the whole conversion. The original code
    # called datetime.now() once per task, so rows computed later in the loop
    # used a slightly later base and task times could drift relative to each
    # other; anchoring every row to the same base keeps slot math consistent.
    base_time: datetime = datetime.now()

    # Process each task in the schedule
    for task in schedule.tasks:
        # Get employee name or "Unassigned" if no employee assigned
        employee: str = task.employee.name if task.employee else "Unassigned"

        # Calculate start and end times based on 30-minute slots
        start_time: datetime = base_time + timedelta(minutes=30 * task.start_slot)
        end_time: datetime = start_time + timedelta(minutes=30 * task.duration_slots)

        # Add task data to list with availability flags
        data.append(
            {
                "Project": getattr(task, "project_id", ""),
                "Sequence": getattr(task, "sequence_number", 0),
                "Employee": employee,
                "Task": task.description,
                "Start": start_time,
                "End": end_time,
                "Duration (hours)": task.duration_slots / 2,  # 2 slots per hour
                "Required Skill": task.required_skill,
                # Check if task falls on employee's unavailable date
                "Unavailable": employee != "Unassigned"
                and hasattr(task.employee, "unavailable_dates")
                and start_time.date() in task.employee.unavailable_dates,
                # Check if task falls on employee's undesired date
                "Undesired": employee != "Unassigned"
                and hasattr(task.employee, "undesired_dates")
                and start_time.date() in task.employee.undesired_dates,
                # Check if task falls on employee's desired date
                "Desired": employee != "Unassigned"
                and hasattr(task.employee, "desired_dates")
                and start_time.date() in task.employee.desired_dates,
            }
        )

    return pd.DataFrame(data)
53
+
54
+
55
def employees_to_dataframe(schedule) -> pd.DataFrame:
    """
    Build a per-employee summary DataFrame from an EmployeeSchedule.

    Args:
        schedule (EmployeeSchedule): The schedule whose employees are summarized.
    """

    def _render_dates(dates, limit=3):
        """Render a collection of dates as a short "MM/DD, ..." display string."""
        if not dates:
            return "None"
        try:
            ordered = sorted(dates)
            shown = ", ".join(d.strftime("%m/%d") for d in ordered[:limit])
            hidden = len(ordered) - limit
            # Collapse long lists into a "(+N more)" suffix
            return shown if hidden <= 0 else f"{shown} (+{hidden} more)"
        except Exception:
            # Unsortable/unformattable entries: fall back to a simple count
            return f"{len(dates)} dates"

    rows: list[dict[str, str]] = []

    for member in schedule.employees:
        try:
            # Split "First Last" into two columns; single-token names keep
            # the last-name column empty.
            if " " in member.name:
                first_name, last_name = member.name.split(" ", 1)
            else:
                first_name, last_name = member.name, ""

            # Preference collections may be absent on some employee objects
            unavailable = getattr(member, "unavailable_dates", set())
            undesired = getattr(member, "undesired_dates", set())
            desired = getattr(member, "desired_dates", set())

            rows.append(
                {
                    "First Name": first_name,
                    "Last Name": last_name,
                    "Skills": ", ".join(sorted(member.skills)),
                    "Unavailable Dates": _render_dates(unavailable),
                    "Undesired Dates": _render_dates(undesired),
                    "Desired Dates": _render_dates(desired),
                    "Total Preferences": f"{len(unavailable)} unavailable, {len(undesired)} undesired, {len(desired)} desired",
                }
            )
        except Exception:
            # Defensive fallback: never let one bad record break the table
            rows.append(
                {
                    "First Name": str(member.name),
                    "Last Name": "",
                    "Skills": ", ".join(sorted(getattr(member, "skills", []))),
                    "Unavailable Dates": "Error loading",
                    "Undesired Dates": "Error loading",
                    "Desired Dates": "Error loading",
                    "Total Preferences": "Error loading preferences",
                }
            )

    return pd.DataFrame(rows)
src/mcp_handlers.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dataclasses import dataclass
3
+ import uuid
4
+ import time
5
+ import asyncio
6
+
7
+ from utils.extract_calendar import extract_ical_entries
8
+ from factory.data_provider import generate_mcp_data
9
+ from services.schedule_service import ScheduleService
10
+
11
+
12
@dataclass
class MCPProcessingResult:
    """Result envelope returned (as a dict) by the MCP message/file pipeline."""

    # Echo/confirmation of the received chat message ("" when file read fails)
    user_message: str
    # Basename of the processed attachment ("" when file read fails)
    file: str
    # Extracted iCal entries; None until extraction, [] on read failure
    calendar_entries: list = None
    # Human-readable error message; None on success
    error: str = None
    # Solved schedule rows (list of record dicts) or None if unsolved
    solved_task_df: object = None
    # Final solver status string reported by the poller
    status: str = None
    # Solver score object, if the solved schedule exposes one
    score: object = None
21
+
22
+
23
async def process_message_and_attached_file(file_path: str, message_body: str) -> dict:
    """
    Store the last chat message and attached file, echo the message, extract calendar entries, generate tasks, solve, and poll for the solution.
    Args:
        file_path (str): Path to the attached file
        message_body (str): The body of the last chat message, which contains the task description
    Returns:
        dict: Contains confirmation, file info, calendar entries, error, and solved schedule info
    """
    # Read the attachment as raw bytes; failures are reported inside the
    # result envelope instead of being raised to the MCP caller.
    try:
        with open(file_path, "rb") as f:
            file_bytes = f.read()
    except Exception as e:
        result = MCPProcessingResult(
            user_message="",
            file="",
            calendar_entries=[],
            error=f"Failed to read file: {e}",
        )
        return result.__dict__

    # Try to extract calendar entries
    entries, error = extract_ical_entries(file_bytes)
    if error:
        result = MCPProcessingResult(
            user_message=f"Received your message: {message_body}",
            file=os.path.basename(file_path),
            error=f"File is not a valid calendar file: {error}",
        )
        return result.__dict__

    # Generate MCP DataFrame
    df = await generate_mcp_data(entries, message_body)
    if df is None or df.empty:
        result = MCPProcessingResult(
            user_message=f"Received your message: {message_body}",
            file=os.path.basename(file_path),
            calendar_entries=entries,
            error="Failed to generate MCP data.",
        )
        return result.__dict__

    # Build state_data for the solver
    # NOTE(review): the MCP flow always solves for a single employee over a
    # 365-day horizon -- confirm these defaults are intentional.
    state_data = {
        "task_df_json": df.to_json(orient="split"),
        "employee_count": 1,
        "days_in_schedule": 365,
    }
    job_id = str(uuid.uuid4())
    (
        emp_df,
        solved_task_df,
        new_job_id,
        status,
        state_data,
    ) = await ScheduleService.solve_schedule_from_state(state_data, job_id, debug=True)

    # Poll for the solution until the status string does not contain 'Solving'
    max_wait = 30  # seconds; overall polling budget
    interval = 0.5  # seconds between polls
    waited = 0
    final_task_df = None
    final_status = None
    final_score = None
    solved = False
    while waited < max_wait:
        (
            _,
            polled_task_df,
            _,
            polled_status,
            solved_schedule,
        ) = ScheduleService.poll_solution(new_job_id, None, debug=True)
        # A status without "Solving" means the solver finished (or errored)
        if polled_status and "Solving" not in polled_status:
            final_task_df = polled_task_df
            final_status = polled_status
            # Score may not exist on every schedule object; default to None
            final_score = getattr(solved_schedule, "score", None)
            solved = True
            break
        await asyncio.sleep(interval)
        waited += interval

    result = MCPProcessingResult(
        user_message=f"Received your message: {message_body}",
        file=os.path.basename(file_path),
        calendar_entries=entries,
        solved_task_df=final_task_df.to_dict(orient="records")
        if final_task_df is not None
        else None,
        status=final_status,
        score=final_score,
        error=None if solved else "Solver did not finish within the timeout",
    )
    return result.__dict__
src/services/__init__.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Services module for Yuga Planner business logic.
3
+
4
+ This module contains all the business logic separated from the UI handlers.
5
+ """
6
+
7
+ from .logging_service import LoggingService
8
+ from .schedule_service import ScheduleService
9
+ from .data_service import DataService
10
+ from .mock_projects_service import MockProjectService
11
+
12
+ __all__ = [
13
+ "LoggingService",
14
+ "ScheduleService",
15
+ "DataService",
16
+ "MockProjectService",
17
+ ]
src/services/data_service.py ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uuid
3
+ import logging
4
+ from datetime import datetime
5
+ from io import StringIO
6
+ from typing import Dict, List, Tuple, Union, Optional, Any
7
+
8
+ import pandas as pd
9
+
10
+ from factory.data_provider import (
11
+ generate_agent_data,
12
+ DATA_PARAMS,
13
+ TimeTableDataParameters,
14
+ SLOTS_PER_DAY,
15
+ )
16
+
17
+ from constraint_solvers.timetable.domain import (
18
+ EmployeeSchedule,
19
+ ScheduleInfo,
20
+ Task,
21
+ Employee,
22
+ )
23
+
24
+ from helpers import schedule_to_dataframe, employees_to_dataframe
25
+ from .mock_projects_service import MockProjectService
26
+
27
+
28
class DataService:
    """Service for handling data loading and processing operations"""

    @staticmethod
    async def load_data_from_sources(
        project_source: str,
        file_obj: Any,
        mock_projects: Union[str, List[str], None],
        employee_count: int,
        days_in_schedule: int,
        debug: bool = False,
    ) -> Tuple[pd.DataFrame, pd.DataFrame, str, str, Dict[str, Any]]:
        """
        Handle data loading from either file uploads or mock projects.

        Args:
            project_source: Source type ("Upload Project Files" or mock projects)
            file_obj: Uploaded file object(s)
            mock_projects: Selected mock project names
            employee_count: Number of employees to generate
            days_in_schedule: Number of days in the schedule
            debug: Enable debug logging

        Returns:
            Tuple of (emp_df, task_df, job_id, status_message, state_data)

        Raises:
            ValueError: If no file is uploaded / no valid mock project selected.
        """
        if project_source == "Upload Project Files":
            files, project_source_info = DataService._process_uploaded_files(file_obj)
        else:
            files, project_source_info = DataService._process_mock_projects(
                mock_projects
            )

        logging.info(f"πŸ”„ Processing {len(files)} project(s)...")

        combined_tasks: List[Task] = []
        combined_employees: Dict[str, Employee] = {}

        # Process each file/project
        for idx, single_file in enumerate(files):
            project_id = DataService._derive_project_id(
                project_source, single_file, mock_projects, idx
            )

            logging.info(f"βš™οΈ Processing project {idx+1}/{len(files)}: '{project_id}'")

            schedule_part: EmployeeSchedule = await generate_agent_data(
                single_file,
                project_id=project_id,
                employee_count=employee_count,
                days_in_schedule=days_in_schedule,
            )
            logging.info(f"βœ… Completed processing project '{project_id}'")

            # Merge employees (unique by name); the first occurrence wins
            for emp in schedule_part.employees:
                if emp.name not in combined_employees:
                    combined_employees[emp.name] = emp

            # Append tasks with project id already set
            combined_tasks.extend(schedule_part.tasks)

        logging.info(
            f"πŸ‘₯ Merging data: {len(combined_employees)} unique employees, {len(combined_tasks)} total tasks"
        )

        # Build final schedule
        final_schedule = DataService._build_final_schedule(
            combined_employees, combined_tasks, employee_count, days_in_schedule
        )

        # Convert to DataFrames
        emp_df, task_df = DataService._convert_to_dataframes(final_schedule, debug)

        # Generate job ID and state data
        job_id = str(uuid.uuid4())
        state_data = {
            "task_df_json": task_df.to_json(orient="split"),
            "employee_count": employee_count,
            "days_in_schedule": days_in_schedule,
        }

        status_message = f"Data loaded successfully from {project_source_info}"
        logging.info("πŸŽ‰ Data loading completed successfully!")

        return emp_df, task_df, job_id, status_message, state_data

    @staticmethod
    def _process_uploaded_files(file_obj: Any) -> Tuple[List[Any], str]:
        """Process uploaded files and return file list and description"""
        if file_obj is None:
            raise ValueError("No file uploaded. Please upload a file.")

        # Support multiple files. Gradio returns a list when multiple files are selected.
        files = file_obj if isinstance(file_obj, list) else [file_obj]
        project_source_info = f"{len(files)} file(s)"
        logging.info(f"πŸ“„ Found {len(files)} file(s) to process")

        return files, project_source_info

    @staticmethod
    def _process_mock_projects(
        mock_projects: Union[str, List[str], None]
    ) -> Tuple[List[str], str]:
        """Process mock projects and return file contents and description"""
        if not mock_projects:
            raise ValueError("Please select at least one mock project.")

        # Ensure mock_projects is a list
        if isinstance(mock_projects, str):
            mock_projects = [mock_projects]

        # Validate all selected mock projects
        invalid_projects = MockProjectService.validate_mock_projects(mock_projects)
        if invalid_projects:
            raise ValueError(
                f"Invalid mock projects selected: {', '.join(invalid_projects)}"
            )

        # Get file contents for mock projects
        files = MockProjectService.get_mock_project_files(mock_projects)
        project_source_info = (
            f"{len(mock_projects)} mock project(s): {', '.join(mock_projects)}"
        )
        logging.info(f"πŸ“‹ Selected mock projects: {', '.join(mock_projects)}")

        return files, project_source_info

    @staticmethod
    def _derive_project_id(
        project_source: str,
        single_file: Any,
        mock_projects: Union[str, List[str], None],
        idx: int,
    ) -> str:
        """Derive project ID from file or mock project"""
        if project_source == "Upload Project Files":
            try:
                return os.path.splitext(os.path.basename(single_file.name))[0]
            except AttributeError:
                # File object without a .name attribute: fall back to a counter
                return f"project_{idx+1}"
        else:
            # For mock projects, use the mock project name as the project ID
            if isinstance(mock_projects, list):
                return mock_projects[idx]
            return mock_projects or f"project_{idx+1}"

    @staticmethod
    def _build_final_schedule(
        combined_employees: Dict[str, Employee],
        combined_tasks: List[Task],
        employee_count: Optional[int],
        days_in_schedule: Optional[int],
    ) -> EmployeeSchedule:
        """Build the final schedule with custom parameters if provided"""
        parameters: TimeTableDataParameters = DATA_PARAMS

        # Override with custom parameters if provided
        if employee_count is not None or days_in_schedule is not None:
            logging.info(
                f"βš™οΈ Customizing parameters: {employee_count} employees, {days_in_schedule} days"
            )
            parameters = TimeTableDataParameters(
                skill_set=parameters.skill_set,
                days_in_schedule=days_in_schedule
                if days_in_schedule is not None
                else parameters.days_in_schedule,
                employee_count=employee_count
                if employee_count is not None
                else parameters.employee_count,
                optional_skill_distribution=parameters.optional_skill_distribution,
                availability_count_distribution=parameters.availability_count_distribution,
                random_seed=parameters.random_seed,
            )

        logging.info("πŸ—οΈ Building final schedule structure...")
        return EmployeeSchedule(
            employees=list(combined_employees.values()),
            tasks=combined_tasks,
            schedule_info=ScheduleInfo(
                total_slots=parameters.days_in_schedule * SLOTS_PER_DAY
            ),
        )

    @staticmethod
    def _convert_to_dataframes(
        schedule: EmployeeSchedule, debug: bool = False
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Convert schedule to DataFrames for display"""
        logging.info("πŸ“Š Converting to data tables...")
        emp_df: pd.DataFrame = employees_to_dataframe(schedule)
        task_df: pd.DataFrame = schedule_to_dataframe(schedule)

        # Sort by project and sequence to maintain original order
        task_df = task_df[
            [
                "Project",
                "Sequence",
                "Employee",
                "Task",
                "Start",
                "End",
                "Duration (hours)",
                "Required Skill",
            ]
        ].sort_values(["Project", "Sequence"])

        if debug:
            # Log sequence numbers for debugging
            logging.info("Task sequence numbers after load_data:")
            for _, row in task_df.iterrows():
                logging.info(
                    f"Project: {row['Project']}, Sequence: {row['Sequence']}, Task: {row['Task']}"
                )
            logging.info("Task DataFrame being set in load_data: %s", task_df.head())

        return emp_df, task_df

    @staticmethod
    def parse_task_data_from_json(
        task_df_json: str, debug: bool = False
    ) -> pd.DataFrame:
        """
        Parse task data from JSON string.

        Args:
            task_df_json: JSON string containing task data
            debug: Enable debug logging

        Returns:
            DataFrame containing task data

        Raises:
            ValueError: If the JSON is missing or cannot be parsed.
        """
        if not task_df_json:
            raise ValueError("No task_df_json provided")

        try:
            logging.info("πŸ“‹ Parsing task data from JSON...")
            task_df: pd.DataFrame = pd.read_json(StringIO(task_df_json), orient="split")
            logging.info(f"πŸ“Š Found {len(task_df)} tasks to schedule")

            if debug:
                logging.info("Task sequence numbers from JSON:")
                for _, row in task_df.iterrows():
                    logging.info(
                        f"Project: {row.get('Project', 'N/A')}, Sequence: {row.get('Sequence', 'N/A')}, Task: {row['Task']}"
                    )

            return task_df
        except Exception as e:
            logging.error(f"❌ Error parsing task_df_json: {e}")
            # Chain the original exception (PEP 3134) so the real parse
            # failure is preserved in the traceback instead of being lost.
            raise ValueError(f"Error parsing task data: {str(e)}") from e

    @staticmethod
    def convert_dataframe_to_tasks(task_df: pd.DataFrame) -> List[Task]:
        """
        Convert a DataFrame to a list of Task objects.

        Args:
            task_df: DataFrame containing task data

        Returns:
            List of Task objects
        """
        logging.info("πŸ†” Generating task IDs and converting to solver format...")
        ids = (str(i) for i in range(len(task_df)))

        tasks = []
        for _, row in task_df.iterrows():
            tasks.append(
                Task(
                    id=next(ids),
                    description=row["Task"],
                    # Hours -> 30-minute slots (2 slots per hour)
                    duration_slots=int(float(row["Duration (hours)"]) * 2),
                    start_slot=0,
                    required_skill=row["Required Skill"],
                    project_id=row.get("Project", ""),
                    sequence_number=int(row.get("Sequence", 0)),
                )
            )

        logging.info(f"βœ… Converted {len(tasks)} tasks for solver")
        return tasks
src/services/logging_service.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import threading
3
+ from datetime import datetime
4
+ from typing import List
5
+
6
+
7
class LogCapture:
    """Thread-safe accumulator of timestamped log lines for UI streaming."""

    def __init__(self):
        self.logs: List[str] = []
        self.lock = threading.Lock()

    def add_log(self, message: str) -> None:
        """Append *message* prefixed with the current HH:MM:SS timestamp."""
        stamp = datetime.now().strftime("%H:%M:%S")
        entry = f"[{stamp}] {message}"
        with self.lock:
            self.logs.append(entry)

    def get_logs(self) -> str:
        """Return every captured line joined with newlines."""
        with self.lock:
            return "\n".join(self.logs)

    def clear(self) -> None:
        """Drop all captured lines."""
        with self.lock:
            del self.logs[:]
29
+
30
+
31
class StreamingLogHandler(logging.Handler):
    """logging.Handler that forwards formatted records into a LogCapture."""

    def __init__(self, log_capture: "LogCapture"):
        super().__init__()
        self.log_capture = log_capture

    def emit(self, record: logging.LogRecord) -> None:
        """Format *record* and hand it to the capture buffer."""
        try:
            self.log_capture.add_log(self.format(record))
        except Exception:
            # Delegate failures to the standard logging error hook
            self.handleError(record)
44
+
45
+
46
class LoggingService:
    """Owns the log-capture buffer and wires it into the root logger for the UI."""

    def __init__(self):
        self.log_capture = LogCapture()
        self._handler_added = False

    def setup_log_streaming(self) -> None:
        """Attach a StreamingLogHandler to the root logger, replacing stale ones."""
        root = logging.getLogger()

        # Detach previously-added streaming handlers so lines are not duplicated
        stale = [h for h in root.handlers if isinstance(h, StreamingLogHandler)]
        for old in stale:
            root.removeHandler(old)

        # Attach a fresh streaming handler bound to our capture buffer
        handler = StreamingLogHandler(self.log_capture)
        handler.setLevel(logging.INFO)
        handler.setFormatter(logging.Formatter("%(levelname)s - %(message)s"))
        root.addHandler(handler)
        self._handler_added = True

    def get_streaming_logs(self) -> str:
        """Return the accumulated log text for display in the UI."""
        return self.log_capture.get_logs()

    def clear_streaming_logs(self) -> None:
        """Reset the accumulated log text."""
        self.log_capture.clear()

    def is_setup(self) -> bool:
        """True once setup_log_streaming() has attached the handler."""
        return self._handler_added
src/services/mock_projects_service.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Union
2
+ from domain import MOCK_PROJECTS
3
+
4
+
5
class MockProjectService:
    """Service for handling project-related operations"""

    @staticmethod
    def show_mock_project_content(project_names: Union[str, List[str]]) -> str:
        """
        Display the content of selected mock projects.

        Args:
            project_names: Single project name or list of project names

        Returns:
            Formatted content of the selected projects
        """
        if not project_names:
            return "No projects selected."

        # Normalize a single name into a one-element list
        names = [project_names] if isinstance(project_names, str) else project_names

        sections = []
        for name in names:
            if name in MOCK_PROJECTS:
                sections.append(
                    f"=== {name.upper()} ===\n\n{MOCK_PROJECTS[name]}"
                )
            else:
                sections.append(f"=== {name.upper()} ===\n\nProject not found.")

        if not sections:
            return "No valid projects selected."
        # NOTE(review): by operator precedence the "=" banner is prepended
        # once at the top while sections are separated by blank lines only --
        # confirm this formatting is intended (kept as originally written).
        return "\n\n" + "=" * 50 + "\n\n".join(sections)

    @staticmethod
    def validate_mock_projects(mock_projects: Union[str, List[str]]) -> List[str]:
        """
        Validate mock project selections and return list of invalid projects.

        Args:
            mock_projects: Single project name or list of project names

        Returns:
            List of invalid project names (empty if all valid)
        """
        if not mock_projects:
            return []

        names = [mock_projects] if isinstance(mock_projects, str) else mock_projects
        return [name for name in names if name not in MOCK_PROJECTS]

    @staticmethod
    def get_mock_project_files(mock_projects: Union[str, List[str]]) -> List[str]:
        """
        Get file contents for selected mock projects.

        Args:
            mock_projects: Single project name or list of project names

        Returns:
            List of project file contents (unknown names are skipped)
        """
        names = [mock_projects] if isinstance(mock_projects, str) else mock_projects
        return [MOCK_PROJECTS[name] for name in names if name in MOCK_PROJECTS]

    @staticmethod
    def get_available_project_names() -> List[str]:
        """
        Get list of available mock project names.

        Returns:
            List of available project names
        """
        return list(MOCK_PROJECTS.keys())
src/services/schedule_service.py ADDED
@@ -0,0 +1,345 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ import logging
3
+ import random
4
+ from datetime import datetime
5
+ from typing import Tuple, Dict, Any, Optional
6
+
7
+ import pandas as pd
8
+ import gradio as gr
9
+
10
+ from state import app_state
11
+ from constraint_solvers.timetable.solver import solver_manager
12
+ from factory.data_provider import (
13
+ generate_employees,
14
+ generate_employee_availability,
15
+ DATA_PARAMS,
16
+ TimeTableDataParameters,
17
+ SLOTS_PER_DAY,
18
+ )
19
+
20
+ from constraint_solvers.timetable.domain import (
21
+ EmployeeSchedule,
22
+ ScheduleInfo,
23
+ Task,
24
+ )
25
+
26
+ from helpers import schedule_to_dataframe, employees_to_dataframe
27
+ from .data_service import DataService
28
+ from constraint_solvers.timetable.analysis import ConstraintViolationAnalyzer
29
+
30
+
31
+ class ScheduleService:
32
+ """Service for handling schedule solving and management operations"""
33
+
34
@staticmethod
async def solve_schedule_from_state(
    state_data: Dict[str, Any], job_id: str, debug: bool = False
) -> Tuple[pd.DataFrame, pd.DataFrame, str, str, Dict[str, Any]]:
    """
    Solve a schedule from state data.

    Args:
        state_data: State data containing task information and parameters
        job_id: Job identifier for tracking
        debug: Enable debug logging

    Returns:
        Tuple of (emp_df, task_df, new_job_id, status_message, state_data)
    """
    logging.info(f"πŸ”§ solve_schedule_from_state called with job_id: {job_id}")
    logging.info("πŸš€ Starting solve process...")

    # Set debug environment variable for constraint system
    import os

    if debug:
        os.environ["YUGA_DEBUG"] = "true"
    else:
        os.environ["YUGA_DEBUG"] = "false"

    # Handle both old format (string) and new format (dict) for backward compatibility
    if isinstance(state_data, str):
        # Legacy callers passed the task JSON directly as a string
        task_df_json = state_data
        employee_count = None
        days_in_schedule = None
    elif isinstance(state_data, dict):
        task_df_json = state_data.get("task_df_json")
        employee_count = state_data.get("employee_count")
        days_in_schedule = state_data.get("days_in_schedule")
    else:
        # Unknown payload type: falls through to the "no data" error below
        task_df_json = None
        employee_count = None
        days_in_schedule = None

    if not task_df_json:
        logging.warning("❌ No task_df_json provided to solve_schedule_from_state")
        return (
            gr.update(),
            gr.update(),
            None,
            "No schedule to solve. Please load data first using the 'Load Data' button.",
            None,
        )

    try:
        # Parse task data
        task_df = DataService.parse_task_data_from_json(task_df_json, debug)

        # Convert DataFrame to tasks
        tasks = DataService.convert_dataframe_to_tasks(task_df)

        # Debug: Log task information if debug is enabled
        if debug:
            logging.info("πŸ” DEBUG: Task information for constraint checking:")
            for task in tasks:
                logging.info(
                    f" Task ID: {task.id}, Project: '{task.project_id}', "
                    f"Sequence: {task.sequence_number}, Description: '{task.description[:30]}...'"
                )

        # Generate schedule (employees + availability are synthesized here)
        schedule = ScheduleService._generate_schedule_for_solving(
            tasks, employee_count, days_in_schedule
        )

        # Start solving (asynchronous; a fresh job id is minted internally)
        (
            emp_df,
            solved_task_df,
            new_job_id,
            status,
        ) = await ScheduleService._solve_schedule(schedule, debug)

        logging.info("πŸ“ˆ Solver process initiated successfully")
        return emp_df, solved_task_df, new_job_id, status, state_data

    except Exception as e:
        logging.error(f"Error in solve_schedule_from_state: {e}")
        # On failure keep UI tables unchanged and surface the error message
        return (
            gr.update(),
            gr.update(),
            None,
            f"Error solving schedule: {str(e)}",
            state_data,
        )
125
+
126
@staticmethod
def _generate_schedule_for_solving(
    tasks: list, employee_count: Optional[int], days_in_schedule: Optional[int]
) -> EmployeeSchedule:
    """Generate a complete schedule ready for solving.

    Synthesizes employees and their availability (seeded by the configured
    random_seed, so results are reproducible) and pairs them with *tasks*.
    """
    parameters: TimeTableDataParameters = DATA_PARAMS

    # Override parameters if provided from UI
    if employee_count is not None or days_in_schedule is not None:
        parameters = TimeTableDataParameters(
            skill_set=parameters.skill_set,
            days_in_schedule=days_in_schedule
            if days_in_schedule is not None
            else parameters.days_in_schedule,
            employee_count=employee_count
            if employee_count is not None
            else parameters.employee_count,
            optional_skill_distribution=parameters.optional_skill_distribution,
            availability_count_distribution=parameters.availability_count_distribution,
            random_seed=parameters.random_seed,
        )

    logging.info("πŸ‘₯ Generating employees and availability...")
    # Availability dates are anchored to today
    start_date = datetime.now().date()
    randomizer = random.Random(parameters.random_seed)
    employees = generate_employees(parameters, randomizer)
    logging.info(f"βœ… Generated {len(employees)} employees")

    # Generate employee availability preferences (mutates employees in place)
    logging.info("πŸ“… Generating employee availability preferences...")
    generate_employee_availability(employees, parameters, start_date, randomizer)
    logging.info("βœ… Employee availability generated")

    return EmployeeSchedule(
        employees=employees,
        tasks=tasks,
        schedule_info=ScheduleInfo(
            total_slots=parameters.days_in_schedule * SLOTS_PER_DAY
        ),
    )
166
+
167
@staticmethod
async def _solve_schedule(
    schedule: EmployeeSchedule, debug: bool = False
) -> Tuple[pd.DataFrame, pd.DataFrame, str, str]:
    """
    Solve the schedule and return the dataframes and job_id.

    Args:
        schedule: The schedule to solve
        debug: Enable debug logging

    Returns:
        Tuple of (emp_df, task_df, job_id, status_message)
    """
    if schedule is None:
        return None, None, None, "No schedule to solve. Please load data first."

    job_id: str = str(uuid.uuid4())

    # Start solving asynchronously; the listener stores the solution in the
    # shared app state, where poll_solution later picks it up by job_id.
    def listener(solution):
        app_state.add_solved_schedule(job_id, solution)

    solver_manager.solve_and_listen(job_id, schedule, listener)

    # Return the (still unsolved) schedule snapshot for immediate display
    emp_df = employees_to_dataframe(schedule)
    task_df = schedule_to_dataframe(schedule)

    task_df = task_df[
        [
            "Project",
            "Sequence",
            "Employee",
            "Task",
            "Start",
            "End",
            "Duration (hours)",
            "Required Skill",
        ]
    ].sort_values(["Project", "Sequence"])

    return emp_df, task_df, job_id, "Solving..."
209
+
210
    @staticmethod
    def poll_solution(
        job_id: str, schedule: EmployeeSchedule, debug: bool = False
    ) -> Tuple[pd.DataFrame, pd.DataFrame, str, str, object]:
        """
        Poll for a solution for a given job_id.

        Non-blocking check of ``app_state``: if the solver has published a
        result for ``job_id`` it is rendered and returned; otherwise the
        caller's current state is echoed back with a "Solving..." status.

        Args:
            job_id: The job_id to poll for
            schedule: The current schedule state
            debug: Whether to enable debug logging

        Returns:
            Tuple of (emp_df, task_df, job_id, status_message, schedule);
            emp_df/task_df are None while the solve is still in progress.
        """
        if job_id and app_state.has_solved_schedule(job_id):
            solved_schedule: EmployeeSchedule = app_state.get_solved_schedule(job_id)

            emp_df: pd.DataFrame = employees_to_dataframe(solved_schedule)
            task_df: pd.DataFrame = schedule_to_dataframe(solved_schedule)

            if debug:
                # Log solved task order for debugging
                logging.info("Solved task order:")
                for _, row in task_df.iterrows():
                    logging.info(
                        f"Project: {row['Project']}, Sequence: {row['Sequence']}, Task: {row['Task'][:30]}, Start: {row['Start']}"
                    )

            # Fixed column order; solved view is sorted chronologically
            # (unlike the pre-solve view, which sorts by project/sequence).
            task_df = task_df[
                [
                    "Project",
                    "Sequence",
                    "Employee",
                    "Task",
                    "Start",
                    "End",
                    "Duration (hours)",
                    "Required Skill",
                ]
            ].sort_values(["Start"])

            # Check if hard constraints are violated (infeasible solution)
            status_message = ScheduleService._generate_status_message(solved_schedule)

            return emp_df, task_df, job_id, status_message, solved_schedule

        # Not solved yet: keep the job_id so the caller continues polling.
        return None, None, job_id, "Solving...", schedule
258
+
259
    @staticmethod
    async def auto_poll(
        job_id: str, llm_output: dict, debug: bool = False
    ) -> Tuple[pd.DataFrame, pd.DataFrame, str, str, dict]:
        """
        Poll for updates asynchronously.

        Gradio-friendly variant of ``poll_solution``: failure and "no
        update" paths return ``gr.update()`` placeholders so the UI
        components are left untouched.

        Args:
            job_id: Job identifier to poll for
            llm_output: Current LLM output state
            debug: Enable debug logging

        Returns:
            Tuple of (emp_df, task_df, job_id, status_message, llm_output)

        NOTE(review): the final "No updates" fallback returns ``None`` as
        the job_id (unlike ``poll_solution``, which preserves it) — confirm
        whether this is meant to stop further polling for unfinished jobs.
        """
        try:
            if job_id and app_state.has_solved_schedule(job_id):
                schedule = app_state.get_solved_schedule(job_id)
                emp_df = employees_to_dataframe(schedule)
                task_df = schedule_to_dataframe(schedule)

                # Sort tasks by start time for display
                task_df = task_df.sort_values("Start")

                if debug:
                    logging.info(f"Polling for job {job_id}")
                    logging.info(f"Current schedule state: {task_df.head()}")

                # Generate status message based on constraint satisfaction
                status_message = ScheduleService._generate_status_message(schedule)

                return emp_df, task_df, job_id, status_message, llm_output

        except Exception as e:
            # Report the error in the status field but keep the job_id so
            # the caller may retry.
            logging.error(f"Error polling: {e}")
            return (
                gr.update(),
                gr.update(),
                job_id,
                f"Error polling: {str(e)}",
                llm_output,
            )

        # No solved schedule available for this job_id (or no job_id given).
        return (
            gr.update(),
            gr.update(),
            None,
            "No updates",
            llm_output,
        )
309
+
310
+ @staticmethod
311
+ def _generate_status_message(schedule: EmployeeSchedule) -> str:
312
+ """Generate status message based on schedule score and constraint violations"""
313
+ status_message = "Solution updated"
314
+
315
+ if schedule.score is not None:
316
+ hard_score = schedule.score.hard_score
317
+ if hard_score < 0:
318
+ # Hard constraints are violated - the problem is infeasible
319
+ violation_count = abs(int(hard_score))
320
+ violation_details = (
321
+ ConstraintViolationAnalyzer.analyze_constraint_violations(schedule)
322
+ )
323
+ suggestions = ConstraintViolationAnalyzer.generate_suggestions(schedule)
324
+ suggestion_text = "\n".join(f"β€’ {s}" for s in suggestions)
325
+
326
+ status_message = (
327
+ f"⚠️ CONSTRAINTS VIOLATED: {violation_count} hard constraint(s) could not be satisfied. "
328
+ f"The schedule is not feasible.\n\n{violation_details}\n\nSuggestions:\n{suggestion_text}"
329
+ )
330
+ logging.warning(
331
+ f"Infeasible solution detected. Hard score: {hard_score}"
332
+ )
333
+ else:
334
+ soft_score = schedule.score.soft_score
335
+ status_message = f"βœ… Solved successfully! Score: {hard_score}/{soft_score} (hard/soft)"
336
+ logging.info(
337
+ f"Feasible solution found. Score: {hard_score}/{soft_score}"
338
+ )
339
+
340
+ return status_message
341
+
342
    @staticmethod
    def start_timer(job_id: str, llm_output: Any) -> gr.Timer:
        """Start a timer for polling (Gradio-specific functionality).

        Both parameters are accepted only to match the Gradio event
        signature; neither is used here.
        """
        return gr.Timer(active=True)
src/state.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict
2
+ from constraint_solvers.timetable.domain import EmployeeSchedule
3
+
4
+
5
class AppState:
    """Central state management for the Yuga Planner application.

    Currently holds the solver results, keyed by job id, so that UI
    pollers can pick up schedules solved asynchronously.
    """

    def __init__(self):
        # Solver results, keyed by the job id issued when solving started.
        self._schedules_by_job: Dict[str, EmployeeSchedule] = {}

    @property
    def solved_schedules(self) -> Dict[str, EmployeeSchedule]:
        """The mapping of job id -> solved schedule."""
        return self._schedules_by_job

    def add_solved_schedule(self, key: str, schedule: EmployeeSchedule) -> None:
        """Store a solved schedule under the given job id."""
        self._schedules_by_job[key] = schedule

    def get_solved_schedule(self, key: str) -> EmployeeSchedule | None:
        """Return the solved schedule for *key*, or None if absent."""
        return self._schedules_by_job.get(key)

    def clear_solved_schedules(self) -> None:
        """Drop every stored solved schedule."""
        self._schedules_by_job.clear()

    def has_solved_schedule(self, key: str) -> bool:
        """Return True when a solved schedule exists for *key*."""
        return key in self._schedules_by_job


# Global app state instance
app_state = AppState()
src/utils/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ Utils package initialization.
3
+ """
src/utils/extract_calendar.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from icalendar import Calendar
2
+
3
+
4
def _component_time_to_iso(val):
    """Best-effort ISO-8601 rendering of an icalendar date/time property.

    icalendar wraps times in objects exposing a ``.dt`` attribute; fall
    back to ``str`` for anything else (including the empty-string default
    used for missing properties).
    """
    if hasattr(val, "dt"):
        dt = val.dt
        if hasattr(dt, "isoformat"):
            return dt.isoformat()
        return str(dt)
    return str(val)


def extract_ical_entries(file_bytes):
    """Parse iCalendar content and return its VEVENT entries.

    Args:
        file_bytes: Raw ``.ics`` content (bytes or str), as accepted by
            ``icalendar.Calendar.from_ical``.

    Returns:
        Tuple ``(entries, error)``: on success ``entries`` is a list of
        dicts with ``summary``/``dtstart``/``dtend`` (times as ISO strings)
        and ``error`` is None; on failure ``entries`` is None and ``error``
        is the exception message.
    """
    try:
        cal = Calendar.from_ical(file_bytes)
        # Fix: the conversion helper used to be re-defined inside the loop
        # for every event; it is now the module-level _component_time_to_iso.
        entries = [
            {
                "summary": str(component.get("summary", "")),
                "dtstart": _component_time_to_iso(component.get("dtstart", "")),
                "dtend": _component_time_to_iso(component.get("dtend", "")),
            }
            for component in cal.walk()
            if component.name == "VEVENT"
        ]
        return entries, None
    except Exception as e:
        # Deliberate best-effort contract: report the failure to the caller
        # as (None, message) instead of raising.
        return None, str(e)
src/utils/load_secrets.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, logging
2
+
3
+ logging.basicConfig(level=logging.INFO)
4
+ logger = logging.getLogger(__name__)
5
+
6
+ ### SECRETS ###
7
+ def load_secrets(secrets_file: str):
8
+ """
9
+ Load secrets from Python file into environment variables.
10
+
11
+ Args:
12
+ secrets_file (str): Path to the Python file containing secrets
13
+
14
+ Returns:
15
+ bool: True if secrets were loaded successfully
16
+ """
17
+ try:
18
+ # Import secrets from the specified file
19
+ import importlib.util
20
+
21
+ spec = importlib.util.spec_from_file_location("secrets", secrets_file)
22
+ secrets = importlib.util.module_from_spec(spec)
23
+ spec.loader.exec_module(secrets)
24
+
25
+ # Set environment variables
26
+ os.environ["NEBIUS_API_KEY"] = secrets.NEBIUS_API_KEY
27
+ os.environ["NEBIUS_MODEL"] = secrets.NEBIUS_MODEL
28
+ return True
29
+
30
+ except Exception as e:
31
+ logger.error(f"Failed to load secrets from {secrets_file}: {str(e)}")
32
+ return False
src/utils/markdown_analyzer.py ADDED
@@ -0,0 +1,686 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Markdown Analyzer Library
3
+
4
+ This library provides a comprehensive Markdown parsing and analysis system. It consists of three main components:
5
+
6
+ 1. MarkdownParser: Converts Markdown text into a stream of tokens representing different structural elements
7
+ (headers, paragraphs, lists, code blocks, etc.). It handles both block-level and inline elements.
8
+
9
+ 2. InlineParser: Processes inline Markdown elements within block tokens, such as:
10
+ - Links and images
11
+ - Inline code
12
+ - Emphasis (bold, italic)
13
+ - Footnotes
14
+ - HTML inline elements
15
+
16
+ 3. MarkdownAnalyzer: The main interface that combines parsing and analysis. It:
17
+ - Parses the input text into tokens
18
+ - Processes inline elements within tokens
19
+ - Provides methods to identify and analyze different Markdown elements
20
+ - Generates statistics about the document structure
21
+
22
+ Usage:
23
+ analyzer = MarkdownAnalyzer(markdown_text)
24
+ analysis = analyzer.analyze() # Get document statistics
25
+ headers = analyzer.identify_headers() # Get all headers
26
+ links = analyzer.identify_links() # Get all links
27
+ # etc.
28
+
29
+ The library supports standard Markdown features including:
30
+ - Headers (ATX and Setext style)
31
+ - Lists (ordered, unordered, and task lists)
32
+ - Code blocks (fenced and inline)
33
+ - Blockquotes
34
+ - Tables
35
+ - Links and images
36
+ - Footnotes
37
+ - HTML blocks and inline elements
38
+ """
39
+
40
+ import re
41
+ from collections import defaultdict
42
+
43
+
44
### MAIN INTERFACE ###
class MarkdownAnalyzer:
    """Parse Markdown text and expose per-element queries and statistics.

    Combines MarkdownParser (block-level tokens) with InlineParser
    (links, inline code, emphasis, footnotes, inline HTML) and offers
    ``identify_*`` accessors plus an aggregate ``analyze`` summary.
    """

    def __init__(self, text):
        # Raw Markdown source under analysis.
        self.text = text
        parser = MarkdownParser(self.text)
        self.tokens = parser.parse()
        # Reference-style link and footnote definitions collected during
        # block parsing; needed to resolve [text][ref] and [^id] usages.
        self.references = parser.references
        self.footnotes = parser.footnotes
        self.inline_parser = InlineParser(
            references=self.references, footnotes=self.footnotes
        )

        self._parse_inline_tokens()

    def _parse_inline_tokens(self):
        """Run inline parsing on text-bearing tokens, merging results into token.meta."""
        # Only these block types carry prose that can contain inline markup.
        inline_types = ("paragraph", "header", "blockquote")
        for token in self.tokens:
            if token.type in inline_types and token.content:
                inline_data = self.inline_parser.parse_inline(token.content)
                token.meta.update(inline_data)

    def identify_headers(self):
        """Return {"Header": [{line, level, text}, ...]} for all headers."""
        result = defaultdict(list)
        for token in self.tokens:
            if token.type == "header":
                result["Header"].append(
                    {"line": token.line, "level": token.level, "text": token.content}
                )
        return dict(result)

    def identify_paragraphs(self):
        """Return {"Paragraph": [content, ...]} for all paragraphs."""
        result = defaultdict(list)
        for token in self.tokens:
            if token.type == "paragraph":
                result["Paragraph"].append(token.content)
        return dict(result)

    def identify_blockquotes(self):
        """Return {"Blockquote": [content, ...]} for all blockquotes."""
        result = defaultdict(list)
        for token in self.tokens:
            if token.type == "blockquote":
                result["Blockquote"].append(token.content)
        return dict(result)

    def identify_code_blocks(self):
        """Return {"Code block": [{start_line, content, language}, ...]}."""
        result = defaultdict(list)
        for token in self.tokens:
            if token.type == "code":
                result["Code block"].append(
                    {
                        "start_line": token.line,
                        "content": token.content,
                        "language": token.meta.get("language"),
                    }
                )
        return dict(result)

    def identify_lists(self):
        """Return ordered/unordered lists as lists of their item dicts."""
        result = defaultdict(list)
        for token in self.tokens:
            if token.type == "ordered_list":
                result["Ordered list"].append(token.meta["items"])
            elif token.type == "unordered_list":
                result["Unordered list"].append(token.meta["items"])
        return dict(result)

    def identify_tables(self):
        """Return {"Table": [{header, rows}, ...]} for all tables."""
        result = defaultdict(list)
        for token in self.tokens:
            if token.type == "table":
                result["Table"].append(
                    {"header": token.meta["header"], "rows": token.meta["rows"]}
                )
        return dict(result)

    def identify_links(self):
        """Return text and image links found by inline parsing, with line numbers."""
        result = defaultdict(list)
        for token in self.tokens:
            if "text_links" in token.meta:
                for l in token.meta["text_links"]:
                    result["Text link"].append(
                        {"line": token.line, "text": l["text"], "url": l["url"]}
                    )
            if "image_links" in token.meta:
                for img in token.meta["image_links"]:
                    result["Image link"].append(
                        {
                            "line": token.line,
                            "alt_text": img["alt_text"],
                            "url": img["url"],
                        }
                    )
        return dict(result)

    def identify_footnotes(self):
        """Return footnotes actually used in the text, deduplicated by (id, content)."""
        result = []
        seen = set()
        for token in self.tokens:
            if "footnotes_used" in token.meta:
                for fn in token.meta["footnotes_used"]:
                    key = (fn["id"], fn["content"])
                    if key not in seen:
                        seen.add(key)
                        result.append(
                            {
                                "line": token.line,
                                "id": fn["id"],
                                "content": fn["content"],
                            }
                        )
        return result

    def identify_inline_code(self):
        """Return [{line, code}, ...] for every `inline code` span."""
        codes = []
        for token in self.tokens:
            if "inline_code" in token.meta:
                for c in token.meta["inline_code"]:
                    codes.append({"line": token.line, "code": c})
        return codes

    def identify_emphasis(self):
        """Return [{line, text}, ...] for every emphasized (bold/italic) span."""
        ems = []
        for token in self.tokens:
            if "emphasis" in token.meta:
                for e in token.meta["emphasis"]:
                    ems.append({"line": token.line, "text": e})
        return ems

    def identify_task_items(self):
        """Return task-list items ([ ] / [x]) with their checked state."""
        tasks = []
        for token in self.tokens:
            if token.type in ("ordered_list", "unordered_list"):
                for it in token.meta["items"]:
                    if it.get("task_item"):
                        tasks.append(
                            {
                                "line": token.line,
                                "text": it["text"],
                                "checked": it["checked"],
                            }
                        )
        return tasks

    def identify_html_blocks(self):
        """Return [{line, content}, ...] for block-level HTML."""
        result = []
        for token in self.tokens:
            if token.type == "html_block":
                result.append({"line": token.line, "content": token.content})
        return result

    def identify_html_inline(self):
        """Return [{line, html}, ...] for inline HTML found in prose tokens."""
        result = []
        inline_types = ("paragraph", "header", "blockquote")
        for token in self.tokens:
            if token.type in inline_types and "html_inline" in token.meta:
                for h in token.meta["html_inline"]:
                    result.append({"line": token.line, "html": h})
        return result

    def count_words(self):
        """Whitespace-separated word count over the raw source (markup included)."""
        words = self.text.split()
        return len(words)

    def count_characters(self):
        """Non-whitespace character count over the raw source (markup included)."""
        characters = [char for char in self.text if not char.isspace()]
        return len(characters)

    def analyze(self):
        """Return a dict of element counts and word/character totals."""
        headers = self.identify_headers().get("Header", [])
        paragraphs = self.identify_paragraphs().get("Paragraph", [])
        blockquotes = self.identify_blockquotes().get("Blockquote", [])
        code_blocks = self.identify_code_blocks().get("Code block", [])
        lists = self.identify_lists()
        ordered_lists = lists.get("Ordered list", [])
        unordered_lists = lists.get("Unordered list", [])
        tables = self.identify_tables().get("Table", [])
        html_blocks = self.identify_html_blocks()
        html_inline = self.identify_html_inline()

        analysis = {
            "headers": len(headers),
            "paragraphs": len(paragraphs),
            "blockquotes": len(blockquotes),
            "code_blocks": len(code_blocks),
            # List counts are total items across all lists, not list count.
            "ordered_lists": sum(len(l) for l in ordered_lists),
            "unordered_lists": sum(len(l) for l in unordered_lists),
            "tables": len(tables),
            "html_blocks": len(html_blocks),
            "html_inline_count": len(html_inline),
            "words": self.count_words(),
            "characters": self.count_characters(),
        }
        return analysis
+
243
+
244
### PARSING CLASSES ###
class BlockToken:
    """Represents a block-level Markdown element with its type, content, and metadata."""

    def __init__(self, type_, content="", level=None, meta=None, line=None):
        # Block kind: "header", "paragraph", "code", "table", ...
        self.type = type_
        # Raw textual content of the block (may be empty, e.g. for "hr").
        self.content = content
        # Header level (h1-h6) or list indentation, when applicable.
        self.level = level
        # Extra per-type data: code language, list items, table cells, ...
        self.meta = meta or {}
        # 1-based line number of the block in the source document.
        self.line = line
+ self.line = line # Line number in the original document
254
+
255
+
256
class InlineParser:
    """Extract inline Markdown elements (links, code, emphasis, footnotes,
    inline HTML) from a block of already-parsed text.

    Scan order matters: footnotes, code, and emphasis are collected from
    the raw text, while links are matched against a copy with inline HTML
    blocks removed to avoid false positives inside tags.
    """

    # Regular expressions for matching inline Markdown elements
    IMAGE_OR_LINK_RE = re.compile(
        r"(!?\[([^\]]*)\])(\(([^\)]+)\)|\[([^\]]+)\])"
    )  # Matches [text](url) or ![alt](url)
    CODE_INLINE_RE = re.compile(r"`([^`]+)`")  # Matches `code`
    EMPHASIS_RE = re.compile(
        r"(\*\*|__)(.*?)\1|\*(.*?)\*|_(.*?)_"
    )  # Matches **bold**, *italic*, _underline_
    FOOTNOTE_RE = re.compile(r"\[\^([^\]]+)\]")  # Matches [^footnote]
    HTML_INLINE_RE = re.compile(r"<[a-zA-Z/][^>]*>")  # Matches HTML tags
    HTML_INLINE_BLOCK_RE = re.compile(
        r"<([a-zA-Z]+)([^>]*)>(.*?)</\1>", re.DOTALL
    )  # Matches HTML blocks with content

    def __init__(self, references=None, footnotes=None):
        # Initialize with optional reference links and footnotes from the document
        self.references = references or {}  # For [text][ref] style links
        self.footnotes = footnotes or {}  # For [^footnote] style references

    def parse_inline(self, text):
        """Parse inline Markdown elements within a block of text.

        Returns a dict with keys text_links, image_links, inline_code,
        emphasis, footnotes_used, html_inline — each a list (possibly
        empty), suitable for merging into a BlockToken's meta.
        """
        result = {
            "text_links": [],  # Regular [text](url) links
            "image_links": [],  # ![alt](url) images
            "inline_code": [],  # `code` blocks
            "emphasis": [],  # **bold**, *italic* text
            "footnotes_used": [],  # [^footnote] references
            "html_inline": [],  # HTML tags and blocks
        }

        # Process footnotes first to avoid conflicts with other patterns.
        # Only footnotes with a known definition are reported, once each.
        used_footnotes = set()
        for fm in self.FOOTNOTE_RE.finditer(text):
            fid = fm.group(1)
            if fid in self.footnotes and fid not in used_footnotes:
                used_footnotes.add(fid)
                result["footnotes_used"].append(
                    {"id": fid, "content": self.footnotes[fid]}
                )

        # Find inline code blocks
        for cm in self.CODE_INLINE_RE.finditer(text):
            code = cm.group(1)
            result["inline_code"].append(code)

        # Find emphasized text (bold, italic, underline).
        # Group 2 = **/__ content, group 3 = * content, group 4 = _ content.
        for em_match in self.EMPHASIS_RE.finditer(text):
            emphasized_text = (
                em_match.group(2) or em_match.group(3) or em_match.group(4)
            )
            if emphasized_text:
                result["emphasis"].append(emphasized_text)

        # Process HTML blocks first to avoid conflicts with other patterns:
        # matched <tag>...</tag> spans are stripped from a working copy so
        # link matching below cannot fire inside them.
        temp_text = text
        for block_match in self.HTML_INLINE_BLOCK_RE.finditer(text):
            html_content = block_match.group(0)
            result["html_inline"].append(html_content)
            temp_text = temp_text.replace(html_content, "")

        # Process links and images
        for mm in self.IMAGE_OR_LINK_RE.finditer(temp_text):
            prefix = mm.group(1)  # The [text] or ![alt] part
            inner_text = mm.group(2)  # The text inside []
            url = mm.group(4)  # The (url) part
            ref_id = mm.group(5)  # The [ref] part for reference-style links

            is_image = prefix.startswith("!")
            final_url = url
            # Reference-style links resolve through the definitions table
            # (keys are stored lowercased by MarkdownParser).
            if ref_id and ref_id.lower() in self.references:
                final_url = self.references[ref_id.lower()]

            # Links with no resolvable URL (unknown reference) are dropped.
            if is_image:
                if final_url:
                    result["image_links"].append(
                        {"alt_text": inner_text, "url": final_url}
                    )
            else:
                if final_url:
                    result["text_links"].append({"text": inner_text, "url": final_url})
        return result
+ return result
338
+
339
+
340
class MarkdownParser:
    """Line-oriented block parser producing a list of BlockToken objects.

    Maintains a cursor (``self.pos``) over ``self.lines``; each ``parse_*``
    helper consumes the lines of one block, appends a token, and advances
    the cursor. Reference-link and footnote definitions are extracted over
    the whole text up front.
    """

    # Regular expressions for matching block-level Markdown elements
    FRONTMATTER_RE = re.compile(r"^---\s*$")  # Matches YAML frontmatter delimiters
    ATX_HEADER_RE = re.compile(r"^(#{1,6})\s+(.*)$")  # Matches # Header style
    SETEXT_H1_RE = re.compile(r"^=+\s*$")  # Matches ==== style h1
    SETEXT_H2_RE = re.compile(r"^-+\s*$")  # Matches ---- style h2
    FENCE_RE = re.compile(r"^```([^`]*)$")  # Matches code fence start
    BLOCKQUOTE_RE = re.compile(r"^(>\s?)(.*)$")  # Matches > quote style
    ORDERED_LIST_RE = re.compile(r"^\s*\d+\.\s+(.*)$")  # Matches 1. list style
    UNORDERED_LIST_RE = re.compile(r"^\s*[-+*]\s+(.*)$")  # Matches - list style
    HR_RE = re.compile(r"^(\*{3,}|-{3,}|_{3,})\s*$")  # Matches horizontal rules
    TABLE_SEPARATOR_RE = re.compile(
        r"^\|?(\s*:?-+:?\s*\|)+\s*:?-+:?\s*\|?\s*$"
    )  # Matches table separators
    REFERENCE_DEF_RE = re.compile(
        r"^\[([^\]]+)\]:\s+(.*?)\s*$", re.MULTILINE
    )  # Matches [ref]: url definitions
    FOOTNOTE_DEF_RE = re.compile(
        r"^\[\^([^\]]+)\]:\s+(.*?)\s*$", re.MULTILINE
    )  # Matches [^footnote]: content
    HTML_BLOCK_START = re.compile(
        r"^(<([a-zA-Z]+)([^>]*)>|<!--)"
    )  # Matches HTML block start
    HTML_BLOCK_END_COMMENT = re.compile(r"-->\s*$")  # Matches HTML comment end

    def __init__(self, text):
        """Initialize parser with the Markdown text to parse."""
        self.lines = text.split("\n")
        self.length = len(self.lines)
        self.pos = 0  # Current position in the text
        self.tokens = []  # List of parsed tokens
        self.text = text
        self.references = {}  # Reference-style link definitions
        self.footnotes = {}  # Footnote definitions
        self.extract_references_and_footnotes()

    def extract_references_and_footnotes(self):
        """Extract all reference-style links and footnotes from the document."""
        # Reference ids are lowercased so lookups are case-insensitive.
        for m in self.REFERENCE_DEF_RE.finditer(self.text):
            rid, url = m.groups()
            self.references[rid.lower()] = url

        for m in self.FOOTNOTE_DEF_RE.finditer(self.text):
            fid, content = m.groups()
            self.footnotes[fid] = content

    def parse(self):
        """Main parsing method that processes the entire document.

        Returns the list of BlockTokens. Block types are tried in a fixed
        priority order; paragraph is the fallback when nothing matches.
        """
        # Check for frontmatter at the start
        if self.pos < self.length and self.FRONTMATTER_RE.match(
            self.lines[self.pos].strip()
        ):
            self.parse_frontmatter()

        # Process the document line by line
        while self.pos < self.length:
            if self.pos >= self.length:
                break
            line = self.lines[self.pos]
            if not line.strip():
                # Blank lines separate blocks; skip them.
                self.pos += 1
                continue

            # Check for table start
            if self.is_table_start():
                self.parse_table()
                continue

            # Check for HTML block
            if self.is_html_block_start(line):
                self.parse_html_block()
                continue

            # Check for ATX-style headers (# Header)
            m = self.ATX_HEADER_RE.match(line)
            if m:
                level = len(m.group(1))
                text = m.group(2).strip()
                self.tokens.append(
                    BlockToken("header", content=text, level=level, line=self.pos + 1)
                )
                self.pos += 1
                continue

            # Check for Setext-style headers (=== or ---): the header text
            # is the current line, the underline is the next line.
            if self.pos + 1 < self.length:
                next_line = self.lines[self.pos + 1].strip()
                if self.SETEXT_H1_RE.match(next_line):
                    text = line.strip()
                    self.tokens.append(
                        BlockToken("header", content=text, level=1, line=self.pos + 1)
                    )
                    self.pos += 2
                    continue
                if self.SETEXT_H2_RE.match(next_line):
                    text = line.strip()
                    self.tokens.append(
                        BlockToken("header", content=text, level=2, line=self.pos + 1)
                    )
                    self.pos += 2
                    continue

            # Check for horizontal rule
            if self.HR_RE.match(line.strip()):
                self.tokens.append(BlockToken("hr", line=self.pos + 1))
                self.pos += 1
                continue

            # Check for fenced code block
            fm = self.FENCE_RE.match(line.strip())
            if fm:
                lang = fm.group(1).strip()
                self.parse_fenced_code_block(lang)
                continue

            # Check for blockquote
            bm = self.BLOCKQUOTE_RE.match(line)
            if bm:
                self.parse_blockquote()
                continue

            # Check for lists
            om = self.ORDERED_LIST_RE.match(line)
            um = self.UNORDERED_LIST_RE.match(line)
            if om or um:
                self.parse_list(ordered=bool(om))
                continue

            # If no other block type matches, treat as paragraph
            self.parse_paragraph()

        return self.tokens

    def is_html_block_start(self, line):
        """Return True if the line looks like the start of an HTML block or comment."""
        return self.HTML_BLOCK_START.match(line.strip()) is not None

    def parse_html_block(self):
        """Consume an HTML block (or <!-- comment -->) into one token."""
        start = self.pos
        lines = []
        first_line = self.lines[self.pos].strip()
        comment_mode = first_line.startswith("<!--")

        # Read HTML block until empty line / eof; comments instead run
        # until the closing --> marker.
        while self.pos < self.length:
            line = self.lines[self.pos]
            lines.append(line)
            self.pos += 1

            if comment_mode and self.HTML_BLOCK_END_COMMENT.search(line):
                break
            else:
                # If next line is empty or doesn't exist, stop
                if self.pos < self.length:
                    nxt_line = self.lines[self.pos]
                    if not nxt_line.strip():
                        # End of block
                        break
                else:
                    # End of file
                    break

        content = "\n".join(lines)
        self.tokens.append(BlockToken("html_block", content=content, line=start + 1))

    def starts_new_block_peek(self):
        """Check (without advancing) whether the line at the cursor starts a new block."""
        if self.pos < self.length:
            nxt = self.lines[self.pos].strip()
            return self.starts_new_block(nxt)
        return False

    def is_table_start(self):
        """Return True when the current + next lines look like a table header + separator row."""
        if self.pos + 1 < self.length:
            line = self.lines[self.pos].strip()
            next_line = self.lines[self.pos + 1].strip()
            if (
                "|" in line
                and "|" in next_line
                and self.TABLE_SEPARATOR_RE.match(next_line)
            ):
                return True
        return False

    def parse_table(self):
        """Consume a pipe table: header row, separator row, then data rows.

        NOTE(review): empty cells are dropped by the ``if .strip()`` filters,
        so rows may have fewer cells than the header — confirm this is the
        intended behavior for sparse tables.
        """
        start = self.pos
        header_line = self.lines[self.pos].strip()
        separator_line = self.lines[self.pos + 1].strip()
        self.pos += 2
        rows = []
        while self.pos < self.length:
            line = self.lines[self.pos].strip()
            if not line or self.starts_new_block(line):
                break
            rows.append(line)
            self.pos += 1

        header_cells = [
            h.strip() for h in header_line.strip("|").split("|") if h.strip()
        ]
        data_rows = []
        for r in rows:
            data_rows.append([c.strip() for c in r.strip("|").split("|") if c.strip()])

        self.tokens.append(
            BlockToken(
                "table",
                meta={"header": header_cells, "rows": data_rows},
                line=start + 1,
            )
        )

    def starts_new_block(self, line):
        """Return truthy when the (stripped) line begins any non-paragraph block type."""
        return (
            self.ATX_HEADER_RE.match(line)
            or self.FRONTMATTER_RE.match(line)
            or self.FENCE_RE.match(line)
            or self.BLOCKQUOTE_RE.match(line)
            or self.ORDERED_LIST_RE.match(line)
            or self.UNORDERED_LIST_RE.match(line)
            or self.HR_RE.match(line)
            or self.SETEXT_H1_RE.match(line)
            or self.SETEXT_H2_RE.match(line)
            or self.HTML_BLOCK_START.match(line)
        )

    def parse_frontmatter(self):
        """Consume a YAML frontmatter block delimited by --- lines (cursor is on the opening ---)."""
        self.pos += 1
        start = self.pos
        while self.pos < self.length:
            if self.FRONTMATTER_RE.match(self.lines[self.pos].strip()):
                content = "\n".join(self.lines[start : self.pos])
                self.tokens.append(BlockToken("frontmatter", content=content))
                self.pos += 1
                return
            self.pos += 1
        # Unterminated frontmatter: take everything to end of file.
        content = "\n".join(self.lines[start:])
        self.tokens.append(BlockToken("frontmatter", content=content))
        self.pos = self.length

    def parse_fenced_code_block(self, lang):
        """Consume a ``` fenced code block (cursor is on the opening fence)."""
        self.pos += 1
        start = self.pos
        while self.pos < self.length:
            line = self.lines[self.pos]
            if line.strip().startswith("```"):
                content = "\n".join(self.lines[start : self.pos])
                self.tokens.append(
                    BlockToken(
                        "code", content=content, meta={"language": lang}, line=start + 1
                    )
                )
                self.pos += 1
                return
            self.pos += 1
        # Unterminated fence: take everything to end of file.
        content = "\n".join(self.lines[start:])
        self.tokens.append(
            BlockToken("code", content=content, meta={"language": lang}, line=start + 1)
        )
        self.pos = self.length

    def parse_blockquote(self):
        """Consume consecutive > lines into one blockquote token (markers stripped)."""
        start = self.pos
        lines = []
        while self.pos < self.length:
            line = self.lines[self.pos]
            bm = self.BLOCKQUOTE_RE.match(line)
            if bm:
                lines.append(bm.group(2))
                self.pos += 1
            else:
                break
        content = "\n".join(lines)
        self.tokens.append(BlockToken("blockquote", content=content, line=start + 1))

    def parse_list(self, ordered):
        """Consume a list block and tokenize its items.

        Continuation lines are folded into the current item; a blank line
        ends the current item but not the list. Items are post-processed
        for [ ] / [x] task-list markers.
        """
        start = self.pos
        items = []
        current_item = []
        list_pattern = self.ORDERED_LIST_RE if ordered else self.UNORDERED_LIST_RE

        while self.pos < self.length:
            line = self.lines[self.pos]
            if not line.strip():
                # Blank line: close the current item, keep scanning the list.
                if current_item:
                    items.append("\n".join(current_item).strip())
                    current_item = []
                self.pos += 1
                continue

            # A non-list block start terminates the list.
            if self.starts_new_block(line.strip()) and not (
                self.ORDERED_LIST_RE.match(line.strip())
                or self.UNORDERED_LIST_RE.match(line.strip())
            ):
                break

            lm = list_pattern.match(line)
            if lm:
                if current_item:
                    items.append("\n".join(current_item).strip())
                    current_item = []
                current_item.append(lm.group(1))
                self.pos += 1
            else:
                # Continuation line of the current item.
                current_item.append(line.strip())
                self.pos += 1

        if current_item:
            items.append("\n".join(current_item).strip())

        # Detect GitHub-style task items on the first line of each item.
        task_re = re.compile(r"^\[( |x)\]\s+(.*)$")
        final_items = []
        for it in items:
            lines = it.split("\n")
            first_line = lines[0].strip()
            m = task_re.match(first_line)
            if m:
                state = m.group(1)
                text = m.group(2)
                task_checked = state == "x"
                final_items.append(
                    {"text": text, "task_item": True, "checked": task_checked}
                )
            else:
                final_items.append({"text": it, "task_item": False})

        list_type = "ordered_list" if ordered else "unordered_list"
        self.tokens.append(
            BlockToken(list_type, meta={"items": final_items}, line=start + 1)
        )

    def parse_paragraph(self):
        """Consume consecutive plain lines (until blank line or new block) into a paragraph."""
        start = self.pos
        lines = []
        while self.pos < self.length:
            line = self.lines[self.pos]
            if not line.strip():
                self.pos += 1
                break
            if self.starts_new_block(line.strip()):
                break
            lines.append(line)
            self.pos += 1

        content = "\n".join(lines).strip()
        if content:
            self.tokens.append(BlockToken("paragraph", content=content, line=start + 1))
+ self.tokens.append(BlockToken("paragraph", content=content, line=start + 1))
tests/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ Tests package initialization.
3
+ """
tests/data/calendar.ics ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ BEGIN:VCALENDAR
2
+ VERSION:2.0
3
+ PRODID:-//Mock Calendar//EN
4
+ CALSCALE:GREGORIAN
5
+
6
+ BEGIN:VEVENT
7
+ UID:recur-meeting-1@mock
8
+ DTSTAMP:20240523T000000Z
9
+ DTSTART;TZID=UTC:20250603T100000
10
+ DTEND;TZID=UTC:20250603T110000
11
+ RRULE:FREQ=WEEKLY;BYDAY=TU,TH
12
+ SUMMARY:Team Sync
13
+ END:VEVENT
14
+
15
+ BEGIN:VEVENT
16
+ UID:recur-meeting-2@mock
17
+ DTSTAMP:20240523T000000Z
18
+ DTSTART;TZID=UTC:20250602T140000
19
+ DTEND;TZID=UTC:20250602T150000
20
+ RRULE:FREQ=WEEKLY;BYDAY=MO,WE,FR
21
+ SUMMARY:Project Review
22
+ END:VEVENT
23
+
24
+ BEGIN:VEVENT
25
+ UID:single-event-1@mock
26
+ DTSTAMP:20240523T000000Z
27
+ DTSTART;TZID=UTC:20250605T130000
28
+ DTEND;TZID=UTC:20250605T140000
29
+ SUMMARY:Client Call
30
+ END:VEVENT
31
+
32
+ BEGIN:VEVENT
33
+ UID:single-event-2@mock
34
+ DTSTAMP:20240523T000000Z
35
+ DTSTART;TZID=UTC:20250616T110000
36
+ DTEND;TZID=UTC:20250616T120000
37
+ SUMMARY:Workshop
38
+ END:VEVENT
39
+
40
+ BEGIN:VEVENT
41
+ UID:single-event-3@mock
42
+ DTSTAMP:20240523T000000Z
43
+ DTSTART;TZID=UTC:20250707T150000
44
+ DTEND;TZID=UTC:20250707T163000
45
+ SUMMARY:Planning Session
46
+ END:VEVENT
47
+
48
+ BEGIN:VEVENT
49
+ UID:single-event-4@mock
50
+ DTSTAMP:20240523T000000Z
51
+ DTSTART;TZID=UTC:20250722T093000
52
+ DTEND;TZID=UTC:20250722T103000
53
+ SUMMARY:Demo
54
+ END:VEVENT
55
+
56
+ END:VCALENDAR
tests/secrets/nebius_secrets.py.template ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Nebius API credentials
2
+ NEBIUS_API_KEY = ""
3
+ NEBIUS_MODEL = ""
tests/test_calendar_operations.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import icalendar
2
+
3
+ from pathlib import Path
4
+
5
+
6
+ def test_calendar_operations():
7
+ ics_path = Path("tests/data/calendar.ics")
8
+
9
+ calendar = icalendar.Calendar.from_ical(ics_path.read_bytes())
10
+
11
+ for event in calendar.events:
12
+ print(event.get("summary"))
13
+
14
+ def to_iso(val):
15
+ if hasattr(val, "dt"):
16
+ dt = val.dt
17
+ if hasattr(dt, "isoformat"):
18
+ return dt.isoformat()
19
+ return str(dt)
20
+ return str(val)
21
+
22
+ print(to_iso(event.get("dtstart")))
23
+ print(to_iso(event.get("dtend")))
tests/test_factory.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from datetime import datetime, timedelta
3
+ from src.utils.load_secrets import load_secrets
4
+ from dateutil.rrule import rrulestr
5
+ from icalendar import Calendar, vDDDTypes
6
+
7
+ # Load environment variables for agent (if needed)
8
+ load_secrets("tests/secrets/creds.py")
9
+
10
+ import factory.data_provider as data_provider
11
+ from src.utils.extract_calendar import extract_ical_entries
12
+
13
+
14
+ @pytest.mark.asyncio
15
+ async def test_factory_demo_agent():
16
+ # Use a simple string as the project description
17
+ test_input = "Test project for schedule generation."
18
+
19
+ # Generate schedule data using generate_agent_data
20
+ schedule = await data_provider.generate_agent_data(test_input)
21
+
22
+ # Assert basic schedule properties
23
+ assert len(schedule.employees) > 0
24
+ assert schedule.schedule_info.total_slots > 0
25
+ assert len(schedule.tasks) > 0
26
+
27
+ # Verify employee skills
28
+ for employee in schedule.employees:
29
+ assert len(employee.skills) > 0
30
+ # Check that each employee has at least one required skill
31
+ assert any(
32
+ skill in data_provider.SKILL_SET.required_skills
33
+ for skill in employee.skills
34
+ )
35
+
36
+ # Verify task properties
37
+ for task in schedule.tasks:
38
+ assert task.duration_slots > 0
39
+ assert task.required_skill
40
+ assert hasattr(task, "project_id")
41
+
42
+ # Print schedule details for debugging
43
+ print("Employee names:", [e.name for e in schedule.employees])
44
+ print("Tasks count:", len(schedule.tasks))
45
+ print("Total slots:", schedule.schedule_info.total_slots)
46
+
47
+
48
+ @pytest.mark.asyncio
49
+ async def test_factory_mcp():
50
+ # Load the real calendar.ics file
51
+ with open("tests/data/calendar.ics", "rb") as f:
52
+ file_bytes = f.read()
53
+ entries, err = extract_ical_entries(file_bytes)
54
+ assert err is None
55
+ assert entries is not None
56
+ assert len(entries) > 0
57
+
58
+ print("\nEntries:")
59
+ print(entries)
60
+
61
+ # Use a made-up user message
62
+ user_message = "Create a new AWS VPC."
63
+
64
+ # Call generate_mcp_data directly
65
+ df = await data_provider.generate_mcp_data(entries, user_message)
66
+
67
+ # Assert the DataFrame is not empty
68
+ assert df is not None
69
+ assert not df.empty
70
+
71
+ # Print the DataFrame for debug
72
+ print(df)
tests/test_task_composer_agent.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest, logging
2
+
3
+
4
+ from src.utils.load_secrets import load_secrets
5
+
6
+ # Configure logging
7
+ logging.basicConfig(level=logging.DEBUG)
8
+ logger = logging.getLogger(__name__)
9
+
10
+ # Load environment variables
11
+ load_secrets("tests/secrets/creds.py")
12
+ # Import task_composer_agent after environment variables are set
13
+ from src.agents.task_composer_agent import TaskComposerAgent
14
+
15
+
16
+ @pytest.mark.asyncio
17
+ async def test_task_composer_agent():
18
+ logger.info("\n=== Test Environment ===")
19
+
20
+ logger.info("\n=== Starting Test ===")
21
+
22
+ # Create agent
23
+ logger.info("\nInitializing task_composer_agent...")
24
+ agent = TaskComposerAgent()
25
+
26
+ # Test input
27
+ test_input = "Plan a weekend trip to Paris"
28
+ logger.info(f"\n=== Test Input ===")
29
+ logger.info(f"Task: {test_input}")
30
+
31
+ # Run workflow
32
+ logger.info("\n=== Running Workflow ===")
33
+ result = await agent.run_workflow(test_input)
34
+
35
+ # Print the result
36
+ logger.info(f"\n=== Final Result ===")
37
+ logger.info("Task breakdown with estimated times:")
38
+ for task, duration, skill in result:
39
+ logger.info(f"- {task}: {duration} units (Skill: {skill})")
40
+
41
+ # Calculate total time
42
+ total_time = sum(
43
+ int(time) if str(time).isdigit() and str(time) != "" else 0
44
+ for _, time, _ in result
45
+ )
46
+ logger.info(
47
+ f"\nTotal estimated time: {total_time} units ({total_time * 30} minutes)"
48
+ )
49
+
50
+ # Verify the result is a list of 3-tuples
51
+ assert isinstance(result, list), f"Expected a list, got {type(result)}"
52
+ assert all(
53
+ isinstance(item, tuple) and len(item) == 3 for item in result
54
+ ), "Expected a list of (task, duration, skill) tuples"
55
+ logger.info("\n=== Test Summary ===")
56
+ logger.info("βœ“ Test passed!")
57
+ logger.info(f"βœ“ Task: {test_input}")
58
+ logger.info(
59
+ f"βœ“ Total estimated time: {total_time} units ({total_time * 30} minutes)"
60
+ )