NitinBot001 commited on
Commit
3ca5f72
·
verified ·
1 Parent(s): 9e9b02f

Upload 38 files

Browse files
.env.example ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # TTSFM Environment Configuration
2
+
3
+ # Server Configuration
4
+ HOST=0.0.0.0
5
+ PORT=7000
6
+
7
+ # SSL Configuration
8
+ VERIFY_SSL=true
9
+
10
+ # Flask Configuration
11
+ FLASK_ENV=production
12
+ FLASK_APP=app.py
13
+ DEBUG=false
14
+
15
+ # API Key Protection (Optional)
16
+ # Set REQUIRE_API_KEY=true to enable API key authentication
17
+ REQUIRE_API_KEY=false
18
+
19
+ # Set your API key here when protection is enabled
20
+ # This key will be required for all TTS generation requests
21
+ TTSFM_API_KEY=your-secret-api-key-here
22
+
23
+ # Example usage:
24
+ # 1. Set REQUIRE_API_KEY=true
25
+ # 2. Set TTSFM_API_KEY to your desired secret key
26
+ # 3. Restart the application
27
+ # 4. All TTS requests will now require the API key in:
28
+ # - Authorization header (Bearer token) - OpenAI compatible
29
+ # - X-API-Key header
30
+ # - api_key query parameter
31
+ # - api_key in JSON body
.github/ISSUE_TEMPLATE/bug_report.md CHANGED
@@ -1,38 +1,38 @@
1
- ---
2
- name: Bug report
3
- about: Create a report to help us improve
4
- title: ''
5
- labels: ''
6
- assignees: ''
7
-
8
- ---
9
-
10
- **Describe the bug**
11
- A clear and concise description of what the bug is.
12
-
13
- **To Reproduce**
14
- Steps to reproduce the behavior:
15
- 1. Go to '...'
16
- 2. Click on '....'
17
- 3. Scroll down to '....'
18
- 4. See error
19
-
20
- **Expected behavior**
21
- A clear and concise description of what you expected to happen.
22
-
23
- **Screenshots**
24
- If applicable, add screenshots to help explain your problem.
25
-
26
- **Desktop (please complete the following information):**
27
- - OS: [e.g. iOS]
28
- - Browser [e.g. chrome, safari]
29
- - Version [e.g. 22]
30
-
31
- **Smartphone (please complete the following information):**
32
- - Device: [e.g. iPhone6]
33
- - OS: [e.g. iOS8.1]
34
- - Browser [e.g. stock browser, safari]
35
- - Version [e.g. 22]
36
-
37
- **Additional context**
38
- Add any other context about the problem here.
 
1
+ ---
2
+ name: Bug report
3
+ about: Create a report to help us improve
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Describe the bug**
11
+ A clear and concise description of what the bug is.
12
+
13
+ **To Reproduce**
14
+ Steps to reproduce the behavior:
15
+ 1. Go to '...'
16
+ 2. Click on '....'
17
+ 3. Scroll down to '....'
18
+ 4. See error
19
+
20
+ **Expected behavior**
21
+ A clear and concise description of what you expected to happen.
22
+
23
+ **Screenshots**
24
+ If applicable, add screenshots to help explain your problem.
25
+
26
+ **Desktop (please complete the following information):**
27
+ - OS: [e.g. iOS]
28
+ - Browser [e.g. chrome, safari]
29
+ - Version [e.g. 22]
30
+
31
+ **Smartphone (please complete the following information):**
32
+ - Device: [e.g. iPhone6]
33
+ - OS: [e.g. iOS8.1]
34
+ - Browser [e.g. stock browser, safari]
35
+ - Version [e.g. 22]
36
+
37
+ **Additional context**
38
+ Add any other context about the problem here.
.github/ISSUE_TEMPLATE/feature_request.md CHANGED
@@ -1,20 +1,20 @@
1
- ---
2
- name: Feature request
3
- about: Suggest an idea for this project
4
- title: ''
5
- labels: ''
6
- assignees: ''
7
-
8
- ---
9
-
10
- **Is your feature request related to a problem? Please describe.**
11
- A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12
-
13
- **Describe the solution you'd like**
14
- A clear and concise description of what you want to happen.
15
-
16
- **Describe alternatives you've considered**
17
- A clear and concise description of any alternative solutions or features you've considered.
18
-
19
- **Additional context**
20
- Add any other context or screenshots about the feature request here.
 
1
+ ---
2
+ name: Feature request
3
+ about: Suggest an idea for this project
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Is your feature request related to a problem? Please describe.**
11
+ A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12
+
13
+ **Describe the solution you'd like**
14
+ A clear and concise description of what you want to happen.
15
+
16
+ **Describe alternatives you've considered**
17
+ A clear and concise description of any alternative solutions or features you've considered.
18
+
19
+ **Additional context**
20
+ Add any other context or screenshots about the feature request here.
.github/workflows/docker-build.yml CHANGED
@@ -1,78 +1,78 @@
1
- name: Docker Build and Push
2
-
3
- on:
4
- release:
5
- types: [published]
6
-
7
- env:
8
- REGISTRY_DOCKERHUB: docker.io
9
- REGISTRY_GHCR: ghcr.io
10
- IMAGE_NAME: ${{ github.repository }}
11
-
12
- jobs:
13
- build-and-push:
14
- runs-on: ubuntu-latest
15
- permissions:
16
- contents: read
17
- packages: write
18
- steps:
19
- - name: Checkout repository
20
- uses: actions/checkout@v4
21
-
22
- - name: Set up QEMU
23
- uses: docker/setup-qemu-action@v3
24
-
25
- - name: Set up Docker Buildx
26
- uses: docker/setup-buildx-action@v3
27
- with:
28
- driver: docker-container
29
-
30
- - name: Login to Docker Hub
31
- uses: docker/login-action@v3
32
- with:
33
- username: ${{ secrets.DOCKERHUB_USERNAME }}
34
- password: ${{ secrets.DOCKERHUB_TOKEN }}
35
-
36
- - name: Login to GitHub Container Registry
37
- uses: docker/login-action@v3
38
- with:
39
- registry: ${{ env.REGISTRY_GHCR }}
40
- username: ${{ github.actor }}
41
- password: ${{ secrets.GITHUB_TOKEN }}
42
-
43
- - name: Extract metadata
44
- id: meta
45
- uses: docker/metadata-action@v5
46
- with:
47
- images: |
48
- ${{ secrets.DOCKERHUB_USERNAME }}/ttsfm
49
- ${{ env.REGISTRY_GHCR }}/${{ env.IMAGE_NAME }}
50
- tags: |
51
- type=ref,event=tag
52
- type=semver,pattern={{version}}
53
- type=semver,pattern={{major}}.{{minor}}
54
- type=semver,pattern={{major}}
55
- type=raw,value=latest
56
- labels: |
57
- org.opencontainers.image.source=${{ github.repositoryUrl }}
58
- org.opencontainers.image.description=Free TTS API server compatible with OpenAI's TTS API format using openai.fm
59
- org.opencontainers.image.licenses=MIT
60
- org.opencontainers.image.title=TTSFM - Free TTS API Server
61
- org.opencontainers.image.vendor=dbcccc
62
-
63
- - name: Build and push
64
- id: build-and-push
65
- uses: docker/build-push-action@v5
66
- with:
67
- context: .
68
- platforms: linux/amd64,linux/arm64
69
- push: true
70
- tags: ${{ steps.meta.outputs.tags }}
71
- labels: ${{ steps.meta.outputs.labels }}
72
- cache-from: type=gha
73
- cache-to: type=gha,mode=max
74
-
75
- - name: Show image info
76
- run: |
77
- echo "Pushed tags: ${{ steps.meta.outputs.tags }}"
78
  echo "Image digest: ${{ steps.build-and-push.outputs.digest }}"
 
1
+ name: Docker Build and Push
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ env:
8
+ REGISTRY_DOCKERHUB: docker.io
9
+ REGISTRY_GHCR: ghcr.io
10
+ IMAGE_NAME: ${{ github.repository }}
11
+
12
+ jobs:
13
+ build-and-push:
14
+ runs-on: ubuntu-latest
15
+ permissions:
16
+ contents: read
17
+ packages: write
18
+ steps:
19
+ - name: Checkout repository
20
+ uses: actions/checkout@v4
21
+
22
+ - name: Set up QEMU
23
+ uses: docker/setup-qemu-action@v3
24
+
25
+ - name: Set up Docker Buildx
26
+ uses: docker/setup-buildx-action@v3
27
+ with:
28
+ driver: docker-container
29
+
30
+ - name: Login to Docker Hub
31
+ uses: docker/login-action@v3
32
+ with:
33
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
34
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
35
+
36
+ - name: Login to GitHub Container Registry
37
+ uses: docker/login-action@v3
38
+ with:
39
+ registry: ${{ env.REGISTRY_GHCR }}
40
+ username: ${{ github.actor }}
41
+ password: ${{ secrets.GITHUB_TOKEN }}
42
+
43
+ - name: Extract metadata
44
+ id: meta
45
+ uses: docker/metadata-action@v5
46
+ with:
47
+ images: |
48
+ ${{ secrets.DOCKERHUB_USERNAME }}/ttsfm
49
+ ${{ env.REGISTRY_GHCR }}/${{ env.IMAGE_NAME }}
50
+ tags: |
51
+ type=ref,event=tag
52
+ type=semver,pattern={{version}}
53
+ type=semver,pattern={{major}}.{{minor}}
54
+ type=semver,pattern={{major}}
55
+ type=raw,value=latest
56
+ labels: |
57
+ org.opencontainers.image.source=${{ github.repositoryUrl }}
58
+ org.opencontainers.image.description=Free TTS API server compatible with OpenAI's TTS API format using openai.fm
59
+ org.opencontainers.image.licenses=MIT
60
+ org.opencontainers.image.title=TTSFM - Free TTS API Server
61
+ org.opencontainers.image.vendor=dbcccc
62
+
63
+ - name: Build and push
64
+ id: build-and-push
65
+ uses: docker/build-push-action@v5
66
+ with:
67
+ context: .
68
+ platforms: linux/amd64,linux/arm64
69
+ push: true
70
+ tags: ${{ steps.meta.outputs.tags }}
71
+ labels: ${{ steps.meta.outputs.labels }}
72
+ cache-from: type=gha
73
+ cache-to: type=gha,mode=max
74
+
75
+ - name: Show image info
76
+ run: |
77
+ echo "Pushed tags: ${{ steps.meta.outputs.tags }}"
78
  echo "Image digest: ${{ steps.build-and-push.outputs.digest }}"
.github/workflows/release.yml CHANGED
@@ -1,90 +1,95 @@
1
- name: Release and Publish
2
-
3
- on:
4
- push:
5
- tags:
6
- - 'v*' # Triggers on version tags like v1.0.0, v3.0.1, etc.
7
-
8
- permissions:
9
- contents: write
10
- id-token: write
11
-
12
- jobs:
13
- release-and-publish:
14
- runs-on: ubuntu-latest
15
-
16
- steps:
17
- - uses: actions/checkout@v4
18
-
19
- - name: Set up Python
20
- uses: actions/setup-python@v4
21
- with:
22
- python-version: '3.11'
23
-
24
- - name: Install dependencies
25
- run: |
26
- python -m pip install --upgrade pip
27
- pip install build twine
28
-
29
- - name: Test package import
30
- run: |
31
- pip install -e .
32
- python -c "import ttsfm; print(f'✅ TTSFM imported successfully')"
33
- python -c "from ttsfm import TTSClient; print('✅ TTSClient imported successfully')"
34
-
35
- - name: Build package
36
- run: |
37
- python -m build
38
- echo "📦 Package built successfully"
39
- ls -la dist/
40
-
41
- - name: Check package
42
- run: |
43
- twine check dist/*
44
- echo "✅ Package validation passed"
45
-
46
- - name: Publish to PyPI
47
- uses: pypa/gh-action-pypi-publish@release/v1
48
- with:
49
- password: ${{ secrets.PYPI_API_TOKEN }}
50
-
51
- - name: Create GitHub Release
52
- uses: softprops/action-gh-release@v1
53
- with:
54
- body: |
55
- ## 🎉 TTSFM ${{ github.ref_name }}
56
-
57
- New release of TTSFM - Free Text-to-Speech API with OpenAI compatibility.
58
-
59
- ### 📦 Installation
60
- ```bash
61
- pip install ttsfm==${{ github.ref_name }}
62
- ```
63
-
64
- ### 🚀 Quick Start
65
- ```python
66
- from ttsfm import TTSClient
67
-
68
- client = TTSClient()
69
- response = client.generate_speech("Hello from TTSFM!")
70
- response.save_to_file("hello")
71
- ```
72
-
73
- ### 🐳 Docker
74
- ```bash
75
- docker run -p 8000:8000 dbcccc/ttsfm:latest
76
- ```
77
-
78
- ### Features
79
- - 🆓 Completely free (uses openai.fm service)
80
- - 🎯 OpenAI-compatible API
81
- - 🗣️ 11 voices available
82
- - 🎵 6 audio formats (MP3, WAV, OPUS, AAC, FLAC, PCM)
83
- - Async and sync clients
84
- - 🌐 Web interface included
85
- - 🔧 CLI tool available
86
-
87
- ### 📚 Documentation
88
- See [README](https://github.com/dbccccccc/ttsfm#readme) for full documentation.
89
- draft: false
90
- prerelease: false
 
 
 
 
 
 
1
+ name: Release and Publish
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*' # Triggers on version tags like v1.0.0, v3.0.1, etc.
7
+
8
+ permissions:
9
+ contents: write
10
+ id-token: write
11
+
12
+ jobs:
13
+ release-and-publish:
14
+ runs-on: ubuntu-latest
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python
20
+ uses: actions/setup-python@v4
21
+ with:
22
+ python-version: '3.11'
23
+
24
+ - name: Install dependencies
25
+ run: |
26
+ python -m pip install --upgrade pip
27
+ pip install build twine
28
+
29
+ - name: Test package import
30
+ run: |
31
+ pip install -e .
32
+ python -c "import ttsfm; print(f'✅ TTSFM imported successfully')"
33
+ python -c "from ttsfm import TTSClient; print('✅ TTSClient imported successfully')"
34
+
35
+ - name: Build package
36
+ run: |
37
+ python -m build
38
+ echo "📦 Package built successfully"
39
+ ls -la dist/
40
+
41
+ - name: Check package
42
+ run: |
43
+ twine check dist/*
44
+ echo "✅ Package validation passed"
45
+
46
+ - name: Publish to PyPI
47
+ uses: pypa/gh-action-pypi-publish@release/v1
48
+ with:
49
+ attestations: true
50
+ skip-existing: true
51
+
52
+ - name: Extract version (strip leading v)
53
+ id: ver
54
+ run: echo "version=${GITHUB_REF_NAME#v}" >> "$GITHUB_OUTPUT"
55
+
56
+ - name: Create GitHub Release
57
+ uses: softprops/action-gh-release@v1
58
+ with:
59
+ body: |
60
+ ## 🎉 TTSFM ${{ github.ref_name }}
61
+
62
+ New release of TTSFM - Free Text-to-Speech API with OpenAI compatibility.
63
+
64
+ ### 📦 Installation
65
+ ```bash
66
+ pip install ttsfm==${{ steps.ver.outputs.version }}
67
+ ```
68
+
69
+ ### 🚀 Quick Start
70
+ ```python
71
+ from ttsfm import TTSClient
72
+
73
+ client = TTSClient()
74
+ response = client.generate_speech("Hello from TTSFM!")
75
+ response.save_to_file("hello")
76
+ ```
77
+
78
+ ### 🐳 Docker
79
+ ```bash
80
+ docker run -p 8000:8000 dbcccc/ttsfm:latest
81
+ ```
82
+
83
+ ### Features
84
+ - 🆓 Completely free (uses openai.fm service)
85
+ - 🎯 OpenAI-compatible API
86
+ - 🗣️ 11 voices available
87
+ - 🎵 6 audio formats (MP3, WAV, OPUS, AAC, FLAC, PCM)
88
+ - Async and sync clients
89
+ - 🌐 Web interface included
90
+ - 🔧 CLI tool available
91
+
92
+ ### 📚 Documentation
93
+ See [README](https://github.com/dbccccccc/ttsfm#readme) for full documentation.
94
+ draft: false
95
+ prerelease: false
.gitignore CHANGED
@@ -1,156 +1,159 @@
1
- # Python
2
- __pycache__/
3
- *.py[cod]
4
- *$py.class
5
- *.so
6
- .Python
7
- build/
8
- develop-eggs/
9
- dist/
10
- downloads/
11
- eggs/
12
- .eggs/
13
- lib/
14
- lib64/
15
- parts/
16
- sdist/
17
- var/
18
- wheels/
19
- *.egg-info/
20
- .installed.cfg
21
- *.egg
22
- MANIFEST
23
-
24
- # Virtual Environment
25
- venv/
26
- env/
27
- ENV/
28
- .venv/
29
-
30
- # Environment variables
31
- .env
32
- .env.local
33
- .env.production
34
-
35
- # IDE
36
- .idea/
37
- .vscode/
38
- *.swp
39
- *.swo
40
- .spyderproject
41
- .spyproject
42
-
43
- # OS
44
- .DS_Store
45
- .DS_Store?
46
- ._*
47
- .Spotlight-V100
48
- .Trashes
49
- ehthumbs.db
50
- Thumbs.db
51
-
52
- # Generated audio files (for testing)
53
- *.mp3
54
- *.wav
55
- *.opus
56
- *.aac
57
- *.flac
58
- *.pcm
59
- test_output.*
60
- output.*
61
- hello.*
62
- speech.*
63
-
64
- # Logs
65
- *.log
66
- logs/
67
- .pytest_cache/
68
-
69
- # Temporary files
70
- tmp/
71
- temp/
72
- .tmp/
73
-
74
- # Coverage reports
75
- htmlcov/
76
- .coverage
77
- .coverage.*
78
- coverage.xml
79
- *.cover
80
- .hypothesis/
81
-
82
- # Documentation builds
83
- docs/_build/
84
- site/
85
-
86
- # Package builds
87
- *.tar.gz
88
- *.whl
89
- dist/
90
- build/
91
-
92
- # MyPy
93
- .mypy_cache/
94
- .dmypy.json
95
- dmypy.json
96
-
97
- # Jupyter Notebook
98
- .ipynb_checkpoints
99
-
100
- # pyenv
101
- .python-version
102
-
103
- # pipenv
104
- Pipfile.lock
105
-
106
- # PEP 582
107
- __pypackages__/
108
-
109
- # Celery
110
- celerybeat-schedule
111
- celerybeat.pid
112
-
113
- # SageMath parsed files
114
- *.sage.py
115
-
116
- # Rope project settings
117
- .ropeproject
118
-
119
- # mkdocs documentation
120
- /site
121
-
122
- # Pyre type checker
123
- .pyre/
124
-
125
- # Additional exclusions for GitHub
126
-
127
- # API Keys and Secrets
128
- config.json
129
- secrets.json
130
- .secrets
131
- api_keys.txt
132
-
133
- # Database files
134
- *.db
135
- *.sqlite
136
- *.sqlite3
137
-
138
- # Backup files
139
- *.bak
140
- *.backup
141
- *~
142
-
143
- # Node.js (if using any JS tools)
144
- node_modules/
145
- npm-debug.log*
146
- yarn-debug.log*
147
- yarn-error.log*
148
-
149
- # Docker
150
- .dockerignore
151
- Dockerfile.dev
152
- docker-compose.override.yml
153
-
154
- # Local configuration
155
- local_settings.py
156
- local_config.py
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+ MANIFEST
23
+
24
+ # Virtual Environment
25
+ venv/
26
+ env/
27
+ ENV/
28
+ .venv/
29
+
30
+ # Environment variables
31
+ .env
32
+ .env.local
33
+ .env.production
34
+
35
+ # IDE
36
+ .idea/
37
+ .vscode/
38
+ *.swp
39
+ *.swo
40
+ .spyderproject
41
+ .spyproject
42
+
43
+ # OS
44
+ .DS_Store
45
+ .DS_Store?
46
+ ._*
47
+ .Spotlight-V100
48
+ .Trashes
49
+ ehthumbs.db
50
+ Thumbs.db
51
+
52
+ # Generated audio files (for testing)
53
+ *.mp3
54
+ *.wav
55
+ *.opus
56
+ *.aac
57
+ *.flac
58
+ *.pcm
59
+ test_output.*
60
+ output.*
61
+ hello.*
62
+ speech.*
63
+
64
+ # Logs
65
+ *.log
66
+ logs/
67
+ .pytest_cache/
68
+
69
+ # Temporary files
70
+ tmp/
71
+ temp/
72
+ .tmp/
73
+
74
+ # Coverage reports
75
+ htmlcov/
76
+ .coverage
77
+ .coverage.*
78
+ coverage.xml
79
+ *.cover
80
+ .hypothesis/
81
+
82
+ # Documentation builds
83
+ docs/_build/
84
+ site/
85
+
86
+ # Package builds
87
+ *.tar.gz
88
+ *.whl
89
+ dist/
90
+ build/
91
+
92
+ # MyPy
93
+ .mypy_cache/
94
+ .dmypy.json
95
+ dmypy.json
96
+
97
+ # Jupyter Notebook
98
+ .ipynb_checkpoints
99
+
100
+ # pyenv
101
+ .python-version
102
+
103
+ # pipenv
104
+ Pipfile.lock
105
+
106
+ # PEP 582
107
+ __pypackages__/
108
+
109
+ # Celery
110
+ celerybeat-schedule
111
+ celerybeat.pid
112
+
113
+ # SageMath parsed files
114
+ *.sage.py
115
+
116
+ # Rope project settings
117
+ .ropeproject
118
+
119
+ # mkdocs documentation
120
+ /site
121
+
122
+ # Pyre type checker
123
+ .pyre/
124
+
125
+ # Additional exclusions for GitHub
126
+
127
+ # API Keys and Secrets
128
+ config.json
129
+ secrets.json
130
+ .secrets
131
+ api_keys.txt
132
+
133
+ # Database files
134
+ *.db
135
+ *.sqlite
136
+ *.sqlite3
137
+
138
+ # Backup files
139
+ *.bak
140
+ *.backup
141
+ *~
142
+
143
+ # Node.js (if using any JS tools)
144
+ node_modules/
145
+ npm-debug.log*
146
+ yarn-debug.log*
147
+ yarn-error.log*
148
+
149
+ # Docker
150
+ .dockerignore
151
+ Dockerfile.dev
152
+ docker-compose.override.yml
153
+
154
+ # Local configuration
155
+ local_settings.py
156
+ local_config.py
157
+
158
+ # Claude
159
+ .claude/
CHANGELOG.md CHANGED
@@ -1,191 +1,266 @@
1
- # Changelog
2
-
3
- All notable changes to this project will be documented in this file.
4
-
5
- The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
- and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
-
8
- ## [3.1.0] - 2024-12-19
9
-
10
- ### 🔧 Format Support Improvements
11
-
12
- This release focuses on fixing audio format handling and improving format delivery optimization.
13
-
14
- ### ✨ Added
15
-
16
- - **Smart Header Selection**: Intelligent HTTP header selection to optimize format delivery from openai.fm service
17
- - **Format Mapping Functions**: Helper functions for better format handling and optimization
18
- - **Enhanced Web Interface**: Improved format selection with detailed descriptions for each format
19
- - **Comprehensive Format Documentation**: Updated README and documentation with complete format information
20
-
21
- ### 🔄 Changed
22
-
23
- - **File Naming Logic**: Files are now saved with extensions based on the actual returned format, not the requested format
24
- - **Enhanced Logging**: Added format-specific log messages for better debugging
25
- - **Web API Enhancement**: `/api/formats` endpoint now provides detailed information about all supported formats
26
- - **Documentation Updates**: README and package documentation now include comprehensive format guides
27
-
28
- ### 🐛 Fixed
29
-
30
- - **MAJOR FIX**: Resolved file naming issue where files were saved with incorrect double extensions (e.g., `test.wav.mp3`, `test.opus.wav`)
31
- - **Correct File Extensions**: Files now save with proper single extensions based on actual audio format (e.g., `test.mp3`, `test.wav`)
32
- - **Format Optimization**: Improved format delivery through smart request optimization
33
- - **Format Handling**: Better handling of all supported audio formats
34
-
35
- ### 📝 Technical Details
36
-
37
- - **Format Optimization**: Smart request optimization to deliver the best quality for each format
38
- - **Backward Compatibility**: Existing code continues to work unchanged
39
- - **Enhanced Format Support**: Improved support for all 6 audio formats (MP3, WAV, OPUS, AAC, FLAC, PCM)
40
-
41
- ## [3.0.0] - 2025-06-06
42
-
43
- ### 🎉 First Python Package Release
44
-
45
- This is the first release of TTSFM as an installable Python package. Previous versions (v1.x and v2.x) were service-only releases that provided the API server but not a pip-installable package.
46
-
47
- ### Added
48
-
49
- - **Complete Package Restructure**: Modern Python package structure with proper typing
50
- - **Async Support**: Full asynchronous client implementation with `asyncio`
51
- - **OpenAI API Compatibility**: Drop-in replacement for OpenAI TTS API
52
- - **Type Hints**: Complete type annotation support throughout the codebase
53
- - **CLI Interface**: Command-line tool for easy TTS generation
54
- - **Web Application**: Optional Flask-based web interface
55
- - **Docker Support**: Multi-architecture Docker images (linux/amd64, linux/arm64)
56
- - **Comprehensive Error Handling**: Detailed exception hierarchy
57
- - **Multiple Audio Formats**: Support for MP3, WAV, FLAC, and more
58
- - **Voice Options**: Multiple voice models (alloy, ash, ballad, coral, echo, fable, nova, onyx, sage, shimmer)
59
- - **Text Processing**: Automatic text length validation and splitting
60
- - **Rate Limiting**: Built-in rate limiting and retry mechanisms
61
- - **Configuration**: Environment variable and configuration file support
62
-
63
- ### 🔧 Technical Improvements
64
-
65
- - **Modern Build System**: Using `pyproject.toml` with setuptools
66
- - **GitHub Actions**: Automated Docker builds and PyPI publishing
67
- - **Development Tools**: Pre-commit hooks, linting, testing setup
68
- - **Documentation**: Comprehensive README and inline documentation
69
- - **Package Management**: Proper dependency management with optional extras
70
-
71
- ### 🌐 API Changes
72
-
73
- - **Breaking**: Complete API redesign for better usability
74
- - **OpenAI Compatible**: `/v1/audio/speech` endpoint compatibility
75
- - **RESTful Design**: Clean REST API design
76
- - **Health Checks**: Built-in health check endpoints
77
- - **CORS Support**: Cross-origin resource sharing enabled
78
-
79
- ### 📦 Installation Options
80
-
81
- ```bash
82
- # Basic installation
83
- pip install ttsfm
84
-
85
- # With web application support
86
- pip install ttsfm[web]
87
-
88
- # With development tools
89
- pip install ttsfm[dev]
90
-
91
- # Docker
92
- docker run -p 8000:8000 ghcr.io/dbccccccc/ttsfm:latest
93
- ```
94
-
95
- ### 🚀 Quick Start
96
-
97
- ```python
98
- from ttsfm import TTSClient, Voice
99
-
100
- client = TTSClient()
101
- response = client.generate_speech(
102
- text="Hello! This is TTSFM v3.0.0",
103
- voice=Voice.CORAL
104
- )
105
-
106
- with open("speech.mp3", "wb") as f:
107
- f.write(response.audio_data)
108
- ```
109
-
110
- ### 📦 Package vs Service History
111
-
112
- **Important Note**: This v3.0.0 is the first release of TTSFM as a Python package available on PyPI. Previous versions (v1.x and v2.x) were service/API server releases only and were not available as installable packages.
113
-
114
- - **v1.x - v2.x**: Service releases (API server only, not pip-installable)
115
- - **v3.0.0+**: Full Python package releases (pip-installable with service capabilities)
116
-
117
- ### 🐛 Bug Fixes
118
-
119
- - Fixed Docker build issues with dependency resolution
120
- - Improved error handling and user feedback
121
- - Better handling of long text inputs
122
- - Enhanced stability and performance
123
-
124
- ### 📚 Documentation
125
-
126
- - Complete API documentation
127
- - Usage examples and tutorials
128
- - Docker deployment guide
129
- - Development setup instructions
130
-
131
- ---
132
-
133
- ## Previous Service Releases (Not Available as Python Packages)
134
-
135
- The following versions were service/API server releases only and were not available as pip-installable packages:
136
-
137
- ### [2.0.0-alpha9] - 2025-04-09
138
- - Service improvements (alpha release)
139
-
140
- ### [2.0.0-alpha8] - 2025-04-09
141
- - Service improvements (alpha release)
142
-
143
- ### [2.0.0-alpha7] - 2025-04-07
144
- - Service improvements (alpha release)
145
-
146
- ### [2.0.0-alpha6] - 2025-04-07
147
- - Service improvements (alpha release)
148
-
149
- ### [2.0.0-alpha5] - 2025-04-07
150
- - Service improvements (alpha release)
151
-
152
- ### [2.0.0-alpha4] - 2025-04-07
153
- - Service improvements (alpha release)
154
-
155
- ### [2.0.0-alpha3] - 2025-04-07
156
- - Service improvements (alpha release)
157
-
158
- ### [2.0.0-alpha2] - 2025-04-07
159
- - Service improvements (alpha release)
160
-
161
- ### [2.0.0-alpha1] - 2025-04-07
162
- - Alpha release (DO NOT USE)
163
-
164
- ### [1.3.0] - 2025-03-28
165
- - Support for additional audio file formats in the API
166
- - Alignment with formats supported by the official API
167
-
168
- ### [1.2.2] - 2025-03-28
169
- - Fixed Docker support
170
-
171
- ### [1.2.1] - 2025-03-28
172
- - Color change for indicator for status
173
- - Voice preview on webpage for each voice
174
-
175
- ### [1.2.0] - 2025-03-26
176
- - Enhanced stability and availability by implementing advanced request handling mechanisms
177
- - Removed the proxy pool
178
-
179
- ### [1.1.2] - 2025-03-26
180
- - Version display on webpage
181
- - Last version of 1.1.x
182
-
183
- ### [1.1.1] - 2025-03-26
184
- - Build fixes
185
-
186
- ### [1.1.0] - 2025-03-26
187
- - Project restructuring for better future development experiences
188
- - Added .env settings
189
-
190
- ### [1.0.0] - 2025-03-26
191
- - First service release
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [3.2.3] - 2025-06-27
9
+
10
+ ### 🔄 Enhanced OpenAI API Compatibility
11
+
12
+ This release consolidates the OpenAI-compatible API endpoints and introduces intelligent auto-combine functionality.
13
+
14
+ ### ✨ Added
15
+
16
+ - **Auto-Combine Parameter**: New optional `auto_combine` parameter in `/v1/audio/speech` endpoint (default: `true`)
17
+ - **Intelligent Text Handling**: Automatically detects long text and combines audio chunks when `auto_combine=true`
18
+ - **Enhanced Error Messages**: Better error handling for long text when auto-combine is disabled
19
+ - **Response Headers**: Added `X-Auto-Combine` and `X-Chunks-Combined` headers for transparency
20
+
21
+ ### 🔄 Changed
22
+
23
+ - **Unified Endpoint**: Combined `/v1/audio/speech` and `/v1/audio/speech-combined` into single endpoint
24
+ - **Backward Compatibility**: Maintains full OpenAI API compatibility while adding TTSFM-specific features
25
+ - **Default Behavior**: Long text is now automatically split and combined by default (can be disabled)
26
+
27
+ ### 🗑️ Removed
28
+
29
+ - **Deprecated Endpoint**: Removed `/v1/audio/speech-combined` endpoint (functionality moved to main endpoint)
30
+ - **Legacy Web Options**: Removed confusing batch processing options from web interface for cleaner UX
31
+ - **Complex UI Elements**: Simplified playground interface to focus on auto-combine
32
+
33
+ ### 🧹 Streamlined Web Experience
34
+
35
+ - **User-Focused Design**: Web interface now emphasizes auto-combine as the primary approach
36
+ - **Developer Features Preserved**: All advanced functionality remains in Python package
37
+ - **Clear Separation**: Web for users, Python package for developers
38
+
39
+ ### 📋 Migration Guide
40
+
41
+ - **No Breaking Changes**: Existing API calls continue to work unchanged
42
+ - **Long Text**: Now automatically handled by default - no need to use separate endpoint
43
+ - **Disable Auto-Combine**: Add `"auto_combine": false` to request body to get original behavior
44
+
45
+ ## [3.2.2] - 2025-06-26
46
+
47
+ ### 🎵 Combined Audio Functionality
48
+
49
+ This release introduces the revolutionary combined audio feature that allows generating single, seamless audio files from long text content.
50
+
51
+ ### Added
52
+
53
+ - **Combined Audio Endpoints**: New `/api/generate-combined` and `/v1/audio/speech-combined` endpoints
54
+ - **Intelligent Text Splitting**: Smart algorithm that splits text at sentence boundaries, then word boundaries, preserving natural speech flow
55
+ - **Seamless Audio Combination**: Professional audio processing to merge chunks into single continuous files
56
+ - **OpenAI Compatibility**: Full OpenAI TTS API compatibility for combined audio generation
57
+ - **Advanced Fallback System**: Multiple fallback mechanisms for audio combination (PyDub → WAV concatenation raw concatenation)
58
+ - **Rich Metadata**: Response headers with chunk count, file size, and processing information
59
+ - **Comprehensive Testing**: Full test suite with unit tests, integration tests, and performance benchmarks
60
+
61
+ ### 🔄 Changed
62
+
63
+ - **Extended Character Limits**: No longer limited to 4096 characters per request
64
+ - **Enhanced Web Interface**: Updated documentation with combined audio endpoint information
65
+ - **Improved Error Handling**: Better validation and error messages for long text processing
66
+
67
+ ### 🛠️ Technical Features
68
+
69
+ - **Concurrent Processing**: Parallel chunk processing for faster generation
70
+ - **Memory Optimization**: Efficient memory usage for large text processing
71
+ - **Format Support**: Works with all supported audio formats (MP3, WAV, OPUS, AAC, FLAC, PCM)
72
+ - **Performance Monitoring**: Built-in performance tracking and optimization
73
+ - **Unicode Support**: Full Unicode text handling for international content
74
+
75
+ ### 📋 Use Cases
76
+
77
+ - **Long Articles**: Convert blog posts and articles to single audio files
78
+ - **Audiobooks**: Generate chapters as continuous audio
79
+ - **Educational Content**: Transform learning materials to audio format
80
+ - **Accessibility**: Enhanced support for visually impaired users
81
+ - **Podcast Creation**: Convert scripts to professional audio content
82
+
83
+ ## [3.1.0] - 2024-12-19
84
+
85
+ ### 🔧 Format Support Improvements
86
+
87
+ This release focuses on fixing audio format handling and improving format delivery optimization.
88
+
89
+ ### Added
90
+
91
+ - **Smart Header Selection**: Intelligent HTTP header selection to optimize format delivery from openai.fm service
92
+ - **Format Mapping Functions**: Helper functions for better format handling and optimization
93
+ - **Enhanced Web Interface**: Improved format selection with detailed descriptions for each format
94
+ - **Comprehensive Format Documentation**: Updated README and documentation with complete format information
95
+
96
+ ### 🔄 Changed
97
+
98
+ - **File Naming Logic**: Files are now saved with extensions based on the actual returned format, not the requested format
99
+ - **Enhanced Logging**: Added format-specific log messages for better debugging
100
+ - **Web API Enhancement**: `/api/formats` endpoint now provides detailed information about all supported formats
101
+ - **Documentation Updates**: README and package documentation now include comprehensive format guides
102
+
103
+ ### 🐛 Fixed
104
+
105
+ - **MAJOR FIX**: Resolved file naming issue where files were saved with incorrect double extensions (e.g., `test.wav.mp3`, `test.opus.wav`)
106
+ - **Correct File Extensions**: Files now save with proper single extensions based on actual audio format (e.g., `test.mp3`, `test.wav`)
107
+ - **Format Optimization**: Improved format delivery through smart request optimization
108
+ - **Format Handling**: Better handling of all supported audio formats
109
+
110
+ ### 📝 Technical Details
111
+
112
+ - **Format Optimization**: Smart request optimization to deliver the best quality for each format
113
+ - **Backward Compatibility**: Existing code continues to work unchanged
114
+ - **Enhanced Format Support**: Improved support for all 6 audio formats (MP3, WAV, OPUS, AAC, FLAC, PCM)
115
+
116
+ ## [3.0.0] - 2025-06-06
117
+
118
+ ### 🎉 First Python Package Release
119
+
120
+ This is the first release of TTSFM as an installable Python package. Previous versions (v1.x and v2.x) were service-only releases that provided the API server but not a pip-installable package.
121
+
122
+ ### Added
123
+
124
+ - **Complete Package Restructure**: Modern Python package structure with proper typing
125
+ - **Async Support**: Full asynchronous client implementation with `asyncio`
126
+ - **OpenAI API Compatibility**: Drop-in replacement for OpenAI TTS API
127
+ - **Type Hints**: Complete type annotation support throughout the codebase
128
+ - **CLI Interface**: Command-line tool for easy TTS generation
129
+ - **Web Application**: Optional Flask-based web interface
130
+ - **Docker Support**: Multi-architecture Docker images (linux/amd64, linux/arm64)
131
+ - **Comprehensive Error Handling**: Detailed exception hierarchy
132
+ - **Multiple Audio Formats**: Support for MP3, WAV, FLAC, and more
133
+ - **Voice Options**: Multiple voice models (alloy, ash, ballad, coral, echo, fable, nova, onyx, sage, shimmer)
134
+ - **Text Processing**: Automatic text length validation and splitting
135
+ - **Rate Limiting**: Built-in rate limiting and retry mechanisms
136
+ - **Configuration**: Environment variable and configuration file support
137
+
138
+ ### 🔧 Technical Improvements
139
+
140
+ - **Modern Build System**: Using `pyproject.toml` with setuptools
141
+ - **GitHub Actions**: Automated Docker builds and PyPI publishing
142
+ - **Development Tools**: Pre-commit hooks, linting, testing setup
143
+ - **Documentation**: Comprehensive README and inline documentation
144
+ - **Package Management**: Proper dependency management with optional extras
145
+
146
+ ### 🌐 API Changes
147
+
148
+ - **Breaking**: Complete API redesign for better usability
149
+ - **OpenAI Compatible**: `/v1/audio/speech` endpoint compatibility
150
+ - **RESTful Design**: Clean REST API design
151
+ - **Health Checks**: Built-in health check endpoints
152
+ - **CORS Support**: Cross-origin resource sharing enabled
153
+
154
+ ### 📦 Installation Options
155
+
156
+ ```bash
157
+ # Basic installation
158
+ pip install ttsfm
159
+
160
+ # With web application support
161
+ pip install ttsfm[web]
162
+
163
+ # With development tools
164
+ pip install ttsfm[dev]
165
+
166
+ # Docker
167
+ docker run -p 8000:8000 ghcr.io/dbccccccc/ttsfm:latest
168
+ ```
169
+
170
+ ### 🚀 Quick Start
171
+
172
+ ```python
173
+ from ttsfm import TTSClient, Voice
174
+
175
+ client = TTSClient()
176
+ response = client.generate_speech(
177
+ text="Hello! This is TTSFM v3.0.0",
178
+ voice=Voice.CORAL
179
+ )
180
+
181
+ with open("speech.mp3", "wb") as f:
182
+ f.write(response.audio_data)
183
+ ```
184
+
185
+ ### 📦 Package vs Service History
186
+
187
+ **Important Note**: This v3.0.0 is the first release of TTSFM as a Python package available on PyPI. Previous versions (v1.x and v2.x) were service/API server releases only and were not available as installable packages.
188
+
189
+ - **v1.x - v2.x**: Service releases (API server only, not pip-installable)
190
+ - **v3.0.0+**: Full Python package releases (pip-installable with service capabilities)
191
+
192
+ ### 🐛 Bug Fixes
193
+
194
+ - Fixed Docker build issues with dependency resolution
195
+ - Improved error handling and user feedback
196
+ - Better handling of long text inputs
197
+ - Enhanced stability and performance
198
+
199
+ ### 📚 Documentation
200
+
201
+ - Complete API documentation
202
+ - Usage examples and tutorials
203
+ - Docker deployment guide
204
+ - Development setup instructions
205
+
206
+ ---
207
+
208
+ ## Previous Service Releases (Not Available as Python Packages)
209
+
210
+ The following versions were service/API server releases only and were not available as pip-installable packages:
211
+
212
+ ### [2.0.0-alpha9] - 2025-04-09
213
+ - Service improvements (alpha release)
214
+
215
+ ### [2.0.0-alpha8] - 2025-04-09
216
+ - Service improvements (alpha release)
217
+
218
+ ### [2.0.0-alpha7] - 2025-04-07
219
+ - Service improvements (alpha release)
220
+
221
+ ### [2.0.0-alpha6] - 2025-04-07
222
+ - Service improvements (alpha release)
223
+
224
+ ### [2.0.0-alpha5] - 2025-04-07
225
+ - Service improvements (alpha release)
226
+
227
+ ### [2.0.0-alpha4] - 2025-04-07
228
+ - Service improvements (alpha release)
229
+
230
+ ### [2.0.0-alpha3] - 2025-04-07
231
+ - Service improvements (alpha release)
232
+
233
+ ### [2.0.0-alpha2] - 2025-04-07
234
+ - Service improvements (alpha release)
235
+
236
+ ### [2.0.0-alpha1] - 2025-04-07
237
+ - Alpha release (DO NOT USE)
238
+
239
+ ### [1.3.0] - 2025-03-28
240
+ - Support for additional audio file formats in the API
241
+ - Alignment with formats supported by the official API
242
+
243
+ ### [1.2.2] - 2025-03-28
244
+ - Fixed Docker support
245
+
246
+ ### [1.2.1] - 2025-03-28
247
+ - Color change for indicator for status
248
+ - Voice preview on webpage for each voice
249
+
250
+ ### [1.2.0] - 2025-03-26
251
+ - Enhanced stability and availability by implementing advanced request handling mechanisms
252
+ - Removed the proxy pool
253
+
254
+ ### [1.1.2] - 2025-03-26
255
+ - Version display on webpage
256
+ - Last version of 1.1.x
257
+
258
+ ### [1.1.1] - 2025-03-26
259
+ - Build fixes
260
+
261
+ ### [1.1.0] - 2025-03-26
262
+ - Project restructuring for better future development experiences
263
+ - Added .env settings
264
+
265
+ ### [1.0.0] - 2025-03-26
266
+ - First service release
Dockerfile CHANGED
@@ -1,34 +1,36 @@
1
- FROM python:3.11-slim
2
-
3
- WORKDIR /app
4
-
5
- ENV PYTHONDONTWRITEBYTECODE=1 \
6
- PYTHONUNBUFFERED=1 \
7
- PORT=8000
8
-
9
- # Install dependencies
10
- RUN apt-get update && apt-get install -y gcc curl && rm -rf /var/lib/apt/lists/*
11
-
12
- # Copy source code first
13
- COPY ttsfm/ ./ttsfm/
14
- COPY ttsfm-web/ ./ttsfm-web/
15
- COPY pyproject.toml ./
16
- COPY requirements.txt ./
17
-
18
- # Install the TTSFM package with web dependencies
19
- RUN pip install --no-cache-dir -e .[web]
20
-
21
- # Install additional web dependencies
22
- RUN pip install --no-cache-dir python-dotenv>=1.0.0
23
-
24
- # Create non-root user
25
- RUN useradd --create-home ttsfm && chown -R ttsfm:ttsfm /app
26
- USER ttsfm
27
-
28
- EXPOSE 7860
29
-
30
- HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
31
- CMD curl -f http://localhost:8000/api/health || exit 1
32
-
33
- WORKDIR /app/ttsfm-web
34
- CMD ["python", "-m", "waitress", "--host=0.0.0.0", "--port=7860", "app:app"]
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ ENV PYTHONDONTWRITEBYTECODE=1 \
6
+ PYTHONUNBUFFERED=1 \
7
+ PORT=8000
8
+
9
+ # Install dependencies
10
+ RUN apt-get update && apt-get install -y gcc curl git && rm -rf /var/lib/apt/lists/*
11
+
12
+ # Copy source code first
13
+ COPY ttsfm/ ./ttsfm/
14
+ COPY ttsfm-web/ ./ttsfm-web/
15
+ COPY pyproject.toml ./
16
+ COPY requirements.txt ./
17
+ COPY .git/ ./.git/
18
+
19
+ # Install the TTSFM package with web dependencies
20
+ RUN pip install --no-cache-dir -e .[web]
21
+
22
+ # Install additional web dependencies
23
+ RUN pip install --no-cache-dir python-dotenv>=1.0.0 flask-socketio>=5.3.0 python-socketio>=5.10.0 eventlet>=0.33.3
24
+
25
+ # Create non-root user
26
+ RUN useradd --create-home ttsfm && chown -R ttsfm:ttsfm /app
27
+ USER ttsfm
28
+
29
+ EXPOSE 8000
30
+
31
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
32
+ CMD curl -f http://localhost:8000/api/health || exit 1
33
+
34
+ WORKDIR /app/ttsfm-web
35
+ # Use run.py for proper eventlet initialization
36
+ CMD ["python", "run.py"]
LICENSE CHANGED
@@ -1,21 +1,21 @@
1
- MIT License
2
-
3
- Copyright (c) 2025 dbcccc
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 dbcccc
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.zh.md ADDED
@@ -0,0 +1,792 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # TTSFM - 文本转语音API客户端
2
+
3
+ > **Language / 语言**: [English](README.md) | [中文](README.zh.md)
4
+
5
+ [![Docker Pulls](https://img.shields.io/docker/pulls/dbcccc/ttsfm?style=flat-square&logo=docker)](https://hub.docker.com/r/dbcccc/ttsfm)
6
+ [![GitHub Stars](https://img.shields.io/github/stars/dbccccccc/ttsfm?style=social)](https://github.com/dbccccccc/ttsfm)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=flat-square)](https://opensource.org/licenses/MIT)
8
+
9
+ ## Star历史
10
+
11
+ [![Star History Chart](https://api.star-history.com/svg?repos=dbccccccc/ttsfm&type=Date)](https://www.star-history.com/#dbccccccc/ttsfm&Date)
12
+
13
+ 🎤 **现代化、免费的文本转语音API客户端,兼容OpenAI**
14
+
15
+ TTSFM为文本转语音生成提供同步和异步Python客户端,使用逆向工程的openai.fm服务。无需API密钥 - 完全免费使用!
16
+
17
+ ## ✨ 主要特性
18
+
19
+ - 🆓 **完全免费** - 使用逆向工程的openai.fm服务(无需API密钥)
20
+ - 🎯 **OpenAI兼容** - OpenAI TTS API的直接替代品(`/v1/audio/speech`)
21
+ - ⚡ **异步和同步** - 提供`asyncio`和同步客户端
22
+ - 🗣️ **11种声音** - 所有OpenAI兼容的声音(alloy、echo、fable、onyx、nova、shimmer等)
23
+ - 🎵 **6种音频格式** - 支持MP3、WAV、OPUS、AAC、FLAC、PCM
24
+ - 🐳 **Docker就绪** - 一键部署,包含Web界面
25
+ - 🌐 **Web界面** - 用于测试声音和格式的交互式试用平台
26
+ - 🔧 **CLI工具** - 用于快速TTS生成的命令行界面
27
+ - 📦 **类型提示** - 完整的类型注解支持,提供更好的IDE体验
28
+ - 🛡️ **错误处理** - 全面的异常层次结构和重试逻辑
29
+ - ✨ **自动合并** - 自动处理长文本,无缝音频合并
30
+ - 📊 **文本验证** - 自动文本长度验证和分割
31
+ - 🔐 **API密钥保护** - 可选的OpenAI兼容身份验证,用于安全部署
32
+
33
+ ## 📦 安装
34
+
35
+ ### 快速安装
36
+
37
+ ```bash
38
+ pip install ttsfm
39
+ ```
40
+
41
+ ### 安装选项
42
+
43
+ ```bash
44
+ # 基础安装(仅同步客户端)
45
+ pip install ttsfm
46
+
47
+ # 包含Web应用支持
48
+ pip install ttsfm[web]
49
+
50
+ # 包含开发工具
51
+ pip install ttsfm[dev]
52
+
53
+ # 包含文档工具
54
+ pip install ttsfm[docs]
55
+
56
+ # 安装所有可选依赖
57
+ pip install ttsfm[web,dev,docs]
58
+ ```
59
+
60
+ ### 系统要求
61
+
62
+ - **Python**: 3.8+(在3.8、3.9、3.10、3.11、3.12上测试)
63
+ - **操作系统**: Windows、macOS、Linux
64
+ - **依赖**: `requests`、`aiohttp`、`fake-useragent`
65
+
66
+ ## 🚀 快速开始
67
+
68
+ ### 🐳 Docker(推荐)
69
+
70
+ 运行带有Web界面和OpenAI兼容API的TTSFM:
71
+
72
+ ```bash
73
+ # 使用GitHub Container Registry
74
+ docker run -p 8000:8000 ghcr.io/dbccccccc/ttsfm:latest
75
+
76
+ # 使用Docker Hub
77
+ docker run -p 8000:8000 dbcccc/ttsfm:latest
78
+ ```
79
+
80
+ **可用端点:**
81
+ - 🌐 **Web界面**: http://localhost:8000
82
+ - 🔗 **OpenAI API**: http://localhost:8000/v1/audio/speech
83
+ - 📊 **健康检查**: http://localhost:8000/api/health
84
+
85
+ **测试API:**
86
+
87
+ ```bash
88
+ curl -X POST http://localhost:8000/v1/audio/speech \
89
+ -H "Content-Type: application/json" \
90
+ -d '{"model":"gpt-4o-mini-tts","input":"你好世界!","voice":"alloy"}' \
91
+ --output speech.mp3
92
+ ```
93
+
94
+ ### 📦 Python包
95
+
96
+ #### 同步客户端
97
+
98
+ ```python
99
+ from ttsfm import TTSClient, Voice, AudioFormat
100
+
101
+ # 创建客户端(使用免费的openai.fm服务)
102
+ client = TTSClient()
103
+
104
+ # 生成语音
105
+ response = client.generate_speech(
106
+ text="你好!这是TTSFM - 一个免费的TTS服务。",
107
+ voice=Voice.CORAL,
108
+ response_format=AudioFormat.MP3
109
+ )
110
+
111
+ # 保存音频文件
112
+ response.save_to_file("output") # 保存为output.mp3
113
+
114
+ # 或获取原始音频数据
115
+ audio_bytes = response.audio_data
116
+ print(f"生成了 {len(audio_bytes)} 字节的音频")
117
+ ```
118
+
119
+ #### 异步客户端
120
+
121
+ ```python
122
+ import asyncio
123
+ from ttsfm import AsyncTTSClient, Voice
124
+
125
+ async def generate_speech():
126
+ async with AsyncTTSClient() as client:
127
+ response = await client.generate_speech(
128
+ text="异步TTS生成!",
129
+ voice=Voice.NOVA
130
+ )
131
+ response.save_to_file("async_output")
132
+
133
+ # 运行异步函数
134
+ asyncio.run(generate_speech())
135
+ ```
136
+
137
+ #### 长文本处理(Python包)
138
+
139
+ 对于需要精细控制文本分割的开发者:
140
+
141
+ ```python
142
+ from ttsfm import TTSClient, Voice, AudioFormat
143
+
144
+ # 创建客户端
145
+ client = TTSClient()
146
+
147
+ # 从长文本生成语音(为每个片段创建单独的文件)
148
+ responses = client.generate_speech_long_text(
149
+ text="超过4096字符的很长文本...",
150
+ voice=Voice.ALLOY,
151
+ response_format=AudioFormat.MP3,
152
+ max_length=2000,
153
+ preserve_words=True
154
+ )
155
+
156
+ # 将每个片段保存为单独的文件
157
+ for i, response in enumerate(responses, 1):
158
+ response.save_to_file(f"part_{i:03d}") # 保存为part_001.mp3、part_002.mp3等
159
+
160
+ print(f"从长文本生成了 {len(responses)} 个音频文件")
161
+ ```
162
+
163
+ #### OpenAI Python客户端兼容性
164
+
165
+ ```python
166
+ from openai import OpenAI
167
+
168
+ # 指向TTSFM Docker容器(默认不需要API密钥)
169
+ client = OpenAI(
170
+ api_key="not-needed", # TTSFM默认免费
171
+ base_url="http://localhost:8000/v1"
172
+ )
173
+
174
+ # 启用API密钥保护时
175
+ client_with_auth = OpenAI(
176
+ api_key="your-secret-api-key", # 您的TTSFM API密钥
177
+ base_url="http://localhost:8000/v1"
178
+ )
179
+
180
+ # 生成语音(与OpenAI完全相同)
181
+ response = client.audio.speech.create(
182
+ model="gpt-4o-mini-tts",
183
+ voice="alloy",
184
+ input="来自TTSFM的问候!"
185
+ )
186
+
187
+ response.stream_to_file("output.mp3")
188
+ ```
189
+
190
+ #### 长文本自动合并功能
191
+
192
+ TTSFM通过新的自动合并功能自动处理长文本(>4096字符):
193
+
194
+ ```python
195
+ from openai import OpenAI
196
+
197
+ client = OpenAI(
198
+ api_key="not-needed",
199
+ base_url="http://localhost:8000/v1"
200
+ )
201
+
202
+ # 长文本自动分割并合并为单个音频文件
203
+ long_article = """
204
+ 您的很长的文章或文档内容在这里...
205
+ 这可以是数千字符长,TTSFM将
206
+ 自动将其分割成片段,为每个片段生成音频,
207
+ 并将它们合并成一个无缝的音频文件。
208
+ """ * 100 # 使其真的很长
209
+
210
+ # 这可以无缝工作 - 无需手动分割!
211
+ response = client.audio.speech.create(
212
+ model="gpt-4o-mini-tts",
213
+ voice="nova",
214
+ input=long_article,
215
+ # auto_combine=True 是默认值
216
+ )
217
+
218
+ response.stream_to_file("long_article.mp3") # 单个合并文件!
219
+
220
+ # 禁用自动合并以严格兼容OpenAI
221
+ response = client.audio.speech.create(
222
+ model="gpt-4o-mini-tts",
223
+ voice="nova",
224
+ input="仅短文本",
225
+ auto_combine=False # 如果文本>4096字符将出错
226
+ )
227
+ ```
228
+
229
+ ### 🖥️ 命令行界面
230
+
231
+ ```bash
232
+ # 基本用法
233
+ ttsfm "你好,世界!" --output hello.mp3
234
+
235
+ # 指定声音和格式
236
+ ttsfm "你好,世界!" --voice nova --format wav --output hello.wav
237
+
238
+ # 从文件读取
239
+ ttsfm --text-file input.txt --output speech.mp3
240
+
241
+ # 自定义服务URL
242
+ ttsfm "你好,世界!" --url http://localhost:7000 --output hello.mp3
243
+
244
+ # 列出可用声音
245
+ ttsfm --list-voices
246
+
247
+ # 获取帮助
248
+ ttsfm --help
249
+ ```
250
+
251
+ ## ⚙️ 配置
252
+
253
+ TTSFM自动使用免费的openai.fm服务 - **默认情况下无需配置或API密钥!**
254
+
255
+ ### 环境变量
256
+
257
+ | 变量 | 默认值 | 描述 |
258
+ |----------|---------|-------------|
259
+ | `REQUIRE_API_KEY` | `false` | 启用API密钥保护 |
260
+ | `TTSFM_API_KEY` | `None` | 您的秘密API密钥 |
261
+ | `HOST` | `localhost` | 服务器主机 |
262
+ | `PORT` | `8000` | 服务器端口 |
263
+ | `DEBUG` | `false` | 调试模式 |
264
+
265
+ ### Python客户端配置
266
+
267
+ ```python
268
+ from ttsfm import TTSClient
269
+
270
+ # 默认客户端(使用openai.fm,无需API密钥)
271
+ client = TTSClient()
272
+
273
+ # 自定义配置
274
+ client = TTSClient(
275
+ base_url="https://www.openai.fm", # 默认
276
+ timeout=30.0, # 请求超时
277
+ max_retries=3, # 重试次数
278
+ verify_ssl=True # SSL验证
279
+ )
280
+
281
+ # 用于带有API密钥保护的TTSFM服务器
282
+ protected_client = TTSClient(
283
+ base_url="http://localhost:8000",
284
+ api_key="your-ttsfm-api-key"
285
+ )
286
+
287
+ # 用于其他自定义TTS服务
288
+ custom_client = TTSClient(
289
+ base_url="http://your-tts-service.com",
290
+ api_key="your-api-key-if-needed"
291
+ )
292
+ ```
293
+
294
+ ## 🗣️ 可用声音
295
+
296
+ TTSFM支持所有**11种OpenAI兼容声音**:
297
+
298
+ | 声音 | 描述 | 最适合 |
299
+ |-------|-------------|----------|
300
+ | `alloy` | 平衡且多功能 | 通用目的,中性语调 |
301
+ | `ash` | 清晰且清楚 | 专业,商务内容 |
302
+ | `ballad` | 流畅且优美 | 讲故事,有声读物 |
303
+ | `coral` | 温暖且友好 | 客户服务,教程 |
304
+ | `echo` | 共鸣且清晰 | 公告,演示 |
305
+ | `fable` | 富有表现力且动态 | 创意内容,娱乐 |
306
+ | `nova` | 明亮且充满活力 | 营销,积极内容 |
307
+ | `onyx` | 深沉且权威 | 新闻,严肃内容 |
308
+ | `sage` | 智慧且稳重 | 教育,信息性 |
309
+ | `shimmer` | 轻盈且飘逸 | 休闲,对话式 |
310
+ | `verse` | 有节奏且流畅 | 诗歌,艺术内容 |
311
+
312
+ ```python
313
+ from ttsfm import Voice
314
+
315
+ # 使用枚举值
316
+ response = client.generate_speech("你好!", voice=Voice.CORAL)
317
+
318
+ # 或使用字符串值
319
+ response = client.generate_speech("你好!", voice="coral")
320
+
321
+ # 测试不同声音
322
+ for voice in Voice:
323
+ response = client.generate_speech(f"这是{voice.value}声音", voice=voice)
324
+ response.save_to_file(f"test_{voice.value}")
325
+ ```
326
+
327
+ ## 🎵 音频格式
328
+
329
+ TTSFM支持**6种音频格式**,具有不同的质量和压缩选项:
330
+
331
+ | 格式 | 扩展名 | 质量 | 文件大小 | 使用场景 |
332
+ |--------|-----------|---------|-----------|----------|
333
+ | `mp3` | `.mp3` | 良好 | 小 | Web、移动应用、通用使用 |
334
+ | `opus` | `.opus` | 优秀 | 小 | Web流媒体、VoIP |
335
+ | `aac` | `.aac` | 良好 | 中等 | Apple设备、流媒体 |
336
+ | `flac` | `.flac` | 无损 | 大 | 高质量存档 |
337
+ | `wav` | `.wav` | 无损 | 大 | 专业���频 |
338
+ | `pcm` | `.pcm` | 原始 | 大 | 音频处理 |
339
+
340
+ ### **使用示例**
341
+
342
+ ```python
343
+ from ttsfm import TTSClient, AudioFormat
344
+
345
+ client = TTSClient()
346
+
347
+ # 生成不同格式
348
+ formats = [
349
+ AudioFormat.MP3, # 最常见
350
+ AudioFormat.OPUS, # 最佳压缩
351
+ AudioFormat.AAC, # Apple兼容
352
+ AudioFormat.FLAC, # 无损
353
+ AudioFormat.WAV, # 未压缩
354
+ AudioFormat.PCM # 原始音频
355
+ ]
356
+
357
+ for fmt in formats:
358
+ response = client.generate_speech(
359
+ text="测试音频格式",
360
+ response_format=fmt
361
+ )
362
+ response.save_to_file(f"test.{fmt.value}")
363
+ ```
364
+
365
+ ### **格式选择指南**
366
+
367
+ - **选择MP3**用于:
368
+ - Web应用
369
+ - 移动应用
370
+ - 较小的文件大小
371
+ - 通用音频
372
+
373
+ - **选择OPUS**用于:
374
+ - Web流媒体
375
+ - VoIP应用
376
+ - 最佳压缩比
377
+ - 实时音频
378
+
379
+ - **选择AAC**用于:
380
+ - Apple设备
381
+ - 流媒体服务
382
+ - 良好的质量/大小平衡
383
+
384
+ - **选择FLAC**用于:
385
+ - 存档目的
386
+ - 无损压缩
387
+ - 专业工作流程
388
+
389
+ - **选择WAV**用于:
390
+ - 专业音频制作
391
+ - 最大兼容性
392
+ - 当文件大小不是问题时
393
+
394
+ - **选择PCM**用于:
395
+ - 音频处理
396
+ - 原始音频数据
397
+ - 自定义应用
398
+
399
+ > **注意**:库会自动优化请求,为您选择的格式提供最佳质量。文件总是根据音频格式以正确的扩展名保存。
400
+
401
+ ## 🌐 Web界面
402
+
403
+ TTSFM包含一个**美观的Web界面**用于测试和实验:
404
+
405
+ ![Web Interface](https://img.shields.io/badge/Web%20Interface-Available-brightgreen?style=flat-square)
406
+
407
+ **功能:**
408
+ - 🎮 **交互式试用平台** - 实时测试声音和格式
409
+ - 📝 **文本验证** - 字符计数和长度验证
410
+ - 🎛️ **高级选项** - 声音指令,自动分割长文本
411
+ - 📊 **音频播放器** - 内置播放器,显示时长和文件大小信息
412
+ - 📥 **下载支持** - 下载单个或批量音频文件
413
+ - 🎲 **随机文本** - 生成随机示例文本进行测试
414
+ - 📱 **响应式设计** - 在桌面、平板和移动设备上工作
415
+
416
+ 访问地址:http://localhost:8000(运行Docker容器时)
417
+
418
+ ## 🔗 API端点
419
+
420
+ 运行Docker容器时,这些端点可用:
421
+
422
+ | 端点 | 方法 | 描述 |
423
+ |----------|--------|-------------|
424
+ | `/` | GET | Web界面 |
425
+ | `/playground` | GET | 交互式TTS试用平台 |
426
+ | `/v1/audio/speech` | POST | OpenAI兼容的TTS API |
427
+ | `/v1/models` | GET | 列出可用模型 |
428
+ | `/api/health` | GET | 健康检查端点 |
429
+ | `/api/voices` | GET | 列出可用声音 |
430
+ | `/api/formats` | GET | 列出支持的音频格式 |
431
+ | `/api/validate-text` | POST | 验证文本长度 |
432
+
433
+ ### OpenAI兼容API
434
+
435
+ ```bash
436
+ # 生成语音(短文本) - 默认不需要API密钥
437
+ curl -X POST http://localhost:8000/v1/audio/speech \
438
+ -H "Content-Type: application/json" \
439
+ -d '{
440
+ "model": "gpt-4o-mini-tts",
441
+ "input": "你好,这是一个测试!",
442
+ "voice": "alloy",
443
+ "response_format": "mp3"
444
+ }' \
445
+ --output speech.mp3
446
+
447
+ # 使用API密钥生成语音(启用保护时)
448
+ curl -X POST http://localhost:8000/v1/audio/speech \
449
+ -H "Content-Type: application/json" \
450
+ -H "Authorization: Bearer your-secret-api-key" \
451
+ -d '{
452
+ "model": "gpt-4o-mini-tts",
453
+ "input": "你好,这是一个测试!",
454
+ "voice": "alloy",
455
+ "response_format": "mp3"
456
+ }' \
457
+ --output speech.mp3
458
+
459
+ # 使用自动合并从长文本生成语音(默认行为)
460
+ curl -X POST http://localhost:8000/v1/audio/speech \
461
+ -H "Content-Type: application/json" \
462
+ -d '{
463
+ "model": "gpt-4o-mini-tts",
464
+ "input": "这是一个超过4096字符限制的很长文本...",
465
+ "voice": "alloy",
466
+ "response_format": "mp3",
467
+ "auto_combine": true
468
+ }' \
469
+ --output long_speech.mp3
470
+
471
+ # 不使用自动合并从长文本生成语音(如果文本>4096字符将返回错误)
472
+ curl -X POST http://localhost:8000/v1/audio/speech \
473
+ -H "Content-Type: application/json" \
474
+ -d '{
475
+ "model": "gpt-4o-mini-tts",
476
+ "input": "您的文本在这里...",
477
+ "voice": "alloy",
478
+ "response_format": "mp3",
479
+ "auto_combine": false
480
+ }' \
481
+ --output speech.mp3
482
+
483
+ # 列出模型
484
+ curl http://localhost:8000/v1/models
485
+
486
+ # 健康检查
487
+ curl http://localhost:8000/api/health
488
+ ```
489
+
490
+ #### **新参数:`auto_combine`**
491
+
492
+ TTSFM通过可选的`auto_combine`参数扩展了OpenAI API:
493
+
494
+ - **`auto_combine`**(布尔值,可选,默认:`true`)
495
+ - 当为`true`时:自动将长文本(>4096字符)分割成片段,为每个片段生成音频,并将它们合并成一个无缝的音频文件
496
+ - 当为`false`时:如果文本超过4096字符限制则返回错误(标准OpenAI行为)
497
+ - **好处**:无需手动管理长内容的文本分割或音频文件合并
498
+
499
+ ## 🐳 Docker部署
500
+
501
+ ### 快速开始
502
+
503
+ ```bash
504
+ # 使用默认设置运行(无需API密钥)
505
+ docker run -p 8000:8000 ghcr.io/dbccccccc/ttsfm:latest
506
+
507
+ # 启用API密钥保护运行
508
+ docker run -p 8000:8000 \
509
+ -e REQUIRE_API_KEY=true \
510
+ -e TTSFM_API_KEY=your-secret-api-key \
511
+ ghcr.io/dbccccccc/ttsfm:latest
512
+
513
+ # 使用自定义端口运行
514
+ docker run -p 3000:8000 ghcr.io/dbccccccc/ttsfm:latest
515
+
516
+ # 后台运行
517
+ docker run -d -p 8000:8000 --name ttsfm ghcr.io/dbccccccc/ttsfm:latest
518
+ ```
519
+
520
+ ### Docker Compose
521
+
522
+ ```yaml
523
+ version: '3.8'
524
+ services:
525
+ ttsfm:
526
+ image: ghcr.io/dbccccccc/ttsfm:latest
527
+ ports:
528
+ - "8000:8000"
529
+ environment:
530
+ - PORT=8000
531
+ # 可选:启用API密钥保护
532
+ - REQUIRE_API_KEY=false
533
+ - TTSFM_API_KEY=your-secret-api-key-here
534
+ restart: unless-stopped
535
+ healthcheck:
536
+ test: ["CMD", "curl", "-f", "http://localhost:8000/api/health"]
537
+ interval: 30s
538
+ timeout: 10s
539
+ retries: 3
540
+ ```
541
+
542
+ ### 可用镜像
543
+
544
+ | 注册表 | 镜像 | 描述 |
545
+ |----------|-------|-------------|
546
+ | GitHub Container Registry | `ghcr.io/dbccccccc/ttsfm:latest` | 最新稳定版本 |
547
+ | Docker Hub | `dbcccc/ttsfm:latest` | Docker Hub镜像 |
548
+ | GitHub Container Registry | `ghcr.io/dbccccccc/ttsfm:v3.2.2` | 特定版本 |
549
+
550
+ ## 🛠️ 高级用法
551
+
552
+ ### 错误处理
553
+
554
+ ```python
555
+ from ttsfm import TTSClient, TTSException, APIException, NetworkException
556
+
557
+ client = TTSClient()
558
+
559
+ try:
560
+ response = client.generate_speech("你好,世界!")
561
+ response.save_to_file("output")
562
+ except NetworkException as e:
563
+ print(f"网络错误:{e}")
564
+ except APIException as e:
565
+ print(f"API错误:{e}")
566
+ except TTSException as e:
567
+ print(f"TTS错误:{e}")
568
+ ```
569
+
570
+ ### 文本验证和分割
571
+
572
+ ```python
573
+ from ttsfm.utils import validate_text_length, split_text_by_length
574
+
575
+ # 验证文本长度
576
+ text = "您的长文本在这里..."
577
+ is_valid, length = validate_text_length(text, max_length=4096)
578
+
579
+ if not is_valid:
580
+ # 将长文本分割成片段
581
+ chunks = split_text_by_length(text, max_length=4000)
582
+
583
+ # 为每个片段生成语音
584
+ for i, chunk in enumerate(chunks):
585
+ response = client.generate_speech(chunk)
586
+ response.save_to_file(f"output_part_{i}")
587
+ ```
588
+
589
+ ### 自定义请求头和用户代理
590
+
591
+ ```python
592
+ from ttsfm import TTSClient
593
+
594
+ # 客户端自动使用真实的请求头
595
+ client = TTSClient()
596
+
597
+ # 请求头包括:
598
+ # - 真实的User-Agent字符串
599
+ # - 音频内容的Accept头
600
+ # - 连接保持活跃
601
+ # - 压缩的Accept-Encoding
602
+ ```
603
+
604
+ ## 🔧 开发
605
+
606
+ ### 本地开发
607
+
608
+ ```bash
609
+ # 克隆仓库
610
+ git clone https://github.com/dbccccccc/ttsfm.git
611
+ cd ttsfm
612
+
613
+ # 以开发模式安装
614
+ pip install -e .[dev]
615
+
616
+ # 运行测试
617
+ pytest
618
+
619
+ # 运行Web应用
620
+ cd ttsfm-web
621
+ python app.py
622
+ ```
623
+
624
+ ### 构建Docker镜像
625
+
626
+ ```bash
627
+ # 构建镜像
628
+ docker build -t ttsfm:local .
629
+
630
+ # 运行本地镜像
631
+ docker run -p 8000:8000 ttsfm:local
632
+ ```
633
+
634
+ ### 贡献
635
+
636
+ 1. Fork仓库
637
+ 2. 创建功能分支(`git checkout -b feature/amazing-feature`)
638
+ 3. 提交更改(`git commit -m 'Add amazing feature'`)
639
+ 4. 推送到分支(`git push origin feature/amazing-feature`)
640
+ 5. 打开Pull Request
641
+
642
+ ## 📊 性能
643
+
644
+ ### 基准测试
645
+
646
+ - **延迟**:典型文本约1-3秒(取决于openai.fm服务)
647
+ - **吞吐量**:异步客户端支持并发请求
648
+ - **文本限制**:使用自动合并无限制!自动处理任何长度的文本
649
+ - **音频质量**:与OpenAI相当的高质量合成
650
+
651
+ ### 优化技巧
652
+
653
+ ```python
654
+ # 使用异步客户端获得更好的性能
655
+ async with AsyncTTSClient() as client:
656
+ # 并发处理多个请求
657
+ tasks = [
658
+ client.generate_speech(f"文本 {i}")
659
+ for i in range(10)
660
+ ]
661
+ responses = await asyncio.gather(*tasks)
662
+
663
+ # 重用客户端实例
664
+ client = TTSClient()
665
+ for text in texts:
666
+ response = client.generate_speech(text) # 重用连接
667
+ ```
668
+
669
+ ## 🔐 API密钥保护(可选)
670
+
671
+ TTSFM支持**OpenAI兼容的API密钥身份验证**用于安全部署:
672
+
673
+ ### 快速设置
674
+
675
+ ```bash
676
+ # 启用API密钥保护
677
+ export REQUIRE_API_KEY=true
678
+ export TTSFM_API_KEY=your-secret-api-key
679
+
680
+ # 启用保护运行
681
+ docker run -p 8000:8000 \
682
+ -e REQUIRE_API_KEY=true \
683
+ -e TTSFM_API_KEY=your-secret-api-key \
684
+ ghcr.io/dbccccccc/ttsfm:latest
685
+ ```
686
+
687
+ ### 身份验证方法
688
+
689
+ API密钥以**OpenAI兼容格式**接受:
690
+
691
+ ```python
692
+ from openai import OpenAI
693
+
694
+ # 标准OpenAI格式
695
+ client = OpenAI(
696
+ api_key="your-secret-api-key",
697
+ base_url="http://localhost:8000/v1"
698
+ )
699
+
700
+ # 或使用curl
701
+ curl -X POST http://localhost:8000/v1/audio/speech \
702
+ -H "Authorization: Bearer your-secret-api-key" \
703
+ -H "Content-Type: application/json" \
704
+ -d '{"model":"gpt-4o-mini-tts","input":"你好!","voice":"alloy"}'
705
+ ```
706
+
707
+ ### 功能
708
+
709
+ - 🔑 **OpenAI兼容**:使用标准`Authorization: Bearer`头
710
+ - 🛡️ **多种认证方法**:头部、查询参数或JSON正文
711
+ - 🎛️ **可配置**:通过环境变量轻松启用/禁用
712
+ - 📊 **安全日志**:跟踪无效访问尝试
713
+ - 🌐 **Web界面**:自动API密钥字段检测
714
+
715
+ ### 受保护的端点
716
+
717
+ 启用时,这些端点需要身份验证:
718
+ - `POST /v1/audio/speech` - 语音生成
719
+ - `POST /api/generate` - 传统语音生成
720
+ - `POST /api/generate-combined` - 合并语音生成
721
+
722
+ ### 公共端点
723
+
724
+ 这些端点无需身份验证即可访问:
725
+ - `GET /` - Web界面
726
+ - `GET /playground` - 交互式试用平台
727
+ - `GET /api/health` - 健康检查
728
+ - `GET /api/voices` - 可用声音
729
+ - `GET /api/formats` - 支持的格式
730
+
731
+ ## 🔒 安全和隐私
732
+
733
+ - **可选API密钥**:默认免费,需要时安全
734
+ - **无数据存储**:音频按需生成,不存储
735
+ - **HTTPS支持**:到TTS服务的安全连接
736
+ - **无跟踪**:TTSFM不收集或存储用户数据
737
+ - **开源**:完整源代码可供审计
738
+
739
+ ## 📋 更新日志
740
+
741
+ 查看[CHANGELOG.md](CHANGELOG.md)了解详细版本历史。
742
+
743
+ ### 最新更改(v3.2.3)
744
+
745
+ - ✨ **默认自动合并**:长文本现在自动分割并合并为单个音频文件
746
+ - 🔄 **统一API端点**:单个`/v1/audio/speech`端点智能处理短文本和长文本
747
+ - 🎛️ **可配置行为**:新的`auto_combine`参数(默认:`true`)提供完全控制
748
+ - 🤖 **增强OpenAI兼容性**:具有智能长文本处理的直接替代品
749
+ - 📊 **丰富响应头**:`X-Auto-Combine`、`X-Chunks-Combined`和处理元数据
750
+ - 🧹 **简化Web界面**:移除传统批处理,提供更清洁的用户体验
751
+ - 📖 **简化文档**:Web文档强调现代自动合并方法
752
+ - 🎮 **增强试用平台**:专注于自动合并功能的清洁界面
753
+ - 🔐 **API密钥保护**:用于安全部署的可选OpenAI兼容身份验证
754
+ - 🛡️ **安全功能**:具有详细日志的全面访问控制
755
+
756
+ ## 🤝 支持和社区
757
+
758
+ - 🐛 **错误报告**:[GitHub Issues](https://github.com/dbccccccc/ttsfm/issues)
759
+ - 💬 **讨论**:[GitHub Discussions](https://github.com/dbccccccc/ttsfm/discussions)
760
+ - 👤 **作者**:[@dbcccc](https://github.com/dbccccccc)
761
+ - ⭐ **为项目加星**:如果您觉得TTSFM有用,请在GitHub上为其加星!
762
+
763
+ ## 📄 许可证
764
+
765
+ MIT许可证 - 详见[LICENSE](LICENSE)文件。
766
+
767
+ ## 🙏 致谢
768
+
769
+ - **OpenAI**:原始TTS API设计
770
+ - **openai.fm**:提供免费TTS服务
771
+ - **社区**:感谢所有帮助改进TTSFM的用户和贡献者
772
+
773
+ ---
774
+
775
+ <div align="center">
776
+
777
+ **TTSFM** - 免费文本转语音API,兼容OpenAI
778
+
779
+ [![GitHub](https://img.shields.io/badge/GitHub-dbccccccc/ttsfm-blue?style=flat-square&logo=github)](https://github.com/dbccccccc/ttsfm)
780
+ [![PyPI](https://img.shields.io/badge/PyPI-ttsfm-blue?style=flat-square&logo=pypi)](https://pypi.org/project/ttsfm/)
781
+ [![Docker](https://img.shields.io/badge/Docker-dbcccc/ttsfm-blue?style=flat-square&logo=docker)](https://hub.docker.com/r/dbcccc/ttsfm)
782
+
783
+ ---
784
+
785
+ ## 📖 文档
786
+
787
+ - 🇺🇸 **English**: [README.md](README.md)
788
+ - 🇨🇳 **中文**: [README.zh.md](README.zh.md)
789
+
790
+ 由[@dbcccc](https://github.com/dbccccccc)用❤️制作
791
+
792
+ </div>
docs/websocket-streaming.md ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚀 WebSocket Streaming for TTSFM
2
+
3
+ Real-time audio streaming for text-to-speech generation using WebSockets.
4
+
5
+ ## Overview
6
+
7
+ The WebSocket streaming feature provides:
8
+ - **Real-time audio chunk delivery** as they're generated
9
+ - **Progress tracking** with live updates
10
+ - **Lower perceived latency** - start receiving audio before complete generation
11
+ - **Cancellable operations** - stop mid-generation if needed
12
+
13
+ ## Quick Start
14
+
15
+ ### 1. Docker Deployment (Recommended)
16
+
17
+ ```bash
18
+ # Build with WebSocket support
19
+ docker build -t ttsfm-websocket .
20
+
21
+ # Run with WebSocket enabled
22
+ docker run -p 8000:8000 \
23
+ -e DEBUG=false \
24
+ ttsfm-websocket
25
+ ```
26
+
27
+ ### 2. Test WebSocket Connection
28
+
29
+ Visit `http://localhost:8000/websocket-demo` for an interactive demo.
30
+
31
+ ### 3. Client Usage
32
+
33
+ ```javascript
34
+ // Initialize WebSocket client
35
+ const client = new WebSocketTTSClient({
36
+ socketUrl: 'http://localhost:8000',
37
+ debug: true
38
+ });
39
+
40
+ // Generate speech with streaming
41
+ const result = await client.generateSpeech('Hello, WebSocket world!', {
42
+ voice: 'alloy',
43
+ format: 'mp3',
44
+ onProgress: (progress) => {
45
+ console.log(`Progress: ${progress.progress}%`);
46
+ },
47
+ onChunk: (chunk) => {
48
+ console.log(`Received chunk ${chunk.chunkIndex + 1}`);
49
+ // Process audio chunk in real-time
50
+ },
51
+ onComplete: (result) => {
52
+ console.log('Generation complete!');
53
+ // Play or download the combined audio
54
+ }
55
+ });
56
+ ```
57
+
58
+ ## API Reference
59
+
60
+ ### WebSocket Events
61
+
62
+ #### Client → Server
63
+
64
+ **`generate_stream`**
65
+ ```javascript
66
+ {
67
+ text: string, // Text to convert
68
+ voice: string, // Voice ID (alloy, echo, etc.)
69
+ format: string, // Audio format (mp3, wav, opus)
70
+ chunk_size: number // Optional, default 1024
71
+ }
72
+ ```
73
+
74
+ **`cancel_stream`**
75
+ ```javascript
76
+ {
77
+ request_id: string // Request ID to cancel
78
+ }
79
+ ```
80
+
81
+ #### Server → Client
82
+
83
+ **`stream_started`**
84
+ ```javascript
85
+ {
86
+ request_id: string,
87
+ timestamp: number
88
+ }
89
+ ```
90
+
91
+ **`audio_chunk`**
92
+ ```javascript
93
+ {
94
+ request_id: string,
95
+ chunk_index: number,
96
+ total_chunks: number,
97
+ audio_data: string, // Hex-encoded audio data
98
+ format: string,
99
+ duration: number,
100
+ generation_time: number,
101
+ chunk_text: string // Preview of chunk text
102
+ }
103
+ ```
104
+
105
+ **`stream_progress`**
106
+ ```javascript
107
+ {
108
+ request_id: string,
109
+ progress: number, // 0-100
110
+ total_chunks: number,
111
+ chunks_completed: number,
112
+ status: string
113
+ }
114
+ ```
115
+
116
+ **`stream_complete`**
117
+ ```javascript
118
+ {
119
+ request_id: string,
120
+ total_chunks: number,
121
+ status: 'completed',
122
+ timestamp: number
123
+ }
124
+ ```
125
+
126
+ **`stream_error`**
127
+ ```javascript
128
+ {
129
+ request_id: string,
130
+ error: string,
131
+ timestamp: number
132
+ }
133
+ ```
134
+
135
+ ## Performance Considerations
136
+
137
+ 1. **Chunk Size**: Smaller chunks (512-1024 chars) provide more frequent updates but increase overhead
138
+ 2. **Network Latency**: WebSocket reduces latency compared to HTTP polling
139
+ 3. **Audio Buffering**: Client should buffer chunks for smooth playback
140
+ 4. **Concurrent Streams**: Server supports multiple concurrent streaming sessions
141
+
142
+ ## Browser Support
143
+
144
+ - Chrome/Edge: Full support
145
+ - Firefox: Full support
146
+ - Safari: Full support (iOS 11.3+)
147
+ - IE11: Not supported (use polling fallback)
148
+
149
+ ## Troubleshooting
150
+
151
+ ### Connection Issues
152
+ ```javascript
153
+ // Check WebSocket status
154
+ fetch('/api/websocket/status')
155
+ .then(res => res.json())
156
+ .then(data => console.log('WebSocket status:', data));
157
+ ```
158
+
159
+ ### Debug Mode
160
+ ```javascript
161
+ const client = new WebSocketTTSClient({
162
+ debug: true // Enable console logging
163
+ });
164
+ ```
165
+
166
+ ### Common Issues
167
+
168
+ 1. **"WebSocket connection failed"**
169
+ - Check if port 8000 is accessible
170
+ - Ensure eventlet is installed: `pip install eventlet>=0.33.3`
171
+ - Try polling transport as fallback
172
+
173
+ 2. **"Chunks arriving out of order"**
174
+ - Client automatically sorts chunks by index
175
+ - Check network stability
176
+
177
+ 3. **"Audio playback stuttering"**
178
+ - Increase chunk size for better buffering
179
+ - Check client-side audio buffer implementation
180
+
181
+ ## Advanced Usage
182
+
183
+ ### Custom Chunk Processing
184
+ ```javascript
185
+ client.generateSpeech(text, {
186
+ onChunk: async (chunk) => {
187
+ // Custom processing per chunk
188
+ const processed = await processAudioChunk(chunk.audioData);
189
+ audioQueue.push(processed);
190
+
191
+ // Start playback after first chunk
192
+ if (chunk.chunkIndex === 0) {
193
+ startStreamingPlayback(audioQueue);
194
+ }
195
+ }
196
+ });
197
+ ```
198
+
199
+ ### Progress Visualization
200
+ ```javascript
201
+ client.generateSpeech(text, {
202
+ onProgress: (progress) => {
203
+ // Update UI progress bar
204
+ progressBar.style.width = `${progress.progress}%`;
205
+ statusText.textContent = `Processing chunk ${progress.chunksCompleted}/${progress.totalChunks}`;
206
+ }
207
+ });
208
+ ```
209
+
210
+ ## Security
211
+
212
+ - WebSocket connections respect API key authentication if enabled
213
+ - CORS is configured for cross-origin requests
214
+ - SSL/TLS recommended for production deployments
215
+
216
+ ## Deployment Notes
217
+
218
+ For production deployment with your existing setup:
219
+
220
+ ```bash
221
+ # Build new image with WebSocket support
222
+ docker build -t ttsfm-websocket:latest .
223
+
224
+ # Deploy to your server (192.168.1.150)
225
+ docker stop ttsfm-container
226
+ docker rm ttsfm-container
227
+ docker run -d \
228
+ --name ttsfm-container \
229
+ -p 8000:8000 \
230
+ -e REQUIRE_API_KEY=true \
231
+ -e TTSFM_API_KEY=your-secret-key \
232
+ -e DEBUG=false \
233
+ ttsfm-websocket:latest
234
+ ```
235
+
236
+ ## Performance Metrics
237
+
238
+ Based on testing with openai.fm backend:
239
+ - First chunk delivery: ~0.5-1s
240
+ - Streaming overhead: ~10-15% vs batch processing
241
+ - Concurrent connections: 100+ (limited by server resources)
242
+ - Memory usage: ~50MB per active stream
243
+
244
+ *Built by a grumpy senior engineer who thinks HTTP was good enough*
pyproject.toml CHANGED
@@ -1,161 +1,169 @@
1
- [build-system]
2
- requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.2"]
3
- build-backend = "setuptools.build_meta"
4
-
5
- [project]
6
- name = "ttsfm"
7
- version = "3.1.0"
8
- description = "Text-to-Speech API Client with OpenAI compatibility"
9
- readme = "README.md"
10
- license = "MIT"
11
- authors = [
12
- {name = "dbcccc", email = "[email protected]"}
13
- ]
14
- maintainers = [
15
- {name = "dbcccc", email = "[email protected]"}
16
- ]
17
- classifiers = [
18
- "Development Status :: 4 - Beta",
19
- "Intended Audience :: Developers",
20
-
21
- "Operating System :: OS Independent",
22
- "Programming Language :: Python :: 3",
23
- "Programming Language :: Python :: 3.8",
24
- "Programming Language :: Python :: 3.9",
25
- "Programming Language :: Python :: 3.10",
26
- "Programming Language :: Python :: 3.11",
27
- "Programming Language :: Python :: 3.12",
28
- "Topic :: Multimedia :: Sound/Audio :: Speech",
29
- "Topic :: Software Development :: Libraries :: Python Modules",
30
- "Topic :: Internet :: WWW/HTTP :: Dynamic Content",
31
- ]
32
- keywords = [
33
- "tts",
34
- "text-to-speech",
35
- "speech-synthesis",
36
- "openai",
37
- "api-client",
38
- "audio",
39
- "voice",
40
- "speech"
41
- ]
42
- requires-python = ">=3.8"
43
- dependencies = [
44
- "requests>=2.25.0",
45
- "aiohttp>=3.8.0",
46
- "fake-useragent>=1.4.0",
47
- ]
48
-
49
- [project.optional-dependencies]
50
- dev = [
51
- "pytest>=6.0",
52
- "pytest-asyncio>=0.18.0",
53
- "pytest-cov>=2.0",
54
- "black>=22.0",
55
- "isort>=5.0",
56
- "flake8>=4.0",
57
- "mypy>=0.900",
58
- "pre-commit>=2.0",
59
- ]
60
- docs = [
61
- "sphinx>=4.0",
62
- "sphinx-rtd-theme>=1.0",
63
- "myst-parser>=0.17",
64
- ]
65
- web = [
66
- "flask>=2.0.0",
67
- "flask-cors>=3.0.10",
68
- "waitress>=3.0.0",
69
- ]
70
-
71
- [project.urls]
72
- Homepage = "https://github.com/dbccccccc/ttsfm"
73
- Documentation = "https://github.com/dbccccccc/ttsfm/blob/main/docs/"
74
- Repository = "https://github.com/dbccccccc/ttsfm"
75
- "Bug Tracker" = "https://github.com/dbccccccc/ttsfm/issues"
76
-
77
- [project.scripts]
78
- ttsfm = "ttsfm.cli:main"
79
-
80
- [tool.setuptools]
81
- packages = ["ttsfm"]
82
-
83
- [tool.setuptools.package-data]
84
- ttsfm = ["py.typed"]
85
-
86
- [tool.black]
87
- line-length = 100
88
- target-version = ['py38']
89
- include = '\.pyi?$'
90
- extend-exclude = '''
91
- /(
92
- # directories
93
- \.eggs
94
- | \.git
95
- | \.hg
96
- | \.mypy_cache
97
- | \.tox
98
- | \.venv
99
- | build
100
- | dist
101
- )/
102
- '''
103
-
104
- [tool.isort]
105
- profile = "black"
106
- line_length = 100
107
- multi_line_output = 3
108
- include_trailing_comma = true
109
- force_grid_wrap = 0
110
- use_parentheses = true
111
- ensure_newline_before_comments = true
112
-
113
- [tool.mypy]
114
- python_version = "3.8"
115
- warn_return_any = true
116
- warn_unused_configs = true
117
- disallow_untyped_defs = true
118
- disallow_incomplete_defs = true
119
- check_untyped_defs = true
120
- disallow_untyped_decorators = true
121
- no_implicit_optional = true
122
- warn_redundant_casts = true
123
- warn_unused_ignores = true
124
- warn_no_return = true
125
- warn_unreachable = true
126
- strict_equality = true
127
-
128
- [tool.pytest.ini_options]
129
- minversion = "6.0"
130
- addopts = "-ra -q --strict-markers --strict-config"
131
- testpaths = ["tests"]
132
- python_files = ["test_*.py", "*_test.py"]
133
- python_classes = ["Test*"]
134
- python_functions = ["test_*"]
135
- markers = [
136
- "slow: marks tests as slow (deselect with '-m \"not slow\"')",
137
- "integration: marks tests as integration tests",
138
- "unit: marks tests as unit tests",
139
- ]
140
-
141
- [tool.coverage.run]
142
- source = ["ttsfm"]
143
- omit = [
144
- "*/tests/*",
145
- "*/test_*",
146
- "setup.py",
147
- ]
148
-
149
- [tool.coverage.report]
150
- exclude_lines = [
151
- "pragma: no cover",
152
- "def __repr__",
153
- "if self.debug:",
154
- "if settings.DEBUG",
155
- "raise AssertionError",
156
- "raise NotImplementedError",
157
- "if 0:",
158
- "if __name__ == .__main__.:",
159
- "class .*\\bProtocol\\):",
160
- "@(abc\\.)?abstractmethod",
161
- ]
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.2"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "ttsfm"
7
+ dynamic = ["version"]
8
+ description = "Text-to-Speech API Client with OpenAI compatibility"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ authors = [
12
+ {name = "dbcccc", email = "[email protected]"}
13
+ ]
14
+ maintainers = [
15
+ {name = "dbcccc", email = "[email protected]"}
16
+ ]
17
+ classifiers = [
18
+ "Development Status :: 4 - Beta",
19
+ "Intended Audience :: Developers",
20
+
21
+ "Operating System :: OS Independent",
22
+ "Programming Language :: Python :: 3",
23
+ "Programming Language :: Python :: 3.8",
24
+ "Programming Language :: Python :: 3.9",
25
+ "Programming Language :: Python :: 3.10",
26
+ "Programming Language :: Python :: 3.11",
27
+ "Programming Language :: Python :: 3.12",
28
+ "Topic :: Multimedia :: Sound/Audio :: Speech",
29
+ "Topic :: Software Development :: Libraries :: Python Modules",
30
+ "Topic :: Internet :: WWW/HTTP :: Dynamic Content",
31
+ ]
32
+ keywords = [
33
+ "tts",
34
+ "text-to-speech",
35
+ "speech-synthesis",
36
+ "openai",
37
+ "api-client",
38
+ "audio",
39
+ "voice",
40
+ "speech"
41
+ ]
42
+ requires-python = ">=3.8"
43
+ dependencies = [
44
+ "requests>=2.25.0",
45
+ "aiohttp>=3.8.0",
46
+ "fake-useragent>=1.4.0",
47
+ "python-dotenv>=1.0.1",
48
+ ]
49
+
50
+ [project.optional-dependencies]
51
+ dev = [
52
+ "pytest>=6.0",
53
+ "pytest-asyncio>=0.18.0",
54
+ "pytest-cov>=2.0",
55
+ "black>=22.0",
56
+ "isort>=5.0",
57
+ "flake8>=4.0",
58
+ "mypy>=0.900",
59
+ "pre-commit>=2.0",
60
+ ]
61
+ docs = [
62
+ "sphinx>=4.0",
63
+ "sphinx-rtd-theme>=1.0",
64
+ "myst-parser>=0.17",
65
+ ]
66
+ web = [
67
+ "flask>=2.0.0",
68
+ "flask-cors>=3.0.10",
69
+ "flask-socketio>=5.3.0",
70
+ "python-socketio>=5.10.0",
71
+ "eventlet>=0.33.3",
72
+ "waitress>=3.0.0",
73
+ ]
74
+
75
+ [project.urls]
76
+ Homepage = "https://github.com/dbccccccc/ttsfm"
77
+ Documentation = "https://github.com/dbccccccc/ttsfm/blob/main/docs/"
78
+ Repository = "https://github.com/dbccccccc/ttsfm"
79
+ "Bug Tracker" = "https://github.com/dbccccccc/ttsfm/issues"
80
+
81
+ [project.scripts]
82
+ ttsfm = "ttsfm.cli:main"
83
+
84
+ [tool.setuptools_scm]
85
+ version_scheme = "no-guess-dev"
86
+ local_scheme = "no-local-version"
87
+
88
+ [tool.setuptools]
89
+ packages = ["ttsfm"]
90
+
91
+ [tool.setuptools.package-data]
92
+ ttsfm = ["py.typed"]
93
+
94
+ [tool.black]
95
+ line-length = 100
96
+ target-version = ['py38']
97
+ include = '\\.pyi?$'
98
+ extend-exclude = '''
99
+ /(
100
+ # directories
101
+ \.eggs
102
+ | \.git
103
+ | \.hg
104
+ | \.mypy_cache
105
+ | \.tox
106
+ | \.venv
107
+ | build
108
+ | dist
109
+ )/
110
+ '''
111
+
112
+ [tool.isort]
113
+ profile = "black"
114
+ line_length = 100
115
+ multi_line_output = 3
116
+ include_trailing_comma = true
117
+ force_grid_wrap = 0
118
+ use_parentheses = true
119
+ ensure_newline_before_comments = true
120
+
121
+ [tool.mypy]
122
+ python_version = "3.8"
123
+ warn_return_any = true
124
+ warn_unused_configs = true
125
+ disallow_untyped_defs = true
126
+ disallow_incomplete_defs = true
127
+ check_untyped_defs = true
128
+ disallow_untyped_decorators = true
129
+ no_implicit_optional = true
130
+ warn_redundant_casts = true
131
+ warn_unused_ignores = true
132
+ warn_no_return = true
133
+ warn_unreachable = true
134
+ strict_equality = true
135
+
136
+ [tool.pytest.ini_options]
137
+ minversion = "6.0"
138
+ addopts = "-ra -q --strict-markers --strict-config"
139
+ testpaths = ["tests"]
140
+ python_files = ["test_*.py", "*_test.py"]
141
+ python_classes = ["Test*"]
142
+ python_functions = ["test_*"]
143
+ markers = [
144
+ "slow: marks tests as slow (deselect with '-m \"not slow\"')",
145
+ "integration: marks tests as integration tests",
146
+ "unit: marks tests as unit tests",
147
+ ]
148
+
149
+ [tool.coverage.run]
150
+ source = ["ttsfm"]
151
+ omit = [
152
+ "*/tests/*",
153
+ "*/test_*",
154
+ "setup.py",
155
+ ]
156
+
157
+ [tool.coverage.report]
158
+ exclude_lines = [
159
+ "pragma: no cover",
160
+ "def __repr__",
161
+ "if self.debug:",
162
+ "if settings.DEBUG",
163
+ "raise AssertionError",
164
+ "raise NotImplementedError",
165
+ "if 0:",
166
+ "if __name__ == .__main__.:",
167
+ "class .*\\bProtocol\\):",
168
+ "@(abc\\.)?abstractmethod",
169
+ ]
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- # Core dependencies for the TTSFM package
2
- requests>=2.25.0
3
- aiohttp>=3.8.0
4
  fake-useragent>=1.4.0
 
1
+ # Core dependencies for the TTSFM package
2
+ requests>=2.25.0
3
+ aiohttp>=3.8.0
4
  fake-useragent>=1.4.0
ttsfm-web/app.py CHANGED
@@ -1,574 +1,988 @@
1
- """
2
- TTSFM Web Application
3
-
4
- A Flask web application that provides a user-friendly interface
5
- for the TTSFM text-to-speech package.
6
- """
7
-
8
- import os
9
- import json
10
- import logging
11
- from datetime import datetime
12
- from pathlib import Path
13
- from typing import Dict, Any, Optional
14
-
15
- from flask import Flask, request, jsonify, send_file, Response, render_template
16
- from flask_cors import CORS
17
- from dotenv import load_dotenv
18
-
19
- # Import the TTSFM package
20
- try:
21
- from ttsfm import TTSClient, Voice, AudioFormat, TTSException
22
- from ttsfm.exceptions import APIException, NetworkException, ValidationException
23
- from ttsfm.utils import validate_text_length, split_text_by_length
24
- except ImportError:
25
- # Fallback for development when package is not installed
26
- import sys
27
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
28
- from ttsfm import TTSClient, Voice, AudioFormat, TTSException
29
- from ttsfm.exceptions import APIException, NetworkException, ValidationException
30
- from ttsfm.utils import validate_text_length, split_text_by_length
31
-
32
- # Load environment variables
33
- load_dotenv()
34
-
35
- # Configure logging
36
- logging.basicConfig(
37
- level=logging.INFO,
38
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
39
- )
40
- logger = logging.getLogger(__name__)
41
-
42
- # Create Flask app
43
- app = Flask(__name__, static_folder='static', static_url_path='/static')
44
- CORS(app)
45
-
46
- # Configuration
47
- HOST = os.getenv("HOST", "localhost")
48
- PORT = int(os.getenv("PORT", "8000"))
49
- DEBUG = os.getenv("DEBUG", "false").lower() == "true"
50
-
51
- # Create TTS client - now uses openai.fm directly, no configuration needed
52
- tts_client = TTSClient()
53
-
54
- logger.info("Initialized web app with TTSFM using openai.fm free service")
55
-
56
- @app.route('/')
57
- def index():
58
- """Serve the main web interface."""
59
- return render_template('index.html')
60
-
61
- @app.route('/playground')
62
- def playground():
63
- """Serve the interactive playground."""
64
- return render_template('playground.html')
65
-
66
- @app.route('/docs')
67
- def docs():
68
- """Serve the API documentation."""
69
- return render_template('docs.html')
70
-
71
- @app.route('/api/voices', methods=['GET'])
72
- def get_voices():
73
- """Get list of available voices."""
74
- try:
75
- voices = [
76
- {
77
- "id": voice.value,
78
- "name": voice.value.title(),
79
- "description": f"{voice.value.title()} voice"
80
- }
81
- for voice in Voice
82
- ]
83
-
84
- return jsonify({
85
- "voices": voices,
86
- "count": len(voices)
87
- })
88
-
89
- except Exception as e:
90
- logger.error(f"Error getting voices: {e}")
91
- return jsonify({"error": "Failed to get voices"}), 500
92
-
93
- @app.route('/api/formats', methods=['GET'])
94
- def get_formats():
95
- """Get list of supported audio formats."""
96
- try:
97
- formats = [
98
- {
99
- "id": "mp3",
100
- "name": "MP3",
101
- "mime_type": "audio/mpeg",
102
- "description": "MP3 audio format - good quality, small file size",
103
- "quality": "Good",
104
- "file_size": "Small",
105
- "use_case": "Web, mobile apps, general use"
106
- },
107
- {
108
- "id": "opus",
109
- "name": "OPUS",
110
- "mime_type": "audio/opus",
111
- "description": "OPUS audio format - excellent quality, small file size",
112
- "quality": "Excellent",
113
- "file_size": "Small",
114
- "use_case": "Web streaming, VoIP"
115
- },
116
- {
117
- "id": "aac",
118
- "name": "AAC",
119
- "mime_type": "audio/aac",
120
- "description": "AAC audio format - good quality, medium file size",
121
- "quality": "Good",
122
- "file_size": "Medium",
123
- "use_case": "Apple devices, streaming"
124
- },
125
- {
126
- "id": "flac",
127
- "name": "FLAC",
128
- "mime_type": "audio/flac",
129
- "description": "FLAC audio format - lossless quality, large file size",
130
- "quality": "Lossless",
131
- "file_size": "Large",
132
- "use_case": "High-quality archival"
133
- },
134
- {
135
- "id": "wav",
136
- "name": "WAV",
137
- "mime_type": "audio/wav",
138
- "description": "WAV audio format - lossless quality, large file size",
139
- "quality": "Lossless",
140
- "file_size": "Large",
141
- "use_case": "Professional audio"
142
- },
143
- {
144
- "id": "pcm",
145
- "name": "PCM",
146
- "mime_type": "audio/pcm",
147
- "description": "PCM audio format - raw audio data, large file size",
148
- "quality": "Raw",
149
- "file_size": "Large",
150
- "use_case": "Audio processing"
151
- }
152
- ]
153
-
154
- return jsonify({
155
- "formats": formats,
156
- "count": len(formats)
157
- })
158
-
159
- except Exception as e:
160
- logger.error(f"Error getting formats: {e}")
161
- return jsonify({"error": "Failed to get formats"}), 500
162
-
163
- @app.route('/api/validate-text', methods=['POST'])
164
- def validate_text():
165
- """Validate text length and provide splitting suggestions."""
166
- try:
167
- data = request.get_json()
168
- if not data:
169
- return jsonify({"error": "No JSON data provided"}), 400
170
-
171
- text = data.get('text', '').strip()
172
- max_length = data.get('max_length', 4096)
173
-
174
- if not text:
175
- return jsonify({"error": "Text is required"}), 400
176
-
177
- text_length = len(text)
178
- is_valid = text_length <= max_length
179
-
180
- result = {
181
- "text_length": text_length,
182
- "max_length": max_length,
183
- "is_valid": is_valid,
184
- "needs_splitting": not is_valid
185
- }
186
-
187
- if not is_valid:
188
- # Provide splitting suggestions
189
- chunks = split_text_by_length(text, max_length, preserve_words=True)
190
- result.update({
191
- "suggested_chunks": len(chunks),
192
- "chunk_preview": [chunk[:100] + "..." if len(chunk) > 100 else chunk for chunk in chunks[:3]]
193
- })
194
-
195
- return jsonify(result)
196
-
197
- except Exception as e:
198
- logger.error(f"Text validation error: {e}")
199
- return jsonify({"error": "Text validation failed"}), 500
200
-
201
- @app.route('/api/generate', methods=['POST'])
202
- def generate_speech():
203
- """Generate speech from text using the TTSFM package."""
204
- try:
205
- # Parse request data
206
- data = request.get_json()
207
- if not data:
208
- return jsonify({"error": "No JSON data provided"}), 400
209
-
210
- # Extract parameters
211
- text = data.get('text', '').strip()
212
- voice = data.get('voice', Voice.ALLOY.value)
213
- response_format = data.get('format', AudioFormat.MP3.value)
214
- instructions = data.get('instructions', '').strip() or None
215
- max_length = data.get('max_length', 4096)
216
- validate_length = data.get('validate_length', True)
217
-
218
- # Validate required fields
219
- if not text:
220
- return jsonify({"error": "Text is required"}), 400
221
-
222
- # Validate voice
223
- try:
224
- voice_enum = Voice(voice.lower())
225
- except ValueError:
226
- return jsonify({
227
- "error": f"Invalid voice: {voice}. Must be one of: {[v.value for v in Voice]}"
228
- }), 400
229
-
230
- # Validate format
231
- try:
232
- format_enum = AudioFormat(response_format.lower())
233
- except ValueError:
234
- return jsonify({
235
- "error": f"Invalid format: {response_format}. Must be one of: {[f.value for f in AudioFormat]}"
236
- }), 400
237
-
238
- logger.info(f"Generating speech: text='{text[:50]}...', voice={voice}, format={response_format}")
239
-
240
- # Generate speech using the TTSFM package with validation
241
- response = tts_client.generate_speech(
242
- text=text,
243
- voice=voice_enum,
244
- response_format=format_enum,
245
- instructions=instructions,
246
- max_length=max_length,
247
- validate_length=validate_length
248
- )
249
-
250
- # Return audio data
251
- return Response(
252
- response.audio_data,
253
- mimetype=response.content_type,
254
- headers={
255
- 'Content-Disposition': f'attachment; filename="speech.{response.format.value}"',
256
- 'Content-Length': str(response.size),
257
- 'X-Audio-Format': response.format.value,
258
- 'X-Audio-Size': str(response.size)
259
- }
260
- )
261
-
262
- except ValidationException as e:
263
- logger.warning(f"Validation error: {e}")
264
- return jsonify({"error": str(e)}), 400
265
-
266
- except APIException as e:
267
- logger.error(f"API error: {e}")
268
- return jsonify({
269
- "error": str(e),
270
- "status_code": getattr(e, 'status_code', 500)
271
- }), getattr(e, 'status_code', 500)
272
-
273
- except NetworkException as e:
274
- logger.error(f"Network error: {e}")
275
- return jsonify({
276
- "error": "TTS service is currently unavailable",
277
- "details": str(e)
278
- }), 503
279
-
280
- except TTSException as e:
281
- logger.error(f"TTS error: {e}")
282
- return jsonify({"error": str(e)}), 500
283
-
284
- except Exception as e:
285
- logger.error(f"Unexpected error: {e}")
286
- return jsonify({"error": "Internal server error"}), 500
287
-
288
- @app.route('/api/generate-batch', methods=['POST'])
289
- def generate_speech_batch():
290
- """Generate speech from long text by splitting into chunks."""
291
- try:
292
- data = request.get_json()
293
- if not data:
294
- return jsonify({"error": "No JSON data provided"}), 400
295
-
296
- text = data.get('text', '').strip()
297
- voice = data.get('voice', Voice.ALLOY.value)
298
- response_format = data.get('format', AudioFormat.MP3.value)
299
- instructions = data.get('instructions', '').strip() or None
300
- max_length = data.get('max_length', 4096)
301
- preserve_words = data.get('preserve_words', True)
302
-
303
- if not text:
304
- return jsonify({"error": "Text is required"}), 400
305
-
306
- # Validate voice and format
307
- try:
308
- voice_enum = Voice(voice.lower())
309
- format_enum = AudioFormat(response_format.lower())
310
- except ValueError as e:
311
- return jsonify({"error": f"Invalid voice or format: {e}"}), 400
312
-
313
- # Split text into chunks
314
- chunks = split_text_by_length(text, max_length, preserve_words)
315
-
316
- if not chunks:
317
- return jsonify({"error": "No valid text chunks found"}), 400
318
-
319
- logger.info(f"Processing {len(chunks)} chunks for batch generation")
320
-
321
- # Generate speech for each chunk
322
- results = []
323
- for i, chunk in enumerate(chunks):
324
- try:
325
- response = tts_client.generate_speech(
326
- text=chunk,
327
- voice=voice_enum,
328
- response_format=format_enum,
329
- instructions=instructions,
330
- max_length=max_length,
331
- validate_length=False # Already split
332
- )
333
-
334
- # Convert to base64 for JSON response
335
- import base64
336
- audio_b64 = base64.b64encode(response.audio_data).decode('utf-8')
337
-
338
- results.append({
339
- "chunk_index": i + 1,
340
- "chunk_text": chunk[:100] + "..." if len(chunk) > 100 else chunk,
341
- "audio_data": audio_b64,
342
- "content_type": response.content_type,
343
- "size": response.size,
344
- "format": response.format.value
345
- })
346
-
347
- except Exception as e:
348
- logger.error(f"Failed to generate chunk {i+1}: {e}")
349
- results.append({
350
- "chunk_index": i + 1,
351
- "chunk_text": chunk[:100] + "..." if len(chunk) > 100 else chunk,
352
- "error": str(e)
353
- })
354
-
355
- return jsonify({
356
- "total_chunks": len(chunks),
357
- "successful_chunks": len([r for r in results if "audio_data" in r]),
358
- "results": results
359
- })
360
-
361
- except Exception as e:
362
- logger.error(f"Batch generation error: {e}")
363
- return jsonify({"error": "Batch generation failed"}), 500
364
-
365
- @app.route('/api/status', methods=['GET'])
366
- def get_status():
367
- """Get service status."""
368
- try:
369
- # Try to make a simple request to check if the TTS service is available
370
- test_response = tts_client.generate_speech(
371
- text="test",
372
- voice=Voice.ALLOY,
373
- response_format=AudioFormat.MP3
374
- )
375
-
376
- return jsonify({
377
- "status": "online",
378
- "tts_service": "openai.fm (free)",
379
- "package_version": "3.0.0",
380
- "timestamp": datetime.now().isoformat()
381
- })
382
-
383
- except Exception as e:
384
- logger.error(f"Status check failed: {e}")
385
- return jsonify({
386
- "status": "error",
387
- "tts_service": "openai.fm (free)",
388
- "error": str(e),
389
- "timestamp": datetime.now().isoformat()
390
- }), 503
391
-
392
- @app.route('/api/health', methods=['GET'])
393
- def health_check():
394
- """Simple health check endpoint."""
395
- return jsonify({
396
- "status": "healthy",
397
- "timestamp": datetime.now().isoformat()
398
- })
399
-
400
- # OpenAI-compatible API endpoints
401
- @app.route('/v1/audio/speech', methods=['POST'])
402
- def openai_speech():
403
- """OpenAI-compatible speech generation endpoint."""
404
- try:
405
- # Parse request data
406
- data = request.get_json()
407
- if not data:
408
- return jsonify({
409
- "error": {
410
- "message": "No JSON data provided",
411
- "type": "invalid_request_error",
412
- "code": "missing_data"
413
- }
414
- }), 400
415
-
416
- # Extract OpenAI-compatible parameters
417
- model = data.get('model', 'gpt-4o-mini-tts') # Accept but ignore model
418
- input_text = data.get('input', '').strip()
419
- voice = data.get('voice', 'alloy')
420
- response_format = data.get('response_format', 'mp3')
421
- instructions = data.get('instructions', '').strip() or None
422
- speed = data.get('speed', 1.0) # Accept but ignore speed
423
-
424
- # Validate required fields
425
- if not input_text:
426
- return jsonify({
427
- "error": {
428
- "message": "Input text is required",
429
- "type": "invalid_request_error",
430
- "code": "missing_input"
431
- }
432
- }), 400
433
-
434
- # Validate voice
435
- try:
436
- voice_enum = Voice(voice.lower())
437
- except ValueError:
438
- return jsonify({
439
- "error": {
440
- "message": f"Invalid voice: {voice}. Must be one of: {[v.value for v in Voice]}",
441
- "type": "invalid_request_error",
442
- "code": "invalid_voice"
443
- }
444
- }), 400
445
-
446
- # Validate format
447
- try:
448
- format_enum = AudioFormat(response_format.lower())
449
- except ValueError:
450
- return jsonify({
451
- "error": {
452
- "message": f"Invalid response_format: {response_format}. Must be one of: {[f.value for f in AudioFormat]}",
453
- "type": "invalid_request_error",
454
- "code": "invalid_format"
455
- }
456
- }), 400
457
-
458
- logger.info(f"OpenAI API: Generating speech: text='{input_text[:50]}...', voice={voice}, format={response_format}")
459
-
460
- # Generate speech using the TTSFM package
461
- response = tts_client.generate_speech(
462
- text=input_text,
463
- voice=voice_enum,
464
- response_format=format_enum,
465
- instructions=instructions,
466
- max_length=4096,
467
- validate_length=True
468
- )
469
-
470
- # Return audio data in OpenAI format
471
- return Response(
472
- response.audio_data,
473
- mimetype=response.content_type,
474
- headers={
475
- 'Content-Type': response.content_type,
476
- 'Content-Length': str(response.size),
477
- 'X-Audio-Format': response.format.value,
478
- 'X-Audio-Size': str(response.size),
479
- 'X-Powered-By': 'TTSFM-OpenAI-Compatible'
480
- }
481
- )
482
-
483
- except ValidationException as e:
484
- logger.warning(f"OpenAI API validation error: {e}")
485
- return jsonify({
486
- "error": {
487
- "message": str(e),
488
- "type": "invalid_request_error",
489
- "code": "validation_error"
490
- }
491
- }), 400
492
-
493
- except APIException as e:
494
- logger.error(f"OpenAI API error: {e}")
495
- return jsonify({
496
- "error": {
497
- "message": str(e),
498
- "type": "api_error",
499
- "code": "tts_error"
500
- }
501
- }), getattr(e, 'status_code', 500)
502
-
503
- except NetworkException as e:
504
- logger.error(f"OpenAI API network error: {e}")
505
- return jsonify({
506
- "error": {
507
- "message": "TTS service is currently unavailable",
508
- "type": "service_unavailable_error",
509
- "code": "service_unavailable"
510
- }
511
- }), 503
512
-
513
- except Exception as e:
514
- logger.error(f"OpenAI API unexpected error: {e}")
515
- return jsonify({
516
- "error": {
517
- "message": "An unexpected error occurred",
518
- "type": "internal_error",
519
- "code": "internal_error"
520
- }
521
- }), 500
522
-
523
- @app.route('/v1/models', methods=['GET'])
524
- def openai_models():
525
- """OpenAI-compatible models endpoint."""
526
- return jsonify({
527
- "object": "list",
528
- "data": [
529
- {
530
- "id": "gpt-4o-mini-tts",
531
- "object": "model",
532
- "created": 1699564800,
533
- "owned_by": "ttsfm",
534
- "permission": [],
535
- "root": "gpt-4o-mini-tts",
536
- "parent": None
537
- }
538
- ]
539
- })
540
-
541
- @app.errorhandler(404)
542
- def not_found(error):
543
- """Handle 404 errors."""
544
- return jsonify({"error": "Endpoint not found"}), 404
545
-
546
- @app.errorhandler(405)
547
- def method_not_allowed(error):
548
- """Handle 405 errors."""
549
- return jsonify({"error": "Method not allowed"}), 405
550
-
551
- @app.errorhandler(500)
552
- def internal_error(error):
553
- """Handle 500 errors."""
554
- logger.error(f"Internal server error: {error}")
555
- return jsonify({"error": "Internal server error"}), 500
556
-
557
- if __name__ == '__main__':
558
- logger.info(f"Starting TTSFM web application on {HOST}:{PORT}")
559
- logger.info("Using openai.fm free TTS service")
560
- logger.info(f"Debug mode: {DEBUG}")
561
-
562
- try:
563
- app.run(
564
- host=HOST,
565
- port=PORT,
566
- debug=DEBUG
567
- )
568
- except KeyboardInterrupt:
569
- logger.info("Application stopped by user")
570
- except Exception as e:
571
- logger.error(f"Failed to start application: {e}")
572
- finally:
573
- # Clean up TTS client
574
- tts_client.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TTSFM Web Application
3
+
4
+ A Flask web application that provides a user-friendly interface
5
+ for the TTSFM text-to-speech package.
6
+ """
7
+
8
+ import os
9
+ import json
10
+ import logging
11
+ import tempfile
12
+ import io
13
+ from datetime import datetime
14
+ from pathlib import Path
15
+ from typing import Dict, Any, Optional, List
16
+ from functools import wraps
17
+ from urllib.parse import urlparse, urljoin
18
+
19
+ from flask import Flask, request, jsonify, send_file, Response, render_template, redirect, url_for
20
+ from flask_cors import CORS
21
+ from flask_socketio import SocketIO
22
+ from dotenv import load_dotenv
23
+
24
+ # Import i18n support
25
+ from i18n import init_i18n, get_locale, set_locale, _
26
+
27
+ # Import the TTSFM package
28
+ try:
29
+ from ttsfm import TTSClient, Voice, AudioFormat, TTSException
30
+ from ttsfm.exceptions import APIException, NetworkException, ValidationException
31
+ from ttsfm.utils import validate_text_length, split_text_by_length
32
+ except ImportError:
33
+ # Fallback for development when package is not installed
34
+ import sys
35
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
36
+ from ttsfm import TTSClient, Voice, AudioFormat, TTSException
37
+ from ttsfm.exceptions import APIException, NetworkException, ValidationException
38
+ from ttsfm.utils import validate_text_length, split_text_by_length
39
+
40
+ # Load environment variables
41
+ load_dotenv()
42
+
43
+ # Configure logging
44
+ logging.basicConfig(
45
+ level=logging.INFO,
46
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
47
+ )
48
+ logger = logging.getLogger(__name__)
49
+
50
+ # Create Flask app
51
+ app = Flask(__name__, static_folder='static', static_url_path='/static')
52
+ app.secret_key = os.getenv("SECRET_KEY", "ttsfm-secret-key-change-in-production")
53
+ CORS(app)
54
+
55
+ # Configuration (moved up for socketio initialization)
56
+ HOST = os.getenv("HOST", "localhost")
57
+ PORT = int(os.getenv("PORT", "8000"))
58
+ DEBUG = os.getenv("DEBUG", "false").lower() == "true"
59
+
60
+ # Initialize SocketIO with proper async mode
61
+ # Using eventlet for production, threading for development
62
+ async_mode = 'eventlet' if not DEBUG else 'threading'
63
+ socketio = SocketIO(app, cors_allowed_origins="*", async_mode=async_mode)
64
+
65
+ # Initialize i18n support
66
+ init_i18n(app)
67
+
68
+ # API Key configuration
69
+ API_KEY = os.getenv("TTSFM_API_KEY") # Set this environment variable for API protection
70
+ REQUIRE_API_KEY = os.getenv("REQUIRE_API_KEY", "false").lower() == "true"
71
+
72
+ # Create TTS client - now uses openai.fm directly, no configuration needed
73
+ tts_client = TTSClient()
74
+
75
+ # Initialize WebSocket handler
76
+ from websocket_handler import WebSocketTTSHandler
77
+ websocket_handler = WebSocketTTSHandler(socketio, tts_client)
78
+
79
+ logger.info("Initialized web app with TTSFM using openai.fm free service")
80
+ logger.info(f"WebSocket support enabled with {async_mode} async mode")
81
+
82
+ # API Key validation decorator
83
+ def require_api_key(f):
84
+ """Decorator to require API key for protected endpoints."""
85
+ @wraps(f)
86
+ def decorated_function(*args, **kwargs):
87
+ # Skip API key check if not required
88
+ if not REQUIRE_API_KEY:
89
+ return f(*args, **kwargs)
90
+
91
+ # Check if API key is configured
92
+ if not API_KEY:
93
+ logger.warning("API key protection is enabled but TTSFM_API_KEY is not set")
94
+ return jsonify({
95
+ "error": "API key protection is enabled but not configured properly"
96
+ }), 500
97
+
98
+ # Get API key from request headers - prioritize Authorization header (OpenAI compatible)
99
+ provided_key = None
100
+
101
+ # 1. Check Authorization header first (OpenAI standard)
102
+ auth_header = request.headers.get('Authorization')
103
+ if auth_header and auth_header.startswith('Bearer '):
104
+ provided_key = auth_header[7:] # Remove 'Bearer ' prefix
105
+
106
+ # 2. Check X-API-Key header as fallback
107
+ if not provided_key:
108
+ provided_key = request.headers.get('X-API-Key')
109
+
110
+ # 3. Check API key from query parameters as fallback
111
+ if not provided_key:
112
+ provided_key = request.args.get('api_key')
113
+
114
+ # 4. Check API key from JSON body as fallback
115
+ if not provided_key and request.is_json:
116
+ data = request.get_json(silent=True)
117
+ if data:
118
+ provided_key = data.get('api_key')
119
+
120
+ # Validate API key
121
+ if not provided_key or provided_key != API_KEY:
122
+ logger.warning(f"Invalid API key attempt from {request.remote_addr}")
123
+ return jsonify({
124
+ "error": {
125
+ "message": "Invalid API key provided",
126
+ "type": "invalid_request_error",
127
+ "code": "invalid_api_key"
128
+ }
129
+ }), 401
130
+
131
+ return f(*args, **kwargs)
132
+ return decorated_function
133
+
134
+ def combine_audio_chunks(audio_chunks: List[bytes], format_type: str = "mp3") -> bytes:
135
+ """
136
+ Combine multiple audio chunks into a single audio file.
137
+
138
+ Args:
139
+ audio_chunks: List of audio data as bytes
140
+ format_type: Audio format (mp3, wav, etc.)
141
+
142
+ Returns:
143
+ bytes: Combined audio data
144
+ """
145
+ try:
146
+ # Try to use pydub for audio processing if available
147
+ try:
148
+ from pydub import AudioSegment
149
+
150
+ # Convert each chunk to AudioSegment
151
+ audio_segments = []
152
+ for chunk in audio_chunks:
153
+ if format_type.lower() == "mp3":
154
+ segment = AudioSegment.from_mp3(io.BytesIO(chunk))
155
+ elif format_type.lower() == "wav":
156
+ segment = AudioSegment.from_wav(io.BytesIO(chunk))
157
+ elif format_type.lower() == "opus":
158
+ # For OPUS, we'll treat it as WAV since openai.fm returns WAV for OPUS requests
159
+ segment = AudioSegment.from_wav(io.BytesIO(chunk))
160
+ else:
161
+ # For other formats, try to auto-detect or default to WAV
162
+ try:
163
+ segment = AudioSegment.from_file(io.BytesIO(chunk))
164
+ except:
165
+ segment = AudioSegment.from_wav(io.BytesIO(chunk))
166
+
167
+ audio_segments.append(segment)
168
+
169
+ # Combine all segments
170
+ combined = audio_segments[0]
171
+ for segment in audio_segments[1:]:
172
+ combined += segment
173
+
174
+ # Export to bytes
175
+ output_buffer = io.BytesIO()
176
+ if format_type.lower() == "mp3":
177
+ combined.export(output_buffer, format="mp3")
178
+ elif format_type.lower() == "wav":
179
+ combined.export(output_buffer, format="wav")
180
+ else:
181
+ # Default to the original format or WAV
182
+ try:
183
+ combined.export(output_buffer, format=format_type.lower())
184
+ except:
185
+ combined.export(output_buffer, format="wav")
186
+
187
+ return output_buffer.getvalue()
188
+
189
+ except ImportError:
190
+ # Fallback: Simple concatenation for WAV files
191
+ logger.warning("pydub not available, using simple concatenation for WAV files")
192
+
193
+ if format_type.lower() == "wav":
194
+ return _simple_wav_concatenation(audio_chunks)
195
+ else:
196
+ # For non-WAV formats without pydub, just concatenate raw bytes
197
+ # This won't produce valid audio but is better than failing
198
+ logger.warning(f"Cannot properly combine {format_type} files without pydub, using raw concatenation")
199
+ return b''.join(audio_chunks)
200
+
201
+ except Exception as e:
202
+ logger.error(f"Error combining audio chunks: {e}")
203
+ # Fallback to simple concatenation
204
+ return b''.join(audio_chunks)
205
+
206
+ def _simple_wav_concatenation(wav_chunks: List[bytes]) -> bytes:
207
+ """
208
+ Simple WAV file concatenation without external dependencies.
209
+ This is a basic implementation that works for simple WAV files.
210
+ """
211
+ if not wav_chunks:
212
+ return b''
213
+
214
+ if len(wav_chunks) == 1:
215
+ return wav_chunks[0]
216
+
217
+ try:
218
+ # For WAV files, we can do a simple concatenation by:
219
+ # 1. Taking the header from the first file
220
+ # 2. Concatenating all the audio data
221
+ # 3. Updating the file size in the header
222
+
223
+ first_wav = wav_chunks[0]
224
+ if len(first_wav) < 44: # WAV header is at least 44 bytes
225
+ return b''.join(wav_chunks)
226
+
227
+ # Extract header from first file (first 44 bytes)
228
+ header = bytearray(first_wav[:44])
229
+
230
+ # Collect all audio data (skip headers for subsequent files)
231
+ audio_data = first_wav[44:] # Audio data from first file
232
+
233
+ for wav_chunk in wav_chunks[1:]:
234
+ if len(wav_chunk) > 44:
235
+ audio_data += wav_chunk[44:] # Skip header, append audio data
236
+
237
+ # Update file size in header (bytes 4-7)
238
+ total_size = len(header) + len(audio_data) - 8
239
+ header[4:8] = total_size.to_bytes(4, byteorder='little')
240
+
241
+ # Update data chunk size in header (bytes 40-43)
242
+ data_size = len(audio_data)
243
+ header[40:44] = data_size.to_bytes(4, byteorder='little')
244
+
245
+ return bytes(header) + audio_data
246
+
247
+ except Exception as e:
248
+ logger.error(f"Error in simple WAV concatenation: {e}")
249
+ # Ultimate fallback
250
+ return b''.join(wav_chunks)
251
+
252
+ def _is_safe_url(target: Optional[str]) -> bool:
253
+ """Validate that a target URL is safe for redirection.
254
+
255
+ Allows only relative URLs or absolute URLs that match this server's host
256
+ and http/https schemes. Prevents open redirects to external domains.
257
+ """
258
+ if not target:
259
+ return False
260
+
261
+ parsed = urlparse(target)
262
+ if parsed.scheme or parsed.netloc or target.startswith('//'):
263
+ return False
264
+ if not parsed.path.startswith('/'):
265
+ return False
266
+ joined = urljoin(request.host_url, target)
267
+ host = urlparse(request.host_url)
268
+ j = urlparse(joined)
269
+ return j.scheme in ("http", "https") and j.netloc == host.netloc
270
+
271
+ @app.route('/set-language/<lang_code>')
272
+ def set_language(lang_code):
273
+ """Set the user's language preference."""
274
+ if set_locale(lang_code):
275
+ # Redirect back only if the referrer is safe; otherwise go home
276
+ target = request.referrer
277
+ if _is_safe_url(target):
278
+ return redirect(target)
279
+ return redirect(url_for('index'))
280
+ else:
281
+ # Invalid language code, redirect to home
282
+ return redirect(url_for('index'))
283
+
284
+ @app.route('/')
285
+ def index():
286
+ """Serve the main web interface."""
287
+ return render_template('index.html')
288
+
289
+ @app.route('/playground')
290
+ def playground():
291
+ """Serve the interactive playground."""
292
+ return render_template('playground.html')
293
+
294
+ @app.route('/docs')
295
+ def docs():
296
+ """Serve the API documentation."""
297
+ return render_template('docs.html')
298
+
299
+ @app.route('/websocket-demo')
300
+ def websocket_demo():
301
+ """Serve the WebSocket streaming demo page."""
302
+ return render_template('websocket_demo.html')
303
+
304
+ @app.route('/api/voices', methods=['GET'])
305
+ def get_voices():
306
+ """Get list of available voices."""
307
+ try:
308
+ voices = [
309
+ {
310
+ "id": voice.value,
311
+ "name": voice.value.title(),
312
+ "description": f"{voice.value.title()} voice"
313
+ }
314
+ for voice in Voice
315
+ ]
316
+
317
+ return jsonify({
318
+ "voices": voices,
319
+ "count": len(voices)
320
+ })
321
+
322
+ except Exception as e:
323
+ logger.error(f"Error getting voices: {e}")
324
+ return jsonify({"error": "Failed to get voices"}), 500
325
+
326
+ @app.route('/api/formats', methods=['GET'])
327
+ def get_formats():
328
+ """Get list of supported audio formats."""
329
+ try:
330
+ formats = [
331
+ {
332
+ "id": "mp3",
333
+ "name": "MP3",
334
+ "mime_type": "audio/mpeg",
335
+ "description": "MP3 audio format - good quality, small file size",
336
+ "quality": "Good",
337
+ "file_size": "Small",
338
+ "use_case": "Web, mobile apps, general use"
339
+ },
340
+ {
341
+ "id": "opus",
342
+ "name": "OPUS",
343
+ "mime_type": "audio/opus",
344
+ "description": "OPUS audio format - excellent quality, small file size",
345
+ "quality": "Excellent",
346
+ "file_size": "Small",
347
+ "use_case": "Web streaming, VoIP"
348
+ },
349
+ {
350
+ "id": "aac",
351
+ "name": "AAC",
352
+ "mime_type": "audio/aac",
353
+ "description": "AAC audio format - good quality, medium file size",
354
+ "quality": "Good",
355
+ "file_size": "Medium",
356
+ "use_case": "Apple devices, streaming"
357
+ },
358
+ {
359
+ "id": "flac",
360
+ "name": "FLAC",
361
+ "mime_type": "audio/flac",
362
+ "description": "FLAC audio format - lossless quality, large file size",
363
+ "quality": "Lossless",
364
+ "file_size": "Large",
365
+ "use_case": "High-quality archival"
366
+ },
367
+ {
368
+ "id": "wav",
369
+ "name": "WAV",
370
+ "mime_type": "audio/wav",
371
+ "description": "WAV audio format - lossless quality, large file size",
372
+ "quality": "Lossless",
373
+ "file_size": "Large",
374
+ "use_case": "Professional audio"
375
+ },
376
+ {
377
+ "id": "pcm",
378
+ "name": "PCM",
379
+ "mime_type": "audio/pcm",
380
+ "description": "PCM audio format - raw audio data, large file size",
381
+ "quality": "Raw",
382
+ "file_size": "Large",
383
+ "use_case": "Audio processing"
384
+ }
385
+ ]
386
+
387
+ return jsonify({
388
+ "formats": formats,
389
+ "count": len(formats)
390
+ })
391
+
392
+ except Exception as e:
393
+ logger.error(f"Error getting formats: {e}")
394
+ return jsonify({"error": "Failed to get formats"}), 500
395
+
396
+ @app.route('/api/validate-text', methods=['POST'])
397
+ def validate_text():
398
+ """Validate text length and provide splitting suggestions."""
399
+ try:
400
+ data = request.get_json()
401
+ if not data:
402
+ return jsonify({"error": "No JSON data provided"}), 400
403
+
404
+ text = data.get('text', '').strip()
405
+ max_length = data.get('max_length', 4096)
406
+
407
+ if not text:
408
+ return jsonify({"error": "Text is required"}), 400
409
+
410
+ text_length = len(text)
411
+ is_valid = text_length <= max_length
412
+
413
+ result = {
414
+ "text_length": text_length,
415
+ "max_length": max_length,
416
+ "is_valid": is_valid,
417
+ "needs_splitting": not is_valid
418
+ }
419
+
420
+ if not is_valid:
421
+ # Provide splitting suggestions
422
+ chunks = split_text_by_length(text, max_length, preserve_words=True)
423
+ result.update({
424
+ "suggested_chunks": len(chunks),
425
+ "chunk_preview": [chunk[:100] + "..." if len(chunk) > 100 else chunk for chunk in chunks[:3]]
426
+ })
427
+
428
+ return jsonify(result)
429
+
430
+ except Exception as e:
431
+ logger.error(f"Text validation error: {e}")
432
+ return jsonify({"error": "Text validation failed"}), 500
433
+
434
+ @app.route('/api/generate', methods=['POST'])
435
+ @require_api_key
436
+ def generate_speech():
437
+ """Generate speech from text using the TTSFM package."""
438
+ try:
439
+ # Parse request data
440
+ data = request.get_json()
441
+ if not data:
442
+ return jsonify({"error": "No JSON data provided"}), 400
443
+
444
+ # Extract parameters
445
+ text = data.get('text', '').strip()
446
+ voice = data.get('voice', Voice.ALLOY.value)
447
+ response_format = data.get('format', AudioFormat.MP3.value)
448
+ instructions = data.get('instructions', '').strip() or None
449
+ max_length = data.get('max_length', 4096)
450
+ validate_length = data.get('validate_length', True)
451
+
452
+ # Validate required fields
453
+ if not text:
454
+ return jsonify({"error": "Text is required"}), 400
455
+
456
+ # Validate voice
457
+ try:
458
+ voice_enum = Voice(voice.lower())
459
+ except ValueError:
460
+ return jsonify({
461
+ "error": f"Invalid voice: {voice}. Must be one of: {[v.value for v in Voice]}"
462
+ }), 400
463
+
464
+ # Validate format
465
+ try:
466
+ format_enum = AudioFormat(response_format.lower())
467
+ except ValueError:
468
+ return jsonify({
469
+ "error": f"Invalid format: {response_format}. Must be one of: {[f.value for f in AudioFormat]}"
470
+ }), 400
471
+
472
+ logger.info(f"Generating speech: text='{text[:50]}...', voice={voice}, format={response_format}")
473
+
474
+ # Generate speech using the TTSFM package with validation
475
+ response = tts_client.generate_speech(
476
+ text=text,
477
+ voice=voice_enum,
478
+ response_format=format_enum,
479
+ instructions=instructions,
480
+ max_length=max_length,
481
+ validate_length=validate_length
482
+ )
483
+
484
+ # Return audio data
485
+ return Response(
486
+ response.audio_data,
487
+ mimetype=response.content_type,
488
+ headers={
489
+ 'Content-Disposition': f'attachment; filename="speech.{response.format.value}"',
490
+ 'Content-Length': str(response.size),
491
+ 'X-Audio-Format': response.format.value,
492
+ 'X-Audio-Size': str(response.size)
493
+ }
494
+ )
495
+
496
+ except ValidationException as e:
497
+ logger.warning(f"Validation error: {e}")
498
+ return jsonify({"error": "Invalid input parameters"}), 400
499
+
500
+ except APIException as e:
501
+ logger.error(f"API error: {e}")
502
+ return jsonify({
503
+ "error": "TTS service error",
504
+ "status_code": getattr(e, 'status_code', 500)
505
+ }), getattr(e, 'status_code', 500)
506
+
507
+ except NetworkException as e:
508
+ logger.error(f"Network error: {e}")
509
+ return jsonify({
510
+ "error": "TTS service is currently unavailable"
511
+ }), 503
512
+
513
+ except TTSException as e:
514
+ logger.error(f"TTS error: {e}")
515
+ return jsonify({"error": "Text-to-speech generation failed"}), 500
516
+
517
+ except Exception as e:
518
+ logger.error(f"Unexpected error: {e}")
519
+ return jsonify({"error": "Internal server error"}), 500
520
+
521
+
522
+
523
+ @app.route('/api/generate-combined', methods=['POST'])
524
+ @require_api_key
525
+ def generate_speech_combined():
526
+ """Generate speech from long text and return a single combined audio file."""
527
+ try:
528
+ data = request.get_json()
529
+ if not data:
530
+ return jsonify({"error": "No JSON data provided"}), 400
531
+
532
+ text = data.get('text', '').strip()
533
+ voice = data.get('voice', Voice.ALLOY.value)
534
+ response_format = data.get('format', AudioFormat.MP3.value)
535
+ instructions = data.get('instructions', '').strip() or None
536
+ max_length = data.get('max_length', 4096)
537
+ preserve_words = data.get('preserve_words', True)
538
+
539
+ if not text:
540
+ return jsonify({"error": "Text is required"}), 400
541
+
542
+ # Check if text needs splitting
543
+ if len(text) <= max_length:
544
+ # Text is short enough, use regular generation
545
+ try:
546
+ voice_enum = Voice(voice.lower())
547
+ format_enum = AudioFormat(response_format.lower())
548
+ except ValueError as e:
549
+ logger.warning(f"Invalid voice or format: {e}")
550
+ return jsonify({"error": "Invalid voice or format specified"}), 400
551
+
552
+ response = tts_client.generate_speech(
553
+ text=text,
554
+ voice=voice_enum,
555
+ response_format=format_enum,
556
+ instructions=instructions,
557
+ max_length=max_length,
558
+ validate_length=True
559
+ )
560
+
561
+ return Response(
562
+ response.audio_data,
563
+ mimetype=response.content_type,
564
+ headers={
565
+ 'Content-Disposition': f'attachment; filename="combined_speech.{response.format.value}"',
566
+ 'Content-Length': str(response.size),
567
+ 'X-Audio-Format': response.format.value,
568
+ 'X-Audio-Size': str(response.size),
569
+ 'X-Chunks-Combined': '1'
570
+ }
571
+ )
572
+
573
+ # Text is long, split and combine
574
+ try:
575
+ voice_enum = Voice(voice.lower())
576
+ format_enum = AudioFormat(response_format.lower())
577
+ except ValueError as e:
578
+ logger.warning(f"Invalid voice or format: {e}")
579
+ return jsonify({"error": "Invalid voice or format specified"}), 400
580
+
581
+ logger.info(f"Generating combined speech for long text: {len(text)} characters, splitting into chunks")
582
+
583
+ # Generate speech chunks
584
+ try:
585
+ responses = tts_client.generate_speech_long_text(
586
+ text=text,
587
+ voice=voice_enum,
588
+ response_format=format_enum,
589
+ instructions=instructions,
590
+ max_length=max_length,
591
+ preserve_words=preserve_words
592
+ )
593
+ except Exception as e:
594
+ logger.error(f"Long text generation failed: {e}")
595
+ return jsonify({"error": "Long text generation failed"}), 500
596
+
597
+ if not responses:
598
+ return jsonify({"error": "No valid text chunks found"}), 400
599
+
600
+ logger.info(f"Generated {len(responses)} chunks, combining into single audio file")
601
+
602
+ # Extract audio data from responses
603
+ audio_chunks = [response.audio_data for response in responses]
604
+
605
+ # Combine audio chunks
606
+ try:
607
+ combined_audio = combine_audio_chunks(audio_chunks, format_enum.value)
608
+ except Exception as e:
609
+ logger.error(f"Failed to combine audio chunks: {e}")
610
+ return jsonify({"error": "Failed to combine audio chunks"}), 500
611
+
612
+ if not combined_audio:
613
+ return jsonify({"error": "Failed to generate combined audio"}), 500
614
+
615
+ # Determine content type
616
+ content_type = responses[0].content_type # Use content type from first chunk
617
+
618
+ logger.info(f"Successfully combined {len(responses)} chunks into single audio file ({len(combined_audio)} bytes)")
619
+
620
+ return Response(
621
+ combined_audio,
622
+ mimetype=content_type,
623
+ headers={
624
+ 'Content-Disposition': f'attachment; filename="combined_speech.{format_enum.value}"',
625
+ 'Content-Length': str(len(combined_audio)),
626
+ 'X-Audio-Format': format_enum.value,
627
+ 'X-Audio-Size': str(len(combined_audio)),
628
+ 'X-Chunks-Combined': str(len(responses)),
629
+ 'X-Original-Text-Length': str(len(text))
630
+ }
631
+ )
632
+
633
+ except ValidationException as e:
634
+ logger.warning(f"Validation error: {e}")
635
+ return jsonify({"error": "Invalid input parameters"}), 400
636
+
637
+ except APIException as e:
638
+ logger.error(f"API error: {e}")
639
+ return jsonify({
640
+ "error": "TTS service error",
641
+ "status_code": getattr(e, 'status_code', 500)
642
+ }), getattr(e, 'status_code', 500)
643
+
644
+ except NetworkException as e:
645
+ logger.error(f"Network error: {e}")
646
+ return jsonify({
647
+ "error": "TTS service is currently unavailable"
648
+ }), 503
649
+
650
+ except TTSException as e:
651
+ logger.error(f"TTS error: {e}")
652
+ return jsonify({"error": "Text-to-speech generation failed"}), 500
653
+
654
+ except Exception as e:
655
+ logger.error(f"Combined generation error: {e}")
656
+ return jsonify({"error": "Combined audio generation failed"}), 500
657
+
658
+ @app.route('/api/status', methods=['GET'])
659
+ def get_status():
660
+ """Get service status."""
661
+ try:
662
+ # Try to make a simple request to check if the TTS service is available
663
+ test_response = tts_client.generate_speech(
664
+ text="test",
665
+ voice=Voice.ALLOY,
666
+ response_format=AudioFormat.MP3
667
+ )
668
+
669
+ return jsonify({
670
+ "status": "online",
671
+ "tts_service": "openai.fm (free)",
672
+ "package_version": "3.2.3",
673
+ "timestamp": datetime.now().isoformat()
674
+ })
675
+
676
+ except Exception as e:
677
+ logger.error(f"Status check failed: {e}")
678
+ return jsonify({
679
+ "status": "error",
680
+ "tts_service": "openai.fm (free)",
681
+ "error": "Service status check failed",
682
+ "timestamp": datetime.now().isoformat()
683
+ }), 503
684
+
685
+ @app.route('/api/health', methods=['GET'])
686
+ def health_check():
687
+ """Simple health check endpoint."""
688
+ return jsonify({
689
+ "status": "healthy",
690
+ "package_version": "3.2.3",
691
+ "timestamp": datetime.now().isoformat()
692
+ })
693
+
694
+ @app.route('/api/websocket/status', methods=['GET'])
695
+ def websocket_status():
696
+ """Get WebSocket server status and active connections."""
697
+ return jsonify({
698
+ "websocket_enabled": True,
699
+ "async_mode": async_mode,
700
+ "active_sessions": websocket_handler.get_active_sessions_count(),
701
+ "transport_options": ["websocket", "polling"],
702
+ "endpoint": f"ws{'s' if request.is_secure else ''}://{request.host}/socket.io/",
703
+ "timestamp": datetime.now().isoformat()
704
+ })
705
+
706
+ @app.route('/api/auth-status', methods=['GET'])
707
+ def auth_status():
708
+ """Get authentication status and requirements."""
709
+ return jsonify({
710
+ "api_key_required": REQUIRE_API_KEY,
711
+ "api_key_configured": bool(API_KEY) if REQUIRE_API_KEY else None,
712
+ "timestamp": datetime.now().isoformat()
713
+ })
714
+
715
+ @app.route('/api/translations/<lang_code>', methods=['GET'])
716
+ def get_translations(lang_code):
717
+ """Get translations for a specific language."""
718
+ try:
719
+ if hasattr(app, 'language_manager'):
720
+ translations = app.language_manager.translations.get(lang_code, {})
721
+ return jsonify(translations)
722
+ else:
723
+ return jsonify({}), 404
724
+ except Exception as e:
725
+ logger.error(f"Error getting translations for {lang_code}: {e}")
726
+ return jsonify({"error": "Failed to get translations"}), 500
727
+
728
+ # OpenAI-compatible API endpoints
729
+ @app.route('/v1/audio/speech', methods=['POST'])
730
+ @require_api_key
731
+ def openai_speech():
732
+ """OpenAI-compatible speech generation endpoint with auto-combine feature."""
733
+ try:
734
+ # Parse request data
735
+ data = request.get_json()
736
+ if not data:
737
+ return jsonify({
738
+ "error": {
739
+ "message": "No JSON data provided",
740
+ "type": "invalid_request_error",
741
+ "code": "missing_data"
742
+ }
743
+ }), 400
744
+
745
+ # Extract OpenAI-compatible parameters
746
+ model = data.get('model', 'gpt-4o-mini-tts') # Accept but ignore model
747
+ input_text = data.get('input', '').strip()
748
+ voice = data.get('voice', 'alloy')
749
+ response_format = data.get('response_format', 'mp3')
750
+ instructions = data.get('instructions', '').strip() or None
751
+ speed = data.get('speed', 1.0) # Accept but ignore speed
752
+
753
+ # TTSFM-specific parameters
754
+ auto_combine = data.get('auto_combine', True) # New parameter: auto-combine long text (default: True)
755
+ max_length = data.get('max_length', 4096) # Custom parameter for chunk size
756
+
757
+ # Validate required fields
758
+ if not input_text:
759
+ return jsonify({
760
+ "error": {
761
+ "message": "Input text is required",
762
+ "type": "invalid_request_error",
763
+ "code": "missing_input"
764
+ }
765
+ }), 400
766
+
767
+ # Validate voice
768
+ try:
769
+ voice_enum = Voice(voice.lower())
770
+ except ValueError:
771
+ return jsonify({
772
+ "error": {
773
+ "message": f"Invalid voice: {voice}. Must be one of: {[v.value for v in Voice]}",
774
+ "type": "invalid_request_error",
775
+ "code": "invalid_voice"
776
+ }
777
+ }), 400
778
+
779
+ # Validate format
780
+ try:
781
+ format_enum = AudioFormat(response_format.lower())
782
+ except ValueError:
783
+ return jsonify({
784
+ "error": {
785
+ "message": f"Invalid response_format: {response_format}. Must be one of: {[f.value for f in AudioFormat]}",
786
+ "type": "invalid_request_error",
787
+ "code": "invalid_format"
788
+ }
789
+ }), 400
790
+
791
+ logger.info(f"OpenAI API: Generating speech: text='{input_text[:50]}...', voice={voice}, format={response_format}, auto_combine={auto_combine}")
792
+
793
+ # Check if text exceeds limit and auto_combine is enabled
794
+ if len(input_text) > max_length and auto_combine:
795
+ # Long text with auto-combine enabled: split and combine
796
+ logger.info(f"Long text detected ({len(input_text)} chars), auto-combining enabled")
797
+
798
+ # Generate speech chunks
799
+ responses = tts_client.generate_speech_long_text(
800
+ text=input_text,
801
+ voice=voice_enum,
802
+ response_format=format_enum,
803
+ instructions=instructions,
804
+ max_length=max_length,
805
+ preserve_words=True
806
+ )
807
+
808
+ if not responses:
809
+ return jsonify({
810
+ "error": {
811
+ "message": "No valid text chunks found",
812
+ "type": "processing_error",
813
+ "code": "no_chunks"
814
+ }
815
+ }), 400
816
+
817
+ # Extract audio data and combine
818
+ audio_chunks = [response.audio_data for response in responses]
819
+ combined_audio = combine_audio_chunks(audio_chunks, format_enum.value)
820
+
821
+ if not combined_audio:
822
+ return jsonify({
823
+ "error": {
824
+ "message": "Failed to combine audio chunks",
825
+ "type": "processing_error",
826
+ "code": "combine_failed"
827
+ }
828
+ }), 500
829
+
830
+ content_type = responses[0].content_type
831
+
832
+ logger.info(f"Successfully combined {len(responses)} chunks into single audio file")
833
+
834
+ return Response(
835
+ combined_audio,
836
+ mimetype=content_type,
837
+ headers={
838
+ 'Content-Type': content_type,
839
+ 'Content-Length': str(len(combined_audio)),
840
+ 'X-Audio-Format': format_enum.value,
841
+ 'X-Audio-Size': str(len(combined_audio)),
842
+ 'X-Chunks-Combined': str(len(responses)),
843
+ 'X-Original-Text-Length': str(len(input_text)),
844
+ 'X-Auto-Combine': 'true',
845
+ 'X-Powered-By': 'TTSFM-OpenAI-Compatible'
846
+ }
847
+ )
848
+
849
+ else:
850
+ # Short text or auto_combine disabled: use regular generation
851
+ if len(input_text) > max_length and not auto_combine:
852
+ # Text is too long but auto_combine is disabled - return error
853
+ return jsonify({
854
+ "error": {
855
+ "message": f"Input text is too long ({len(input_text)} characters). Maximum allowed length is {max_length} characters. Enable auto_combine parameter to automatically split and combine long text.",
856
+ "type": "invalid_request_error",
857
+ "code": "text_too_long"
858
+ }
859
+ }), 400
860
+
861
+ # Generate speech using the TTSFM package
862
+ response = tts_client.generate_speech(
863
+ text=input_text,
864
+ voice=voice_enum,
865
+ response_format=format_enum,
866
+ instructions=instructions,
867
+ max_length=max_length,
868
+ validate_length=True
869
+ )
870
+
871
+ # Return audio data in OpenAI format
872
+ return Response(
873
+ response.audio_data,
874
+ mimetype=response.content_type,
875
+ headers={
876
+ 'Content-Type': response.content_type,
877
+ 'Content-Length': str(response.size),
878
+ 'X-Audio-Format': response.format.value,
879
+ 'X-Audio-Size': str(response.size),
880
+ 'X-Chunks-Combined': '1',
881
+ 'X-Auto-Combine': str(auto_combine).lower(),
882
+ 'X-Powered-By': 'TTSFM-OpenAI-Compatible'
883
+ }
884
+ )
885
+
886
+ except ValidationException as e:
887
+ logger.warning(f"OpenAI API validation error: {e}")
888
+ return jsonify({
889
+ "error": {
890
+ "message": "Invalid request parameters",
891
+ "type": "invalid_request_error",
892
+ "code": "validation_error"
893
+ }
894
+ }), 400
895
+
896
+ except APIException as e:
897
+ logger.error(f"OpenAI API error: {e}")
898
+ return jsonify({
899
+ "error": {
900
+ "message": "Text-to-speech generation failed",
901
+ "type": "api_error",
902
+ "code": "tts_error"
903
+ }
904
+ }), getattr(e, 'status_code', 500)
905
+
906
+ except NetworkException as e:
907
+ logger.error(f"OpenAI API network error: {e}")
908
+ return jsonify({
909
+ "error": {
910
+ "message": "TTS service is currently unavailable",
911
+ "type": "service_unavailable_error",
912
+ "code": "service_unavailable"
913
+ }
914
+ }), 503
915
+
916
+ except Exception as e:
917
+ logger.error(f"OpenAI API unexpected error: {e}")
918
+ return jsonify({
919
+ "error": {
920
+ "message": "An unexpected error occurred",
921
+ "type": "internal_error",
922
+ "code": "internal_error"
923
+ }
924
+ }), 500
925
+
926
+
927
+
928
+ @app.route('/v1/models', methods=['GET'])
929
+ def openai_models():
930
+ """OpenAI-compatible models endpoint."""
931
+ return jsonify({
932
+ "object": "list",
933
+ "data": [
934
+ {
935
+ "id": "gpt-4o-mini-tts",
936
+ "object": "model",
937
+ "created": 1699564800,
938
+ "owned_by": "ttsfm",
939
+ "permission": [],
940
+ "root": "gpt-4o-mini-tts",
941
+ "parent": None
942
+ }
943
+ ]
944
+ })
945
+
946
+ @app.errorhandler(404)
947
+ def not_found(error):
948
+ """Handle 404 errors."""
949
+ return jsonify({"error": "Endpoint not found"}), 404
950
+
951
+ @app.errorhandler(405)
952
+ def method_not_allowed(error):
953
+ """Handle 405 errors."""
954
+ return jsonify({"error": "Method not allowed"}), 405
955
+
956
+ @app.errorhandler(500)
957
+ def internal_error(error):
958
+ """Handle 500 errors."""
959
+ logger.error(f"Internal server error: {error}")
960
+ return jsonify({"error": "Internal server error"}), 500
961
+
962
+ if __name__ == '__main__':
963
+ logger.info(f"Starting TTSFM web application on {HOST}:{PORT}")
964
+ logger.info("Using openai.fm free TTS service")
965
+ logger.info(f"Debug mode: {DEBUG}")
966
+
967
+ # Log API key protection status
968
+ if REQUIRE_API_KEY:
969
+ if API_KEY:
970
+ logger.info("🔒 API key protection is ENABLED")
971
+ logger.info("All TTS generation requests require a valid API key")
972
+ else:
973
+ logger.warning("⚠️ API key protection is enabled but TTSFM_API_KEY is not set!")
974
+ logger.warning("Please set the TTSFM_API_KEY environment variable")
975
+ else:
976
+ logger.info("🔓 API key protection is DISABLED - all requests are allowed")
977
+ logger.info("Set REQUIRE_API_KEY=true to enable API key protection")
978
+
979
+ try:
980
+ logger.info(f"Starting with {async_mode} async mode")
981
+ socketio.run(app, host=HOST, port=PORT, debug=DEBUG)
982
+ except KeyboardInterrupt:
983
+ logger.info("Application stopped by user")
984
+ except Exception as e:
985
+ logger.error(f"Failed to start application: {e}")
986
+ finally:
987
+ # Clean up TTS client
988
+ tts_client.close()
ttsfm-web/i18n.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Internationalization (i18n) support for TTSFM Web Application
3
+
4
+ This module provides multi-language support for the Flask web application,
5
+ including language detection, translation management, and template functions.
6
+ """
7
+
8
+ import json
9
+ import os
10
+ from typing import Dict, Any, Optional
11
+ from flask import request, session, current_app
12
+
13
+
14
+ class LanguageManager:
15
+ """Manages language detection, translation loading, and text translation."""
16
+
17
+ def __init__(self, app=None, translations_dir: str = "translations"):
18
+ """
19
+ Initialize the LanguageManager.
20
+
21
+ Args:
22
+ app: Flask application instance
23
+ translations_dir: Directory containing translation files
24
+ """
25
+ self.translations_dir = translations_dir
26
+ self.translations: Dict[str, Dict[str, Any]] = {}
27
+ self.supported_languages = ['en', 'zh']
28
+ self.default_language = 'en'
29
+
30
+ if app is not None:
31
+ self.init_app(app)
32
+
33
+ def init_app(self, app):
34
+ """Initialize the Flask application with i18n support."""
35
+ app.config.setdefault('LANGUAGES', self.supported_languages)
36
+ app.config.setdefault('DEFAULT_LANGUAGE', self.default_language)
37
+
38
+ # Load translations
39
+ self.load_translations()
40
+
41
+ # Register template functions
42
+ app.jinja_env.globals['_'] = self.translate
43
+ app.jinja_env.globals['get_locale'] = self.get_locale
44
+ app.jinja_env.globals['get_supported_languages'] = self.get_supported_languages
45
+
46
+ # Store reference to this instance
47
+ app.language_manager = self
48
+
49
+ def load_translations(self):
50
+ """Load all translation files from the translations directory."""
51
+ translations_path = os.path.join(
52
+ os.path.dirname(__file__),
53
+ self.translations_dir
54
+ )
55
+
56
+ if not os.path.exists(translations_path):
57
+ print(f"Warning: Translations directory not found: {translations_path}")
58
+ return
59
+
60
+ for lang_code in self.supported_languages:
61
+ file_path = os.path.join(translations_path, f"{lang_code}.json")
62
+
63
+ if os.path.exists(file_path):
64
+ try:
65
+ with open(file_path, 'r', encoding='utf-8') as f:
66
+ self.translations[lang_code] = json.load(f)
67
+ print(f"Info: Loaded translations for language: {lang_code}")
68
+ except Exception as e:
69
+ print(f"Error: Failed to load translations for {lang_code}: {e}")
70
+ else:
71
+ print(f"Warning: Translation file not found: {file_path}")
72
+
73
+ def get_locale(self) -> str:
74
+ """
75
+ Get the current locale based on user preference, session, or browser settings.
76
+
77
+ Returns:
78
+ Language code (e.g., 'en', 'zh')
79
+ """
80
+ # 1. Check URL parameter (for language switching)
81
+ if 'lang' in request.args:
82
+ lang = request.args.get('lang')
83
+ if lang in self.supported_languages:
84
+ session['language'] = lang
85
+ return lang
86
+
87
+ # 2. Check session (user's previous choice)
88
+ if 'language' in session:
89
+ lang = session['language']
90
+ if lang in self.supported_languages:
91
+ return lang
92
+
93
+ # 3. Check browser's Accept-Language header
94
+ if request.headers.get('Accept-Language'):
95
+ browser_langs = request.headers.get('Accept-Language').split(',')
96
+ for browser_lang in browser_langs:
97
+ # Extract language code (e.g., 'zh-CN' -> 'zh')
98
+ lang_code = browser_lang.split(';')[0].split('-')[0].strip().lower()
99
+ if lang_code in self.supported_languages:
100
+ session['language'] = lang_code
101
+ return lang_code
102
+
103
+ # 4. Fall back to default language
104
+ return self.default_language
105
+
106
+ def set_locale(self, lang_code: str) -> bool:
107
+ """
108
+ Set the current locale.
109
+
110
+ Args:
111
+ lang_code: Language code to set
112
+
113
+ Returns:
114
+ True if successful, False if language not supported
115
+ """
116
+ if lang_code in self.supported_languages:
117
+ session['language'] = lang_code
118
+ return True
119
+ return False
120
+
121
+ def translate(self, key: str, **kwargs) -> str:
122
+ """
123
+ Translate a text key to the current locale.
124
+
125
+ Args:
126
+ key: Translation key in dot notation (e.g., 'nav.home')
127
+ **kwargs: Variables for string formatting
128
+
129
+ Returns:
130
+ Translated text or the key if translation not found
131
+ """
132
+ locale = self.get_locale()
133
+
134
+ # Get translation for current locale
135
+ translation = self._get_nested_value(
136
+ self.translations.get(locale, {}),
137
+ key
138
+ )
139
+
140
+ # Fall back to default language if not found
141
+ if translation is None and locale != self.default_language:
142
+ translation = self._get_nested_value(
143
+ self.translations.get(self.default_language, {}),
144
+ key
145
+ )
146
+
147
+ # Fall back to key if still not found
148
+ if translation is None:
149
+ translation = key
150
+
151
+ # Format with variables if provided
152
+ if kwargs and isinstance(translation, str):
153
+ try:
154
+ translation = translation.format(**kwargs)
155
+ except (KeyError, ValueError):
156
+ pass # Ignore formatting errors
157
+
158
+ return translation
159
+
160
+ def _get_nested_value(self, data: Dict[str, Any], key: str) -> Optional[str]:
161
+ """
162
+ Get a nested value from a dictionary using dot notation.
163
+
164
+ Args:
165
+ data: Dictionary to search in
166
+ key: Dot-separated key (e.g., 'nav.home')
167
+
168
+ Returns:
169
+ Value if found, None otherwise
170
+ """
171
+ keys = key.split('.')
172
+ current = data
173
+
174
+ for k in keys:
175
+ if isinstance(current, dict) and k in current:
176
+ current = current[k]
177
+ else:
178
+ return None
179
+
180
+ return current if isinstance(current, str) else None
181
+
182
+ def get_supported_languages(self) -> Dict[str, str]:
183
+ """
184
+ Get a dictionary of supported languages with their display names.
185
+
186
+ Returns:
187
+ Dictionary mapping language codes to display names
188
+ """
189
+ return {
190
+ 'en': 'English',
191
+ 'zh': '中文'
192
+ }
193
+
194
+ def get_language_info(self, lang_code: str) -> Dict[str, str]:
195
+ """
196
+ Get information about a specific language.
197
+
198
+ Args:
199
+ lang_code: Language code
200
+
201
+ Returns:
202
+ Dictionary with language information
203
+ """
204
+ language_names = {
205
+ 'en': {'name': 'English', 'native': 'English'},
206
+ 'zh': {'name': 'Chinese', 'native': '中文'}
207
+ }
208
+
209
+ return language_names.get(lang_code, {
210
+ 'name': lang_code.upper(),
211
+ 'native': lang_code.upper()
212
+ })
213
+
214
+
215
+ # Global instance
216
+ language_manager = LanguageManager()
217
+
218
+
219
+ def init_i18n(app):
220
+ """Initialize i18n support for the Flask application."""
221
+ language_manager.init_app(app)
222
+ return language_manager
223
+
224
+
225
+ # Template helper functions
226
+ def _(key: str, **kwargs) -> str:
227
+ """Shorthand translation function for use in templates and code."""
228
+ return language_manager.translate(key, **kwargs)
229
+
230
+
231
+ def get_locale() -> str:
232
+ """Get the current locale."""
233
+ return language_manager.get_locale()
234
+
235
+
236
+ def set_locale(lang_code: str) -> bool:
237
+ """Set the current locale."""
238
+ return language_manager.set_locale(lang_code)
ttsfm-web/requirements.txt CHANGED
@@ -1,9 +1,16 @@
1
- # Web application dependencies
2
- flask>=2.0.0
3
- flask-cors>=3.0.10
4
- waitress>=3.0.0
5
- python-dotenv>=1.0.0
6
-
7
- # TTSFM package (install from local directory or PyPI)
8
- # For local development: pip install -e ../
9
- # For Docker/production: installed via pyproject.toml[web] dependencies
 
 
 
 
 
 
 
 
1
+ # Web application dependencies
2
+ flask>=2.0.0
3
+ flask-cors>=3.0.10
4
+ flask-socketio>=5.3.0
5
+ python-socketio>=5.10.0
6
+ eventlet>=0.33.3
7
+ waitress>=3.0.0
8
+ python-dotenv>=1.0.0
9
+
10
+ # Audio processing (optional, for combining audio files)
11
+ # If not installed, will fall back to simple concatenation for WAV files
12
+ pydub>=0.25.0
13
+
14
+ # TTSFM package (install from local directory or PyPI)
15
+ # For local development: pip install -e ../
16
+ # For Docker/production: installed via pyproject.toml[web] dependencies
ttsfm-web/run.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ """
3
+ Run script for TTSFM web application with proper eventlet initialization
4
+ """
5
+
6
+ # MUST be the first imports for eventlet to work properly
7
+ import eventlet
8
+ eventlet.monkey_patch()
9
+
10
+ # Now import the app
11
+ from app import app, socketio, HOST, PORT, DEBUG
12
+
13
+ if __name__ == '__main__':
14
+ print(f"Starting TTSFM with WebSocket support on {HOST}:{PORT}")
15
+ socketio.run(app, host=HOST, port=PORT, debug=DEBUG, allow_unsafe_werkzeug=True)
ttsfm-web/static/css/style.css CHANGED
@@ -1,1390 +1,1399 @@
1
- /* TTSFM Web Application Custom Styles */
2
-
3
- :root {
4
- /* Clean Color Palette */
5
- --primary-color: #2563eb;
6
- --primary-dark: #1d4ed8;
7
- --primary-light: #3b82f6;
8
- --secondary-color: #64748b;
9
- --secondary-dark: #475569;
10
- --accent-color: #10b981;
11
- --accent-dark: #059669;
12
-
13
- /* Status Colors */
14
- --success-color: #10b981;
15
- --warning-color: #f59e0b;
16
- --danger-color: #ef4444;
17
- --info-color: #3b82f6;
18
-
19
- /* Clean Neutral Colors */
20
- --light-color: #ffffff;
21
- --light-gray: #f8fafc;
22
- --medium-gray: #64748b;
23
- --dark-color: #1e293b;
24
- --text-color: #374151;
25
- --text-muted: #6b7280;
26
-
27
- /* Design System */
28
- --border-radius: 0.75rem;
29
- --border-radius-sm: 0.5rem;
30
- --border-radius-lg: 1rem;
31
- --box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
32
- --box-shadow-lg: 0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04);
33
- --box-shadow-xl: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
34
- --transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
35
- --transition-fast: all 0.15s cubic-bezier(0.4, 0, 0.2, 1);
36
-
37
- /* Gradients */
38
- --gradient-primary: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-light) 100%);
39
- --gradient-secondary: linear-gradient(135deg, var(--secondary-color) 0%, var(--secondary-dark) 100%);
40
- --gradient-accent: linear-gradient(135deg, var(--accent-color) 0%, var(--accent-dark) 100%);
41
- --gradient-hero: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 50%, var(--accent-color) 100%);
42
- }
43
-
44
- /* Global Styles */
45
- body {
46
- font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
47
- line-height: 1.6;
48
- color: var(--text-color);
49
- background-color: #ffffff;
50
- font-weight: 400;
51
- -webkit-font-smoothing: antialiased;
52
- -moz-osx-font-smoothing: grayscale;
53
- }
54
-
55
- /* Enhanced Typography */
56
- h1, h2, h3, h4, h5, h6 {
57
- font-weight: 700;
58
- line-height: 1.3;
59
- color: var(--dark-color);
60
- letter-spacing: -0.025em;
61
- }
62
-
63
- .display-1, .display-2, .display-3, .display-4 {
64
- font-weight: 800;
65
- letter-spacing: -0.05em;
66
- }
67
-
68
- .lead {
69
- font-size: 1.125rem;
70
- font-weight: 400;
71
- color: var(--text-muted);
72
- line-height: 1.8;
73
- }
74
-
75
- /* Simplified Button Styles */
76
- .btn {
77
- font-weight: 600;
78
- border-radius: var(--border-radius-sm);
79
- transition: all 0.2s ease;
80
- letter-spacing: 0.025em;
81
- }
82
-
83
- .btn-primary {
84
- background-color: var(--primary-color);
85
- border-color: var(--primary-color);
86
- color: white;
87
- }
88
-
89
- .btn-primary:hover {
90
- background-color: var(--primary-dark);
91
- border-color: var(--primary-dark);
92
- color: white;
93
- }
94
-
95
- .btn-outline-primary {
96
- border: 2px solid var(--primary-color);
97
- color: var(--primary-color);
98
- background: transparent;
99
- }
100
-
101
- .btn-outline-primary:hover {
102
- background: var(--primary-color);
103
- border-color: var(--primary-color);
104
- color: white;
105
- }
106
-
107
- .btn-lg {
108
- padding: 0.875rem 2rem;
109
- font-size: 1.125rem;
110
- border-radius: var(--border-radius);
111
- }
112
-
113
- .btn-sm {
114
- padding: 0.5rem 1rem;
115
- font-size: 0.875rem;
116
- border-radius: var(--border-radius-sm);
117
- }
118
-
119
- /* Clean Card Styles */
120
- .card {
121
- border: 1px solid #e5e7eb;
122
- box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
123
- transition: all 0.2s ease;
124
- border-radius: 12px;
125
- background: white;
126
- }
127
-
128
- .card:hover {
129
- box-shadow: 0 4px 6px rgba(0, 0, 0, 0.07);
130
- border-color: #d1d5db;
131
- }
132
-
133
- .card-body {
134
- padding: 2rem;
135
- }
136
-
137
- /* Clean Hero Section */
138
- .hero-section {
139
- background: linear-gradient(135deg, #f8fafc 0%, #ffffff 100%);
140
- color: var(--text-color);
141
- padding: 6rem 0;
142
- min-height: 80vh;
143
- display: flex;
144
- align-items: center;
145
- border-bottom: 1px solid #e5e7eb;
146
- }
147
-
148
- .min-vh-75 {
149
- min-height: 75vh;
150
- }
151
-
152
- /* Status Indicators */
153
- .status-indicator {
154
- display: inline-block;
155
- width: 8px;
156
- height: 8px;
157
- border-radius: 50%;
158
- background-color: #6c757d;
159
- }
160
-
161
- .status-online {
162
- background-color: #28a745;
163
- }
164
-
165
- .status-offline {
166
- background-color: #dc3545;
167
- }
168
-
169
- /* Footer */
170
- .footer {
171
- margin-top: auto;
172
- }
173
-
174
- /* Clean Code Blocks */
175
- pre {
176
- background-color: #f8fafc !important;
177
- border: 1px solid #e5e7eb;
178
- border-radius: 8px;
179
- font-size: 0.875rem;
180
- }
181
-
182
- code {
183
- color: #374151;
184
- font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace;
185
- }
186
-
187
- /* Enhanced Form Styles */
188
- .form-control, .form-select {
189
- border-radius: var(--border-radius-sm);
190
- border: 2px solid #e2e8f0;
191
- transition: var(--transition);
192
- padding: 0.875rem 1rem;
193
- font-size: 1rem;
194
- background-color: #ffffff;
195
- color: var(--text-color);
196
- }
197
-
198
- .form-control:focus, .form-select:focus {
199
- border-color: var(--primary-color);
200
- box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
201
- outline: none;
202
- background-color: #ffffff;
203
- }
204
-
205
- .form-control:hover, .form-select:hover {
206
- border-color: #cbd5e1;
207
- }
208
-
209
- .form-label {
210
- font-weight: 600;
211
- color: var(--dark-color);
212
- margin-bottom: 0.75rem;
213
- font-size: 0.95rem;
214
- }
215
-
216
- .form-text {
217
- color: var(--text-muted);
218
- font-size: 0.875rem;
219
- margin-top: 0.5rem;
220
- }
221
-
222
- .form-check-input {
223
- border-radius: var(--border-radius-sm);
224
- border: 2px solid #e2e8f0;
225
- width: 1.25rem;
226
- height: 1.25rem;
227
- }
228
-
229
- .form-check-input:checked {
230
- background-color: var(--primary-color);
231
- border-color: var(--primary-color);
232
- }
233
-
234
- .form-check-input:focus {
235
- box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
236
- }
237
-
238
- .form-check-label {
239
- color: var(--text-color);
240
- font-weight: 500;
241
- margin-left: 0.5rem;
242
- }
243
-
244
- /* Enhanced Status Indicators */
245
- .status-indicator {
246
- display: inline-block;
247
- width: 12px;
248
- height: 12px;
249
- border-radius: 50%;
250
- margin-right: 8px;
251
- position: relative;
252
- animation: statusPulse 2s infinite;
253
- }
254
-
255
- .status-indicator::before {
256
- content: '';
257
- position: absolute;
258
- top: -2px;
259
- left: -2px;
260
- right: -2px;
261
- bottom: -2px;
262
- border-radius: 50%;
263
- opacity: 0.3;
264
- animation: statusRing 2s infinite;
265
- }
266
-
267
- .status-online {
268
- background-color: var(--success-color);
269
- box-shadow: 0 0 8px rgba(16, 185, 129, 0.4);
270
- }
271
-
272
- .status-online::before {
273
- background-color: var(--success-color);
274
- }
275
-
276
- .status-offline {
277
- background-color: var(--danger-color);
278
- box-shadow: 0 0 8px rgba(239, 68, 68, 0.4);
279
- }
280
-
281
- .status-offline::before {
282
- background-color: var(--danger-color);
283
- }
284
-
285
- @keyframes statusPulse {
286
- 0%, 100% { opacity: 1; }
287
- 50% { opacity: 0.7; }
288
- }
289
-
290
- @keyframes statusRing {
291
- 0% { transform: scale(0.8); opacity: 0.8; }
292
- 100% { transform: scale(1.4); opacity: 0; }
293
- }
294
-
295
- /* Enhanced Audio Player */
296
- .audio-player {
297
- width: 100%;
298
- margin-top: 1rem;
299
- border-radius: var(--border-radius);
300
- box-shadow: var(--box-shadow);
301
- background: var(--light-color);
302
- padding: 0.5rem;
303
- }
304
-
305
- .audio-player::-webkit-media-controls-panel {
306
- background-color: var(--light-color);
307
- border-radius: var(--border-radius-sm);
308
- }
309
-
310
- /* Enhanced Sections */
311
- .features-section {
312
- padding: 6rem 0;
313
- background: linear-gradient(180deg, #ffffff 0%, var(--light-color) 100%);
314
- }
315
-
316
- .stats-section {
317
- padding: 4rem 0;
318
- background: var(--gradient-primary);
319
- color: white;
320
- position: relative;
321
- overflow: hidden;
322
- }
323
-
324
- .stats-section::before {
325
- content: '';
326
- position: absolute;
327
- top: 0;
328
- left: 0;
329
- right: 0;
330
- bottom: 0;
331
- background: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100"><defs><pattern id="stats-pattern" width="40" height="40" patternUnits="userSpaceOnUse"><circle cx="20" cy="20" r="1" fill="white" opacity="0.1"/></pattern></defs><rect width="100" height="100" fill="url(%23stats-pattern)"/></svg>');
332
- }
333
-
334
- .stat-card {
335
- text-align: center;
336
- padding: 2rem 1rem;
337
- background: rgba(255, 255, 255, 0.1);
338
- border-radius: var(--border-radius);
339
- backdrop-filter: blur(10px);
340
- border: 1px solid rgba(255, 255, 255, 0.2);
341
- transition: var(--transition);
342
- }
343
-
344
- .stat-card:hover {
345
- transform: translateY(-5px);
346
- background: rgba(255, 255, 255, 0.15);
347
- }
348
-
349
- .stat-icon {
350
- font-size: 2.5rem;
351
- margin-bottom: 1rem;
352
- color: rgba(255, 255, 255, 0.9);
353
- }
354
-
355
- .stat-number {
356
- font-size: 3rem;
357
- font-weight: 800;
358
- color: white;
359
- margin-bottom: 0.5rem;
360
- display: block;
361
- }
362
-
363
- .stat-label {
364
- color: rgba(255, 255, 255, 0.9);
365
- font-weight: 500;
366
- font-size: 0.95rem;
367
- }
368
-
369
- .quick-start-section {
370
- padding: 6rem 0;
371
- }
372
-
373
- .use-cases-section {
374
- padding: 6rem 0;
375
- background: var(--light-color);
376
- }
377
-
378
- .tech-specs-section {
379
- padding: 6rem 0;
380
- }
381
-
382
- .faq-section {
383
- padding: 6rem 0;
384
- background: var(--light-color);
385
- }
386
-
387
- .final-cta-section {
388
- padding: 6rem 0;
389
- background: var(--gradient-hero);
390
- color: white;
391
- position: relative;
392
- overflow: hidden;
393
- }
394
-
395
- .cta-background-animation {
396
- position: absolute;
397
- top: 0;
398
- left: 0;
399
- right: 0;
400
- bottom: 0;
401
- background: linear-gradient(45deg, transparent 30%, rgba(255,255,255,0.05) 50%, transparent 70%);
402
- animation: shimmer 4s ease-in-out infinite;
403
- }
404
-
405
- .section-badge {
406
- display: inline-block;
407
- background: var(--gradient-primary);
408
- color: white;
409
- padding: 0.5rem 1.5rem;
410
- border-radius: 2rem;
411
- font-size: 0.875rem;
412
- font-weight: 600;
413
- margin-bottom: 1.5rem;
414
- box-shadow: 0 4px 14px 0 rgba(99, 102, 241, 0.3);
415
- }
416
-
417
- /* Enhanced Loading States */
418
- .loading-spinner {
419
- display: none;
420
- }
421
-
422
- .loading .loading-spinner {
423
- display: inline-block;
424
- }
425
-
426
- .loading .btn-text {
427
- display: none;
428
- }
429
-
430
- .loading {
431
- position: relative;
432
- overflow: hidden;
433
- }
434
-
435
- .loading::after {
436
- content: '';
437
- position: absolute;
438
- top: 0;
439
- left: -100%;
440
- width: 100%;
441
- height: 100%;
442
- background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent);
443
- animation: loading-shimmer 1.5s infinite;
444
- }
445
-
446
- @keyframes loading-shimmer {
447
- 0% { left: -100%; }
448
- 100% { left: 100%; }
449
- }
450
-
451
- /* Enhanced Code Blocks */
452
- .code-card {
453
- background: white;
454
- border-radius: var(--border-radius);
455
- box-shadow: var(--box-shadow);
456
- overflow: hidden;
457
- border: 1px solid #e2e8f0;
458
- transition: var(--transition);
459
- }
460
-
461
- .code-card:hover {
462
- transform: translateY(-2px);
463
- box-shadow: var(--box-shadow-lg);
464
- }
465
-
466
- .code-header {
467
- background: var(--light-gray);
468
- padding: 1rem 1.5rem;
469
- border-bottom: 1px solid #e2e8f0;
470
- display: flex;
471
- justify-content: between;
472
- align-items: center;
473
- }
474
-
475
- .code-header h4 {
476
- margin: 0;
477
- font-size: 1.1rem;
478
- color: var(--dark-color);
479
- }
480
-
481
- .code-content {
482
- padding: 1.5rem;
483
- background: #f8fafc;
484
- margin: 0;
485
- overflow-x: auto;
486
- }
487
-
488
- .code-content code {
489
- font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
490
- font-size: 0.9rem;
491
- line-height: 1.6;
492
- color: var(--text-color);
493
- }
494
-
495
- .code-footer {
496
- padding: 1rem 1.5rem;
497
- background: white;
498
- border-top: 1px solid #e2e8f0;
499
- }
500
-
501
- .copy-btn {
502
- font-size: 0.8rem;
503
- padding: 0.25rem 0.75rem;
504
- }
505
-
506
- /* Enhanced Use Case Cards */
507
- .use-case-card {
508
- background: white;
509
- border-radius: var(--border-radius);
510
- padding: 2rem;
511
- box-shadow: var(--box-shadow);
512
- transition: var(--transition);
513
- border: 1px solid #e2e8f0;
514
- height: 100%;
515
- text-align: center;
516
- }
517
-
518
- .use-case-card:hover {
519
- transform: translateY(-4px);
520
- box-shadow: var(--box-shadow-lg);
521
- border-color: rgba(99, 102, 241, 0.2);
522
- }
523
-
524
- .use-case-icon {
525
- width: 4rem;
526
- height: 4rem;
527
- background: var(--gradient-primary);
528
- border-radius: 50%;
529
- display: flex;
530
- align-items: center;
531
- justify-content: center;
532
- font-size: 1.5rem;
533
- color: white;
534
- margin: 0 auto 1.5rem;
535
- box-shadow: 0 4px 14px 0 rgba(99, 102, 241, 0.3);
536
- }
537
-
538
- .use-case-title {
539
- font-size: 1.25rem;
540
- font-weight: 700;
541
- color: var(--dark-color);
542
- margin-bottom: 1rem;
543
- }
544
-
545
- .use-case-description {
546
- color: var(--text-muted);
547
- margin-bottom: 1.5rem;
548
- line-height: 1.7;
549
- }
550
-
551
- .use-case-examples {
552
- display: flex;
553
- flex-wrap: wrap;
554
- gap: 0.5rem;
555
- justify-content: center;
556
- }
557
-
558
- .use-case-examples .badge {
559
- font-size: 0.75rem;
560
- padding: 0.4rem 0.8rem;
561
- border-radius: 1rem;
562
- background: var(--light-gray);
563
- color: var(--text-color);
564
- border: 1px solid #e2e8f0;
565
- }
566
-
567
- /* Enhanced Tech Spec Cards */
568
- .tech-spec-card {
569
- background: white;
570
- border-radius: var(--border-radius);
571
- padding: 2rem;
572
- box-shadow: var(--box-shadow);
573
- transition: var(--transition);
574
- border: 1px solid #e2e8f0;
575
- height: 100%;
576
- }
577
-
578
- .tech-spec-card:hover {
579
- transform: translateY(-2px);
580
- box-shadow: var(--box-shadow-lg);
581
- }
582
-
583
- .tech-spec-icon {
584
- width: 3rem;
585
- height: 3rem;
586
- background: var(--gradient-accent);
587
- border-radius: var(--border-radius-sm);
588
- display: flex;
589
- align-items: center;
590
- justify-content: center;
591
- font-size: 1.25rem;
592
- color: white;
593
- margin: 0 auto 1rem;
594
- }
595
-
596
- .tech-spec-card h4, .tech-spec-card h5 {
597
- color: var(--dark-color);
598
- margin-bottom: 1.5rem;
599
- }
600
-
601
- .tech-spec-card ul {
602
- list-style: none;
603
- padding: 0;
604
- }
605
-
606
- .tech-spec-card li {
607
- padding: 0.5rem 0;
608
- color: var(--text-color);
609
- border-bottom: 1px solid #f1f5f9;
610
- }
611
-
612
- .tech-spec-card li:last-child {
613
- border-bottom: none;
614
- }
615
-
616
- /* Enhanced Validation Styles */
617
- .badge {
618
- font-size: 0.75em;
619
- padding: 0.4em 0.8em;
620
- border-radius: 1rem;
621
- font-weight: 600;
622
- letter-spacing: 0.025em;
623
- }
624
-
625
- .validation-result {
626
- animation: slideDown 0.3s ease;
627
- }
628
-
629
- @keyframes slideDown {
630
- from {
631
- opacity: 0;
632
- transform: translateY(-10px);
633
- }
634
- to {
635
- opacity: 1;
636
- transform: translateY(0);
637
- }
638
- }
639
-
640
- /* Enhanced Alert Styles */
641
- .alert {
642
- border-radius: var(--border-radius);
643
- border: none;
644
- box-shadow: var(--box-shadow);
645
- padding: 1rem 1.5rem;
646
- }
647
-
648
- .alert-success {
649
- background: linear-gradient(135deg, rgba(16, 185, 129, 0.1) 0%, rgba(16, 185, 129, 0.05) 100%);
650
- color: #065f46;
651
- border-left: 4px solid var(--success-color);
652
- }
653
-
654
- .alert-warning {
655
- background: linear-gradient(135deg, rgba(245, 158, 11, 0.1) 0%, rgba(245, 158, 11, 0.05) 100%);
656
- color: #92400e;
657
- border-left: 4px solid var(--warning-color);
658
- }
659
-
660
- .alert-danger {
661
- background: linear-gradient(135deg, rgba(239, 68, 68, 0.1) 0%, rgba(239, 68, 68, 0.05) 100%);
662
- color: #991b1b;
663
- border-left: 4px solid var(--danger-color);
664
- }
665
-
666
- .alert-info {
667
- background: linear-gradient(135deg, rgba(59, 130, 246, 0.1) 0%, rgba(59, 130, 246, 0.05) 100%);
668
- color: #1e40af;
669
- border-left: 4px solid var(--info-color);
670
- }
671
-
672
- /* Enhanced Accordion */
673
- .accordion-item {
674
- border: none;
675
- margin-bottom: 1rem;
676
- border-radius: var(--border-radius) !important;
677
- box-shadow: var(--box-shadow);
678
- overflow: hidden;
679
- }
680
-
681
- .accordion-button {
682
- background: white;
683
- border: none;
684
- padding: 1.5rem;
685
- font-weight: 600;
686
- color: var(--dark-color);
687
- border-radius: var(--border-radius) !important;
688
- }
689
-
690
- .accordion-button:not(.collapsed) {
691
- background: var(--light-gray);
692
- color: var(--primary-color);
693
- box-shadow: none;
694
- }
695
-
696
- .accordion-button:focus {
697
- box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
698
- border-color: transparent;
699
- }
700
-
701
- .accordion-body {
702
- padding: 1.5rem;
703
- background: white;
704
- color: var(--text-color);
705
- line-height: 1.7;
706
- }
707
-
708
- /* Enhanced CTA Buttons */
709
- .cta-btn-primary, .cta-btn-secondary {
710
- position: relative;
711
- overflow: hidden;
712
- backdrop-filter: blur(10px);
713
- border-radius: var(--border-radius);
714
- }
715
-
716
- .cta-btn-primary small, .cta-btn-secondary small {
717
- font-size: 0.75rem;
718
- opacity: 0.9;
719
- font-weight: 400;
720
- }
721
-
722
- .cta-content {
723
- position: relative;
724
- z-index: 2;
725
- }
726
-
727
- .cta-buttons {
728
- margin: 2rem 0;
729
- }
730
-
731
- .cta-stats {
732
- margin-top: 3rem;
733
- }
734
-
735
- .cta-stat h4 {
736
- font-size: 2rem;
737
- font-weight: 800;
738
- margin-bottom: 0.25rem;
739
- }
740
-
741
- .cta-stat small {
742
- font-size: 0.9rem;
743
- opacity: 0.9;
744
- }
745
-
746
- /* Enhanced Quick Start */
747
- .quick-start-cta {
748
- background: white;
749
- border-radius: var(--border-radius-lg);
750
- padding: 3rem;
751
- box-shadow: var(--box-shadow-lg);
752
- text-align: center;
753
- border: 1px solid #e2e8f0;
754
- }
755
-
756
- .quick-start-cta h4 {
757
- color: var(--dark-color);
758
- margin-bottom: 1.5rem;
759
- }
760
-
761
- /* Enhanced Batch Processing */
762
- .batch-chunk-card {
763
- transition: var(--transition);
764
- border: 1px solid #e2e8f0;
765
- border-radius: var(--border-radius);
766
- overflow: hidden;
767
- }
768
-
769
- .batch-chunk-card:hover {
770
- transform: translateY(-2px);
771
- box-shadow: var(--box-shadow-lg);
772
- border-color: rgba(99, 102, 241, 0.2);
773
- }
774
-
775
- .batch-chunk-card .card-body {
776
- padding: 1.5rem;
777
- }
778
-
779
- .batch-chunk-card .card-title {
780
- font-size: 1rem;
781
- font-weight: 600;
782
- color: var(--dark-color);
783
- }
784
-
785
- .batch-chunk-card .card-text {
786
- color: var(--text-muted);
787
- line-height: 1.6;
788
- }
789
-
790
- .download-chunk {
791
- transition: var(--transition-fast);
792
- }
793
-
794
- .download-chunk:hover {
795
- transform: scale(1.1);
796
- }
797
-
798
- /* Enhanced Navigation */
799
- .navbar {
800
- backdrop-filter: blur(10px);
801
- background: rgba(255, 255, 255, 0.95) !important;
802
- border-bottom: 1px solid rgba(226, 232, 240, 0.8);
803
- box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1);
804
- }
805
-
806
- .navbar-brand {
807
- font-weight: 800;
808
- font-size: 1.5rem;
809
- color: var(--primary-color) !important;
810
- transition: var(--transition);
811
- }
812
-
813
- .navbar-brand:hover {
814
- transform: scale(1.05);
815
- }
816
-
817
- .navbar-nav .nav-link {
818
- font-weight: 500;
819
- transition: var(--transition);
820
- color: var(--text-color) !important;
821
- position: relative;
822
- padding: 0.75rem 1rem !important;
823
- }
824
-
825
- .navbar-nav .nav-link::after {
826
- content: '';
827
- position: absolute;
828
- bottom: 0;
829
- left: 50%;
830
- width: 0;
831
- height: 2px;
832
- background: var(--gradient-primary);
833
- transition: var(--transition);
834
- transform: translateX(-50%);
835
- }
836
-
837
- .navbar-nav .nav-link:hover::after {
838
- width: 80%;
839
- }
840
-
841
- .navbar-nav .nav-link:hover {
842
- color: var(--primary-color) !important;
843
- }
844
-
845
- .navbar-text {
846
- color: var(--text-muted) !important;
847
- font-weight: 500;
848
- }
849
-
850
- /* Enhanced Footer */
851
- .footer {
852
- background: linear-gradient(135deg, var(--dark-color) 0%, #2d3748 100%);
853
- color: white;
854
- padding: 3rem 0 2rem;
855
- margin-top: 6rem;
856
- position: relative;
857
- overflow: hidden;
858
- }
859
-
860
- .footer::before {
861
- content: '';
862
- position: absolute;
863
- top: 0;
864
- left: 0;
865
- right: 0;
866
- bottom: 0;
867
- background: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100"><defs><pattern id="footer-pattern" width="20" height="20" patternUnits="userSpaceOnUse"><circle cx="10" cy="10" r="0.5" fill="white" opacity="0.1"/></pattern></defs><rect width="100" height="100" fill="url(%23footer-pattern)"/></svg>');
868
- }
869
-
870
- .footer h5 {
871
- color: white;
872
- font-weight: 700;
873
- margin-bottom: 1rem;
874
- }
875
-
876
- .footer p, .footer a {
877
- color: rgba(255, 255, 255, 0.8);
878
- transition: var(--transition);
879
- }
880
-
881
- .footer a:hover {
882
- color: white;
883
- text-decoration: none;
884
- }
885
-
886
- /* Enhanced Responsive Design */
887
- @media (max-width: 1200px) {
888
- .hero-section {
889
- padding: 4rem 0;
890
- }
891
-
892
- .floating-icon-container {
893
- width: 250px;
894
- height: 250px;
895
- }
896
-
897
- .floating-icon {
898
- width: 50px;
899
- height: 50px;
900
- font-size: 1.25rem;
901
- }
902
-
903
- .hero-main-icon {
904
- width: 100px;
905
- height: 100px;
906
- font-size: 2.5rem;
907
- }
908
- }
909
-
910
- @media (max-width: 992px) {
911
- .hero-section {
912
- padding: 3rem 0;
913
- min-height: auto;
914
- }
915
-
916
- .display-3 {
917
- font-size: 2.5rem;
918
- }
919
-
920
- .features-section, .stats-section, .quick-start-section,
921
- .use-cases-section, .tech-specs-section, .faq-section,
922
- .final-cta-section {
923
- padding: 4rem 0;
924
- }
925
-
926
- .floating-icon-container {
927
- display: none;
928
- }
929
-
930
- .hero-visual {
931
- margin-top: 2rem;
932
- }
933
- }
934
-
935
- @media (max-width: 768px) {
936
- .hero-section {
937
- padding: 2rem 0;
938
- text-align: center;
939
- }
940
-
941
- .display-3 {
942
- font-size: 2rem;
943
- }
944
-
945
- .lead {
946
- font-size: 1rem;
947
- }
948
-
949
- .btn-lg {
950
- padding: 0.75rem 1.5rem;
951
- font-size: 1rem;
952
- width: 100%;
953
- margin-bottom: 1rem;
954
- }
955
-
956
- .hero-stats .col-4 {
957
- margin-bottom: 1rem;
958
- }
959
-
960
- .stat-item h3 {
961
- font-size: 2rem;
962
- }
963
-
964
- .features-section, .stats-section, .quick-start-section,
965
- .use-cases-section, .tech-specs-section, .faq-section,
966
- .final-cta-section {
967
- padding: 3rem 0;
968
- }
969
-
970
- .feature-card-enhanced, .use-case-card, .tech-spec-card {
971
- margin-bottom: 2rem;
972
- }
973
-
974
- .code-card {
975
- margin-bottom: 1.5rem;
976
- }
977
-
978
- .code-header {
979
- flex-direction: column;
980
- gap: 1rem;
981
- text-align: center;
982
- }
983
-
984
- .quick-start-cta {
985
- padding: 2rem 1rem;
986
- }
987
-
988
- .cta-buttons .btn {
989
- width: 100%;
990
- margin-bottom: 1rem;
991
- }
992
-
993
- .navbar-nav {
994
- text-align: center;
995
- padding: 1rem 0;
996
- }
997
-
998
- .toc {
999
- position: static;
1000
- margin-bottom: 2rem;
1001
- max-height: none;
1002
- }
1003
- }
1004
-
1005
- @media (max-width: 576px) {
1006
- .container {
1007
- padding-left: 1rem;
1008
- padding-right: 1rem;
1009
- }
1010
-
1011
- .hero-section {
1012
- padding: 1.5rem 0;
1013
- }
1014
-
1015
- .display-3 {
1016
- font-size: 1.75rem;
1017
- }
1018
-
1019
- .card-body {
1020
- padding: 1.5rem;
1021
- }
1022
-
1023
- .feature-card-enhanced, .use-case-card, .tech-spec-card {
1024
- padding: 1.5rem;
1025
- }
1026
-
1027
- .stat-number {
1028
- font-size: 2.5rem;
1029
- }
1030
-
1031
- .hero-main-icon {
1032
- width: 80px;
1033
- height: 80px;
1034
- font-size: 2rem;
1035
- }
1036
-
1037
- .pulse-ring {
1038
- width: 100px;
1039
- height: 100px;
1040
- }
1041
- }
1042
-
1043
- /* Enhanced Accessibility */
1044
- .btn:focus,
1045
- .form-control:focus,
1046
- .form-select:focus,
1047
- .form-check-input:focus {
1048
- outline: 3px solid rgba(99, 102, 241, 0.3);
1049
- outline-offset: 2px;
1050
- }
1051
-
1052
- .btn:focus-visible,
1053
- .form-control:focus-visible,
1054
- .form-select:focus-visible {
1055
- outline: 3px solid var(--primary-color);
1056
- outline-offset: 2px;
1057
- }
1058
-
1059
- /* Skip to content link for screen readers */
1060
- .skip-link {
1061
- position: absolute;
1062
- top: -40px;
1063
- left: 6px;
1064
- background: var(--primary-color);
1065
- color: white;
1066
- padding: 8px;
1067
- text-decoration: none;
1068
- border-radius: 4px;
1069
- z-index: 1000;
1070
- }
1071
-
1072
- .skip-link:focus {
1073
- top: 6px;
1074
- }
1075
-
1076
- /* Enhanced Animation Classes */
1077
- .fade-in {
1078
- animation: fadeIn 0.6s cubic-bezier(0.4, 0, 0.2, 1);
1079
- }
1080
-
1081
- @keyframes fadeIn {
1082
- from {
1083
- opacity: 0;
1084
- transform: translateY(10px);
1085
- }
1086
- to {
1087
- opacity: 1;
1088
- transform: translateY(0);
1089
- }
1090
- }
1091
-
1092
- .slide-up {
1093
- animation: slideUp 0.6s cubic-bezier(0.4, 0, 0.2, 1);
1094
- }
1095
-
1096
- @keyframes slideUp {
1097
- from {
1098
- opacity: 0;
1099
- transform: translateY(30px);
1100
- }
1101
- to {
1102
- opacity: 1;
1103
- transform: translateY(0);
1104
- }
1105
- }
1106
-
1107
- .scale-in {
1108
- animation: scaleIn 0.5s cubic-bezier(0.4, 0, 0.2, 1);
1109
- }
1110
-
1111
- @keyframes scaleIn {
1112
- from {
1113
- opacity: 0;
1114
- transform: scale(0.9);
1115
- }
1116
- to {
1117
- opacity: 1;
1118
- transform: scale(1);
1119
- }
1120
- }
1121
-
1122
- /* Enhanced Utility Classes */
1123
- .text-gradient {
1124
- background: var(--gradient-primary);
1125
- -webkit-background-clip: text;
1126
- -webkit-text-fill-color: transparent;
1127
- background-clip: text;
1128
- }
1129
-
1130
- .text-gradient-secondary {
1131
- background: var(--gradient-secondary);
1132
- -webkit-background-clip: text;
1133
- -webkit-text-fill-color: transparent;
1134
- background-clip: text;
1135
- }
1136
-
1137
- .shadow-custom {
1138
- box-shadow: var(--box-shadow);
1139
- }
1140
-
1141
- .shadow-lg-custom {
1142
- box-shadow: var(--box-shadow-lg);
1143
- }
1144
-
1145
- .shadow-xl-custom {
1146
- box-shadow: var(--box-shadow-xl);
1147
- }
1148
-
1149
- .border-radius-custom {
1150
- border-radius: var(--border-radius);
1151
- }
1152
-
1153
- .bg-gradient-primary {
1154
- background: var(--gradient-primary);
1155
- }
1156
-
1157
- .bg-gradient-secondary {
1158
- background: var(--gradient-secondary);
1159
- }
1160
-
1161
- .bg-gradient-accent {
1162
- background: var(--gradient-accent);
1163
- }
1164
-
1165
- /* Enhanced Progress Indicators */
1166
- .progress-custom {
1167
- height: 10px;
1168
- border-radius: var(--border-radius-sm);
1169
- background-color: #e2e8f0;
1170
- overflow: hidden;
1171
- box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.1);
1172
- }
1173
-
1174
- .progress-bar-custom {
1175
- height: 100%;
1176
- background: var(--gradient-primary);
1177
- transition: width 0.6s cubic-bezier(0.4, 0, 0.2, 1);
1178
- position: relative;
1179
- overflow: hidden;
1180
- }
1181
-
1182
- .progress-bar-custom::after {
1183
- content: '';
1184
- position: absolute;
1185
- top: 0;
1186
- left: 0;
1187
- right: 0;
1188
- bottom: 0;
1189
- background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent);
1190
- animation: progress-shimmer 2s infinite;
1191
- }
1192
-
1193
- @keyframes progress-shimmer {
1194
- 0% { transform: translateX(-100%); }
1195
- 100% { transform: translateX(100%); }
1196
- }
1197
-
1198
- /* Enhanced Tooltip */
1199
- .tooltip-inner {
1200
- background-color: var(--dark-color);
1201
- border-radius: var(--border-radius-sm);
1202
- font-size: 0.875rem;
1203
- padding: 0.5rem 0.75rem;
1204
- box-shadow: var(--box-shadow);
1205
- }
1206
-
1207
- /* Enhanced Custom Scrollbar */
1208
- ::-webkit-scrollbar {
1209
- width: 10px;
1210
- height: 10px;
1211
- }
1212
-
1213
- ::-webkit-scrollbar-track {
1214
- background: var(--light-gray);
1215
- border-radius: var(--border-radius-sm);
1216
- }
1217
-
1218
- ::-webkit-scrollbar-thumb {
1219
- background: var(--gradient-primary);
1220
- border-radius: var(--border-radius-sm);
1221
- border: 2px solid var(--light-gray);
1222
- }
1223
-
1224
- ::-webkit-scrollbar-thumb:hover {
1225
- background: var(--gradient-secondary);
1226
- }
1227
-
1228
- ::-webkit-scrollbar-corner {
1229
- background: var(--light-gray);
1230
- }
1231
-
1232
- /* Print Styles */
1233
- @media print {
1234
- .navbar, .footer, .hero-scroll-indicator, .floating-icon-container {
1235
- display: none !important;
1236
- }
1237
-
1238
- .hero-section {
1239
- background: white !important;
1240
- color: black !important;
1241
- padding: 1rem 0 !important;
1242
- }
1243
-
1244
- .card {
1245
- box-shadow: none !important;
1246
- border: 1px solid #ddd !important;
1247
- }
1248
-
1249
- .btn {
1250
- border: 1px solid #ddd !important;
1251
- background: white !important;
1252
- color: black !important;
1253
- }
1254
- }
1255
-
1256
- /* Playground-Specific Styles */
1257
- .playground-visual {
1258
- position: relative;
1259
- display: flex;
1260
- justify-content: center;
1261
- align-items: center;
1262
- height: 200px;
1263
- }
1264
-
1265
- .playground-icon {
1266
- width: 100px;
1267
- height: 100px;
1268
- background: rgba(255, 255, 255, 0.15);
1269
- border-radius: 50%;
1270
- display: flex;
1271
- align-items: center;
1272
- justify-content: center;
1273
- font-size: 2.5rem;
1274
- color: white;
1275
- backdrop-filter: blur(20px);
1276
- border: 2px solid rgba(255, 255, 255, 0.3);
1277
- position: relative;
1278
- }
1279
-
1280
- .audio-player-container {
1281
- border: 2px solid #e2e8f0;
1282
- transition: var(--transition);
1283
- }
1284
-
1285
- .audio-player-container:hover {
1286
- border-color: var(--primary-color);
1287
- box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
1288
- }
1289
-
1290
- .stat-item {
1291
- padding: 1rem;
1292
- text-align: center;
1293
- }
1294
-
1295
- .stat-item i {
1296
- font-size: 1.5rem;
1297
- margin-bottom: 0.5rem;
1298
- display: block;
1299
- }
1300
-
1301
- .stat-value {
1302
- font-size: 1.25rem;
1303
- font-weight: 700;
1304
- color: var(--dark-color);
1305
- margin-bottom: 0.25rem;
1306
- }
1307
-
1308
- .stat-label {
1309
- font-size: 0.875rem;
1310
- color: var(--text-muted);
1311
- font-weight: 500;
1312
- }
1313
-
1314
- .card-header {
1315
- border-bottom: none;
1316
- border-radius: var(--border-radius) var(--border-radius) 0 0 !important;
1317
- }
1318
-
1319
- /* Enhanced Form Controls for Playground */
1320
- .playground .form-control,
1321
- .playground .form-select {
1322
- border: 2px solid #e2e8f0;
1323
- border-radius: var(--border-radius-sm);
1324
- padding: 1rem;
1325
- font-size: 1rem;
1326
- transition: var(--transition);
1327
- }
1328
-
1329
- .playground .form-control:focus,
1330
- .playground .form-select:focus {
1331
- border-color: var(--primary-color);
1332
- box-shadow: 0 0 0 4px rgba(99, 102, 241, 0.1);
1333
- transform: translateY(-1px);
1334
- }
1335
-
1336
- .playground .btn-group .btn {
1337
- border-radius: var(--border-radius-sm);
1338
- }
1339
-
1340
- .playground .btn-group .btn:first-child {
1341
- border-top-right-radius: 0;
1342
- border-bottom-right-radius: 0;
1343
- }
1344
-
1345
- .playground .btn-group .btn:last-child {
1346
- border-top-left-radius: 0;
1347
- border-bottom-left-radius: 0;
1348
- }
1349
-
1350
- /* Audio Player Enhancements */
1351
- audio::-webkit-media-controls-panel {
1352
- background-color: var(--light-gray);
1353
- border-radius: var(--border-radius-sm);
1354
- }
1355
-
1356
- audio::-webkit-media-controls-play-button,
1357
- audio::-webkit-media-controls-pause-button {
1358
- background-color: var(--primary-color);
1359
- border-radius: 50%;
1360
- }
1361
-
1362
- audio::-webkit-media-controls-timeline {
1363
- background-color: var(--light-gray);
1364
- border-radius: var(--border-radius-sm);
1365
- }
1366
-
1367
- audio::-webkit-media-controls-current-time-display,
1368
- audio::-webkit-media-controls-time-remaining-display {
1369
- color: var(--text-color);
1370
- font-weight: 500;
1371
- }
1372
-
1373
- /* Reduced Motion Support */
1374
- @media (prefers-reduced-motion: reduce) {
1375
- *,
1376
- *::before,
1377
- *::after {
1378
- animation-duration: 0.01ms !important;
1379
- animation-iteration-count: 1 !important;
1380
- transition-duration: 0.01ms !important;
1381
- }
1382
-
1383
- .hero-background-animation,
1384
- .floating-icon,
1385
- .pulse-ring,
1386
- .hero-scroll-indicator,
1387
- .playground-icon {
1388
- animation: none !important;
1389
- }
1390
- }
 
 
 
 
 
 
 
 
 
 
1
+ /* TTSFM Web Application Custom Styles */
2
+
3
+ :root {
4
+ /* Clean Color Palette */
5
+ --primary-color: #4f46e5;
6
+ --primary-dark: #3730a3;
7
+ --primary-light: #6366f1;
8
+ --secondary-color: #6b7280;
9
+ --secondary-dark: #4b5563;
10
+ --accent-color: #059669;
11
+ --accent-dark: #047857;
12
+
13
+ /* Status Colors */
14
+ --success-color: #059669;
15
+ --warning-color: #d97706;
16
+ --danger-color: #dc2626;
17
+ --info-color: #2563eb;
18
+
19
+ /* Clean Neutral Colors */
20
+ --light-color: #ffffff;
21
+ --light-gray: #f9fafb;
22
+ --medium-gray: #6b7280;
23
+ --dark-color: #111827;
24
+ --text-color: #374151;
25
+ --text-muted: #6b7280;
26
+
27
+ /* Design System */
28
+ --border-radius: 0.75rem;
29
+ --border-radius-sm: 0.5rem;
30
+ --border-radius-lg: 1rem;
31
+ --box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
32
+ --box-shadow-lg: 0 20px 25px -5px rgba(0, 0, 0, 0.1), 0 10px 10px -5px rgba(0, 0, 0, 0.04);
33
+ --box-shadow-xl: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
34
+ --transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
35
+ --transition-fast: all 0.15s cubic-bezier(0.4, 0, 0.2, 1);
36
+
37
+ /* Gradients */
38
+ --gradient-primary: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-light) 100%);
39
+ --gradient-secondary: linear-gradient(135deg, var(--secondary-color) 0%, var(--secondary-dark) 100%);
40
+ --gradient-accent: linear-gradient(135deg, var(--accent-color) 0%, var(--accent-dark) 100%);
41
+ --gradient-hero: linear-gradient(135deg, var(--primary-color) 0%, var(--secondary-color) 50%, var(--accent-color) 100%);
42
+ }
43
+
44
+ /* Global Styles */
45
+ body {
46
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
47
+ line-height: 1.6;
48
+ color: var(--text-color);
49
+ background-color: #ffffff;
50
+ font-weight: 400;
51
+ -webkit-font-smoothing: antialiased;
52
+ -moz-osx-font-smoothing: grayscale;
53
+ }
54
+
55
+ /* Enhanced Typography */
56
+ h1, h2, h3, h4, h5, h6 {
57
+ font-weight: 700;
58
+ line-height: 1.3;
59
+ color: var(--dark-color);
60
+ letter-spacing: -0.025em;
61
+ }
62
+
63
+ .display-1, .display-2, .display-3, .display-4 {
64
+ font-weight: 800;
65
+ letter-spacing: -0.05em;
66
+ }
67
+
68
+ .lead {
69
+ font-size: 1.125rem;
70
+ font-weight: 400;
71
+ color: var(--text-muted);
72
+ line-height: 1.8;
73
+ }
74
+
75
+ /* Simplified Button Styles */
76
+ .btn {
77
+ font-weight: 600;
78
+ border-radius: 12px;
79
+ transition: all 0.3s ease;
80
+ letter-spacing: 0.025em;
81
+ border: none;
82
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
83
+ }
84
+
85
+ .btn-primary {
86
+ background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-light) 100%);
87
+ color: white;
88
+ }
89
+
90
+ .btn-primary:hover {
91
+ background: linear-gradient(135deg, var(--primary-dark) 0%, var(--primary-color) 100%);
92
+ color: white;
93
+ transform: translateY(-1px);
94
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
95
+ }
96
+
97
+ .btn-outline-primary {
98
+ border: 2px solid var(--primary-color);
99
+ color: var(--primary-color);
100
+ background: transparent;
101
+ box-shadow: none;
102
+ }
103
+
104
+ .btn-outline-primary:hover {
105
+ background: var(--primary-color);
106
+ border-color: var(--primary-color);
107
+ color: white;
108
+ transform: translateY(-1px);
109
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
110
+ }
111
+
112
+ .btn-lg {
113
+ padding: 0.875rem 2rem;
114
+ font-size: 1.125rem;
115
+ border-radius: var(--border-radius);
116
+ }
117
+
118
+ .btn-sm {
119
+ padding: 0.5rem 1rem;
120
+ font-size: 0.875rem;
121
+ border-radius: var(--border-radius-sm);
122
+ }
123
+
124
+ /* Clean Card Styles */
125
+ .card {
126
+ border: 1px solid #e5e7eb;
127
+ box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
128
+ transition: all 0.3s ease;
129
+ border-radius: 16px;
130
+ background: white;
131
+ }
132
+
133
+ .card:hover {
134
+ box-shadow: 0 10px 25px rgba(0, 0, 0, 0.1);
135
+ border-color: var(--primary-light);
136
+ transform: translateY(-2px);
137
+ }
138
+
139
+ .card-body {
140
+ padding: 2rem;
141
+ }
142
+
143
+ /* Clean Hero Section */
144
+ .hero-section {
145
+ background: linear-gradient(135deg, #f9fafb 0%, #ffffff 100%);
146
+ color: var(--text-color);
147
+ padding: 5rem 0;
148
+ min-height: 75vh;
149
+ display: flex;
150
+ align-items: center;
151
+ border-bottom: 1px solid #e5e7eb;
152
+ }
153
+
154
+ .min-vh-75 {
155
+ min-height: 75vh;
156
+ }
157
+
158
+ /* Status Indicators */
159
+ .status-indicator {
160
+ display: inline-block;
161
+ width: 8px;
162
+ height: 8px;
163
+ border-radius: 50%;
164
+ background-color: #6c757d;
165
+ }
166
+
167
+ .status-online {
168
+ background-color: #28a745;
169
+ }
170
+
171
+ .status-offline {
172
+ background-color: #dc3545;
173
+ }
174
+
175
+ /* Footer */
176
+ .footer {
177
+ margin-top: auto;
178
+ }
179
+
180
+ /* Clean Code Blocks */
181
+ pre {
182
+ background-color: #f8fafc !important;
183
+ border: 1px solid #e5e7eb;
184
+ border-radius: 8px;
185
+ font-size: 0.875rem;
186
+ }
187
+
188
+ code {
189
+ color: #374151;
190
+ font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace;
191
+ }
192
+
193
+ /* Enhanced Form Styles */
194
+ .form-control, .form-select {
195
+ border-radius: 12px;
196
+ border: 2px solid #e5e7eb;
197
+ transition: var(--transition);
198
+ padding: 1rem 1.25rem;
199
+ font-size: 1rem;
200
+ background-color: #ffffff;
201
+ color: var(--text-color);
202
+ box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
203
+ }
204
+
205
+ .form-control:focus, .form-select:focus {
206
+ border-color: var(--primary-color);
207
+ box-shadow: 0 0 0 4px rgba(79, 70, 229, 0.1);
208
+ outline: none;
209
+ background-color: #ffffff;
210
+ transform: translateY(-1px);
211
+ }
212
+
213
+ .form-control:hover, .form-select:hover {
214
+ border-color: var(--primary-light);
215
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
216
+ }
217
+
218
+ .form-label {
219
+ font-weight: 600;
220
+ color: var(--dark-color);
221
+ margin-bottom: 0.75rem;
222
+ font-size: 0.95rem;
223
+ }
224
+
225
+ .form-text {
226
+ color: var(--text-muted);
227
+ font-size: 0.875rem;
228
+ margin-top: 0.5rem;
229
+ }
230
+
231
+ .form-check-input {
232
+ border-radius: var(--border-radius-sm);
233
+ border: 2px solid #e2e8f0;
234
+ width: 1.25rem;
235
+ height: 1.25rem;
236
+ }
237
+
238
+ .form-check-input:checked {
239
+ background-color: var(--primary-color);
240
+ border-color: var(--primary-color);
241
+ }
242
+
243
+ .form-check-input:focus {
244
+ box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
245
+ }
246
+
247
+ .form-check-label {
248
+ color: var(--text-color);
249
+ font-weight: 500;
250
+ margin-left: 0.5rem;
251
+ }
252
+
253
+ /* Enhanced Status Indicators */
254
+ .status-indicator {
255
+ display: inline-block;
256
+ width: 12px;
257
+ height: 12px;
258
+ border-radius: 50%;
259
+ margin-right: 8px;
260
+ position: relative;
261
+ animation: statusPulse 2s infinite;
262
+ }
263
+
264
+ .status-indicator::before {
265
+ content: '';
266
+ position: absolute;
267
+ top: -2px;
268
+ left: -2px;
269
+ right: -2px;
270
+ bottom: -2px;
271
+ border-radius: 50%;
272
+ opacity: 0.3;
273
+ animation: statusRing 2s infinite;
274
+ }
275
+
276
+ .status-online {
277
+ background-color: var(--success-color);
278
+ box-shadow: 0 0 8px rgba(16, 185, 129, 0.4);
279
+ }
280
+
281
+ .status-online::before {
282
+ background-color: var(--success-color);
283
+ }
284
+
285
+ .status-offline {
286
+ background-color: var(--danger-color);
287
+ box-shadow: 0 0 8px rgba(239, 68, 68, 0.4);
288
+ }
289
+
290
+ .status-offline::before {
291
+ background-color: var(--danger-color);
292
+ }
293
+
294
+ @keyframes statusPulse {
295
+ 0%, 100% { opacity: 1; }
296
+ 50% { opacity: 0.7; }
297
+ }
298
+
299
+ @keyframes statusRing {
300
+ 0% { transform: scale(0.8); opacity: 0.8; }
301
+ 100% { transform: scale(1.4); opacity: 0; }
302
+ }
303
+
304
+ /* Enhanced Audio Player */
305
+ .audio-player {
306
+ width: 100%;
307
+ margin-top: 1rem;
308
+ border-radius: var(--border-radius);
309
+ box-shadow: var(--box-shadow);
310
+ background: var(--light-color);
311
+ padding: 0.5rem;
312
+ }
313
+
314
+ .audio-player::-webkit-media-controls-panel {
315
+ background-color: var(--light-color);
316
+ border-radius: var(--border-radius-sm);
317
+ }
318
+
319
+ /* Enhanced Sections */
320
+ .features-section {
321
+ padding: 6rem 0;
322
+ background: linear-gradient(180deg, #ffffff 0%, var(--light-color) 100%);
323
+ }
324
+
325
+ .stats-section {
326
+ padding: 4rem 0;
327
+ background: var(--gradient-primary);
328
+ color: white;
329
+ position: relative;
330
+ overflow: hidden;
331
+ }
332
+
333
+ .stats-section::before {
334
+ content: '';
335
+ position: absolute;
336
+ top: 0;
337
+ left: 0;
338
+ right: 0;
339
+ bottom: 0;
340
+ background: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100"><defs><pattern id="stats-pattern" width="40" height="40" patternUnits="userSpaceOnUse"><circle cx="20" cy="20" r="1" fill="white" opacity="0.1"/></pattern></defs><rect width="100" height="100" fill="url(%23stats-pattern)"/></svg>');
341
+ }
342
+
343
+ .stat-card {
344
+ text-align: center;
345
+ padding: 2rem 1rem;
346
+ background: rgba(255, 255, 255, 0.1);
347
+ border-radius: var(--border-radius);
348
+ backdrop-filter: blur(10px);
349
+ border: 1px solid rgba(255, 255, 255, 0.2);
350
+ transition: var(--transition);
351
+ }
352
+
353
+ .stat-card:hover {
354
+ transform: translateY(-5px);
355
+ background: rgba(255, 255, 255, 0.15);
356
+ }
357
+
358
+ .stat-icon {
359
+ font-size: 2.5rem;
360
+ margin-bottom: 1rem;
361
+ color: rgba(255, 255, 255, 0.9);
362
+ }
363
+
364
+ .stat-number {
365
+ font-size: 3rem;
366
+ font-weight: 800;
367
+ color: white;
368
+ margin-bottom: 0.5rem;
369
+ display: block;
370
+ }
371
+
372
+ .stat-label {
373
+ color: rgba(255, 255, 255, 0.9);
374
+ font-weight: 500;
375
+ font-size: 0.95rem;
376
+ }
377
+
378
+ .quick-start-section {
379
+ padding: 6rem 0;
380
+ }
381
+
382
+ .use-cases-section {
383
+ padding: 6rem 0;
384
+ background: var(--light-color);
385
+ }
386
+
387
+ .tech-specs-section {
388
+ padding: 6rem 0;
389
+ }
390
+
391
+ .faq-section {
392
+ padding: 6rem 0;
393
+ background: var(--light-color);
394
+ }
395
+
396
+ .final-cta-section {
397
+ padding: 6rem 0;
398
+ background: var(--gradient-hero);
399
+ color: white;
400
+ position: relative;
401
+ overflow: hidden;
402
+ }
403
+
404
+ .cta-background-animation {
405
+ position: absolute;
406
+ top: 0;
407
+ left: 0;
408
+ right: 0;
409
+ bottom: 0;
410
+ background: linear-gradient(45deg, transparent 30%, rgba(255,255,255,0.05) 50%, transparent 70%);
411
+ animation: shimmer 4s ease-in-out infinite;
412
+ }
413
+
414
+ .section-badge {
415
+ display: inline-block;
416
+ background: var(--gradient-primary);
417
+ color: white;
418
+ padding: 0.5rem 1.5rem;
419
+ border-radius: 2rem;
420
+ font-size: 0.875rem;
421
+ font-weight: 600;
422
+ margin-bottom: 1.5rem;
423
+ box-shadow: 0 4px 14px 0 rgba(99, 102, 241, 0.3);
424
+ }
425
+
426
+ /* Enhanced Loading States */
427
+ .loading-spinner {
428
+ display: none;
429
+ }
430
+
431
+ .loading .loading-spinner {
432
+ display: inline-block;
433
+ }
434
+
435
+ .loading .btn-text {
436
+ display: none;
437
+ }
438
+
439
+ .loading {
440
+ position: relative;
441
+ overflow: hidden;
442
+ }
443
+
444
+ .loading::after {
445
+ content: '';
446
+ position: absolute;
447
+ top: 0;
448
+ left: -100%;
449
+ width: 100%;
450
+ height: 100%;
451
+ background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent);
452
+ animation: loading-shimmer 1.5s infinite;
453
+ }
454
+
455
+ @keyframes loading-shimmer {
456
+ 0% { left: -100%; }
457
+ 100% { left: 100%; }
458
+ }
459
+
460
+ /* Enhanced Code Blocks */
461
+ .code-card {
462
+ background: white;
463
+ border-radius: var(--border-radius);
464
+ box-shadow: var(--box-shadow);
465
+ overflow: hidden;
466
+ border: 1px solid #e2e8f0;
467
+ transition: var(--transition);
468
+ }
469
+
470
+ .code-card:hover {
471
+ transform: translateY(-2px);
472
+ box-shadow: var(--box-shadow-lg);
473
+ }
474
+
475
+ .code-header {
476
+ background: var(--light-gray);
477
+ padding: 1rem 1.5rem;
478
+ border-bottom: 1px solid #e2e8f0;
479
+ display: flex;
480
+ justify-content: between;
481
+ align-items: center;
482
+ }
483
+
484
+ .code-header h4 {
485
+ margin: 0;
486
+ font-size: 1.1rem;
487
+ color: var(--dark-color);
488
+ }
489
+
490
+ .code-content {
491
+ padding: 1.5rem;
492
+ background: #f8fafc;
493
+ margin: 0;
494
+ overflow-x: auto;
495
+ }
496
+
497
+ .code-content code {
498
+ font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
499
+ font-size: 0.9rem;
500
+ line-height: 1.6;
501
+ color: var(--text-color);
502
+ }
503
+
504
+ .code-footer {
505
+ padding: 1rem 1.5rem;
506
+ background: white;
507
+ border-top: 1px solid #e2e8f0;
508
+ }
509
+
510
+ .copy-btn {
511
+ font-size: 0.8rem;
512
+ padding: 0.25rem 0.75rem;
513
+ }
514
+
515
+ /* Enhanced Use Case Cards */
516
+ .use-case-card {
517
+ background: white;
518
+ border-radius: var(--border-radius);
519
+ padding: 2rem;
520
+ box-shadow: var(--box-shadow);
521
+ transition: var(--transition);
522
+ border: 1px solid #e2e8f0;
523
+ height: 100%;
524
+ text-align: center;
525
+ }
526
+
527
+ .use-case-card:hover {
528
+ transform: translateY(-4px);
529
+ box-shadow: var(--box-shadow-lg);
530
+ border-color: rgba(99, 102, 241, 0.2);
531
+ }
532
+
533
+ .use-case-icon {
534
+ width: 4rem;
535
+ height: 4rem;
536
+ background: var(--gradient-primary);
537
+ border-radius: 50%;
538
+ display: flex;
539
+ align-items: center;
540
+ justify-content: center;
541
+ font-size: 1.5rem;
542
+ color: white;
543
+ margin: 0 auto 1.5rem;
544
+ box-shadow: 0 4px 14px 0 rgba(99, 102, 241, 0.3);
545
+ }
546
+
547
+ .use-case-title {
548
+ font-size: 1.25rem;
549
+ font-weight: 700;
550
+ color: var(--dark-color);
551
+ margin-bottom: 1rem;
552
+ }
553
+
554
+ .use-case-description {
555
+ color: var(--text-muted);
556
+ margin-bottom: 1.5rem;
557
+ line-height: 1.7;
558
+ }
559
+
560
+ .use-case-examples {
561
+ display: flex;
562
+ flex-wrap: wrap;
563
+ gap: 0.5rem;
564
+ justify-content: center;
565
+ }
566
+
567
+ .use-case-examples .badge {
568
+ font-size: 0.75rem;
569
+ padding: 0.4rem 0.8rem;
570
+ border-radius: 1rem;
571
+ background: var(--light-gray);
572
+ color: var(--text-color);
573
+ border: 1px solid #e2e8f0;
574
+ }
575
+
576
+ /* Enhanced Tech Spec Cards */
577
+ .tech-spec-card {
578
+ background: white;
579
+ border-radius: var(--border-radius);
580
+ padding: 2rem;
581
+ box-shadow: var(--box-shadow);
582
+ transition: var(--transition);
583
+ border: 1px solid #e2e8f0;
584
+ height: 100%;
585
+ }
586
+
587
+ .tech-spec-card:hover {
588
+ transform: translateY(-2px);
589
+ box-shadow: var(--box-shadow-lg);
590
+ }
591
+
592
+ .tech-spec-icon {
593
+ width: 3rem;
594
+ height: 3rem;
595
+ background: var(--gradient-accent);
596
+ border-radius: var(--border-radius-sm);
597
+ display: flex;
598
+ align-items: center;
599
+ justify-content: center;
600
+ font-size: 1.25rem;
601
+ color: white;
602
+ margin: 0 auto 1rem;
603
+ }
604
+
605
+ .tech-spec-card h4, .tech-spec-card h5 {
606
+ color: var(--dark-color);
607
+ margin-bottom: 1.5rem;
608
+ }
609
+
610
+ .tech-spec-card ul {
611
+ list-style: none;
612
+ padding: 0;
613
+ }
614
+
615
+ .tech-spec-card li {
616
+ padding: 0.5rem 0;
617
+ color: var(--text-color);
618
+ border-bottom: 1px solid #f1f5f9;
619
+ }
620
+
621
+ .tech-spec-card li:last-child {
622
+ border-bottom: none;
623
+ }
624
+
625
+ /* Enhanced Validation Styles */
626
+ .badge {
627
+ font-size: 0.75em;
628
+ padding: 0.4em 0.8em;
629
+ border-radius: 1rem;
630
+ font-weight: 600;
631
+ letter-spacing: 0.025em;
632
+ }
633
+
634
+ .validation-result {
635
+ animation: slideDown 0.3s ease;
636
+ }
637
+
638
+ @keyframes slideDown {
639
+ from {
640
+ opacity: 0;
641
+ transform: translateY(-10px);
642
+ }
643
+ to {
644
+ opacity: 1;
645
+ transform: translateY(0);
646
+ }
647
+ }
648
+
649
+ /* Enhanced Alert Styles */
650
+ .alert {
651
+ border-radius: var(--border-radius);
652
+ border: none;
653
+ box-shadow: var(--box-shadow);
654
+ padding: 1rem 1.5rem;
655
+ }
656
+
657
+ .alert-success {
658
+ background: linear-gradient(135deg, rgba(16, 185, 129, 0.1) 0%, rgba(16, 185, 129, 0.05) 100%);
659
+ color: #065f46;
660
+ border-left: 4px solid var(--success-color);
661
+ }
662
+
663
+ .alert-warning {
664
+ background: linear-gradient(135deg, rgba(245, 158, 11, 0.1) 0%, rgba(245, 158, 11, 0.05) 100%);
665
+ color: #92400e;
666
+ border-left: 4px solid var(--warning-color);
667
+ }
668
+
669
+ .alert-danger {
670
+ background: linear-gradient(135deg, rgba(239, 68, 68, 0.1) 0%, rgba(239, 68, 68, 0.05) 100%);
671
+ color: #991b1b;
672
+ border-left: 4px solid var(--danger-color);
673
+ }
674
+
675
+ .alert-info {
676
+ background: linear-gradient(135deg, rgba(59, 130, 246, 0.1) 0%, rgba(59, 130, 246, 0.05) 100%);
677
+ color: #1e40af;
678
+ border-left: 4px solid var(--info-color);
679
+ }
680
+
681
+ /* Enhanced Accordion */
682
+ .accordion-item {
683
+ border: none;
684
+ margin-bottom: 1rem;
685
+ border-radius: var(--border-radius) !important;
686
+ box-shadow: var(--box-shadow);
687
+ overflow: hidden;
688
+ }
689
+
690
+ .accordion-button {
691
+ background: white;
692
+ border: none;
693
+ padding: 1.5rem;
694
+ font-weight: 600;
695
+ color: var(--dark-color);
696
+ border-radius: var(--border-radius) !important;
697
+ }
698
+
699
+ .accordion-button:not(.collapsed) {
700
+ background: var(--light-gray);
701
+ color: var(--primary-color);
702
+ box-shadow: none;
703
+ }
704
+
705
+ .accordion-button:focus {
706
+ box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
707
+ border-color: transparent;
708
+ }
709
+
710
+ .accordion-body {
711
+ padding: 1.5rem;
712
+ background: white;
713
+ color: var(--text-color);
714
+ line-height: 1.7;
715
+ }
716
+
717
+ /* Enhanced CTA Buttons */
718
+ .cta-btn-primary, .cta-btn-secondary {
719
+ position: relative;
720
+ overflow: hidden;
721
+ backdrop-filter: blur(10px);
722
+ border-radius: var(--border-radius);
723
+ }
724
+
725
+ .cta-btn-primary small, .cta-btn-secondary small {
726
+ font-size: 0.75rem;
727
+ opacity: 0.9;
728
+ font-weight: 400;
729
+ }
730
+
731
+ .cta-content {
732
+ position: relative;
733
+ z-index: 2;
734
+ }
735
+
736
+ .cta-buttons {
737
+ margin: 2rem 0;
738
+ }
739
+
740
+ .cta-stats {
741
+ margin-top: 3rem;
742
+ }
743
+
744
+ .cta-stat h4 {
745
+ font-size: 2rem;
746
+ font-weight: 800;
747
+ margin-bottom: 0.25rem;
748
+ }
749
+
750
+ .cta-stat small {
751
+ font-size: 0.9rem;
752
+ opacity: 0.9;
753
+ }
754
+
755
+ /* Enhanced Quick Start */
756
+ .quick-start-cta {
757
+ background: white;
758
+ border-radius: var(--border-radius-lg);
759
+ padding: 3rem;
760
+ box-shadow: var(--box-shadow-lg);
761
+ text-align: center;
762
+ border: 1px solid #e2e8f0;
763
+ }
764
+
765
+ .quick-start-cta h4 {
766
+ color: var(--dark-color);
767
+ margin-bottom: 1.5rem;
768
+ }
769
+
770
+ /* Enhanced Batch Processing */
771
+ .batch-chunk-card {
772
+ transition: var(--transition);
773
+ border: 1px solid #e2e8f0;
774
+ border-radius: var(--border-radius);
775
+ overflow: hidden;
776
+ }
777
+
778
+ .batch-chunk-card:hover {
779
+ transform: translateY(-2px);
780
+ box-shadow: var(--box-shadow-lg);
781
+ border-color: rgba(99, 102, 241, 0.2);
782
+ }
783
+
784
+ .batch-chunk-card .card-body {
785
+ padding: 1.5rem;
786
+ }
787
+
788
+ .batch-chunk-card .card-title {
789
+ font-size: 1rem;
790
+ font-weight: 600;
791
+ color: var(--dark-color);
792
+ }
793
+
794
+ .batch-chunk-card .card-text {
795
+ color: var(--text-muted);
796
+ line-height: 1.6;
797
+ }
798
+
799
+ .download-chunk {
800
+ transition: var(--transition-fast);
801
+ }
802
+
803
+ .download-chunk:hover {
804
+ transform: scale(1.1);
805
+ }
806
+
807
+ /* Enhanced Navigation */
808
+ .navbar {
809
+ backdrop-filter: blur(10px);
810
+ background: rgba(255, 255, 255, 0.95) !important;
811
+ border-bottom: 1px solid rgba(226, 232, 240, 0.8);
812
+ box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1);
813
+ }
814
+
815
+ .navbar-brand {
816
+ font-weight: 800;
817
+ font-size: 1.5rem;
818
+ color: var(--primary-color) !important;
819
+ transition: var(--transition);
820
+ }
821
+
822
+ .navbar-brand:hover {
823
+ transform: scale(1.05);
824
+ }
825
+
826
+ .navbar-nav .nav-link {
827
+ font-weight: 500;
828
+ transition: var(--transition);
829
+ color: var(--text-color) !important;
830
+ position: relative;
831
+ padding: 0.75rem 1rem !important;
832
+ }
833
+
834
+ .navbar-nav .nav-link::after {
835
+ content: '';
836
+ position: absolute;
837
+ bottom: 0;
838
+ left: 50%;
839
+ width: 0;
840
+ height: 2px;
841
+ background: var(--gradient-primary);
842
+ transition: var(--transition);
843
+ transform: translateX(-50%);
844
+ }
845
+
846
+ .navbar-nav .nav-link:hover::after {
847
+ width: 80%;
848
+ }
849
+
850
+ .navbar-nav .nav-link:hover {
851
+ color: var(--primary-color) !important;
852
+ }
853
+
854
+ .navbar-text {
855
+ color: var(--text-muted) !important;
856
+ font-weight: 500;
857
+ }
858
+
859
+ /* Enhanced Footer */
860
+ .footer {
861
+ background: linear-gradient(135deg, var(--dark-color) 0%, #2d3748 100%);
862
+ color: white;
863
+ padding: 3rem 0 2rem;
864
+ margin-top: 6rem;
865
+ position: relative;
866
+ overflow: hidden;
867
+ }
868
+
869
+ .footer::before {
870
+ content: '';
871
+ position: absolute;
872
+ top: 0;
873
+ left: 0;
874
+ right: 0;
875
+ bottom: 0;
876
+ background: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100"><defs><pattern id="footer-pattern" width="20" height="20" patternUnits="userSpaceOnUse"><circle cx="10" cy="10" r="0.5" fill="white" opacity="0.1"/></pattern></defs><rect width="100" height="100" fill="url(%23footer-pattern)"/></svg>');
877
+ }
878
+
879
+ .footer h5 {
880
+ color: white;
881
+ font-weight: 700;
882
+ margin-bottom: 1rem;
883
+ }
884
+
885
+ .footer p, .footer a {
886
+ color: rgba(255, 255, 255, 0.8);
887
+ transition: var(--transition);
888
+ }
889
+
890
+ .footer a:hover {
891
+ color: white;
892
+ text-decoration: none;
893
+ }
894
+
895
+ /* Enhanced Responsive Design */
896
+ @media (max-width: 1200px) {
897
+ .hero-section {
898
+ padding: 4rem 0;
899
+ }
900
+
901
+ .floating-icon-container {
902
+ width: 250px;
903
+ height: 250px;
904
+ }
905
+
906
+ .floating-icon {
907
+ width: 50px;
908
+ height: 50px;
909
+ font-size: 1.25rem;
910
+ }
911
+
912
+ .hero-main-icon {
913
+ width: 100px;
914
+ height: 100px;
915
+ font-size: 2.5rem;
916
+ }
917
+ }
918
+
919
+ @media (max-width: 992px) {
920
+ .hero-section {
921
+ padding: 3rem 0;
922
+ min-height: auto;
923
+ }
924
+
925
+ .display-3 {
926
+ font-size: 2.5rem;
927
+ }
928
+
929
+ .features-section, .stats-section, .quick-start-section,
930
+ .use-cases-section, .tech-specs-section, .faq-section,
931
+ .final-cta-section {
932
+ padding: 4rem 0;
933
+ }
934
+
935
+ .floating-icon-container {
936
+ display: none;
937
+ }
938
+
939
+ .hero-visual {
940
+ margin-top: 2rem;
941
+ }
942
+ }
943
+
944
+ @media (max-width: 768px) {
945
+ .hero-section {
946
+ padding: 2rem 0;
947
+ text-align: center;
948
+ }
949
+
950
+ .display-3 {
951
+ font-size: 2rem;
952
+ }
953
+
954
+ .lead {
955
+ font-size: 1rem;
956
+ }
957
+
958
+ .btn-lg {
959
+ padding: 0.75rem 1.5rem;
960
+ font-size: 1rem;
961
+ width: 100%;
962
+ margin-bottom: 1rem;
963
+ }
964
+
965
+ .hero-stats .col-4 {
966
+ margin-bottom: 1rem;
967
+ }
968
+
969
+ .stat-item h3 {
970
+ font-size: 2rem;
971
+ }
972
+
973
+ .features-section, .stats-section, .quick-start-section,
974
+ .use-cases-section, .tech-specs-section, .faq-section,
975
+ .final-cta-section {
976
+ padding: 3rem 0;
977
+ }
978
+
979
+ .feature-card-enhanced, .use-case-card, .tech-spec-card {
980
+ margin-bottom: 2rem;
981
+ }
982
+
983
+ .code-card {
984
+ margin-bottom: 1.5rem;
985
+ }
986
+
987
+ .code-header {
988
+ flex-direction: column;
989
+ gap: 1rem;
990
+ text-align: center;
991
+ }
992
+
993
+ .quick-start-cta {
994
+ padding: 2rem 1rem;
995
+ }
996
+
997
+ .cta-buttons .btn {
998
+ width: 100%;
999
+ margin-bottom: 1rem;
1000
+ }
1001
+
1002
+ .navbar-nav {
1003
+ text-align: center;
1004
+ padding: 1rem 0;
1005
+ }
1006
+
1007
+ .toc {
1008
+ position: static;
1009
+ margin-bottom: 2rem;
1010
+ max-height: none;
1011
+ }
1012
+ }
1013
+
1014
+ @media (max-width: 576px) {
1015
+ .container {
1016
+ padding-left: 1rem;
1017
+ padding-right: 1rem;
1018
+ }
1019
+
1020
+ .hero-section {
1021
+ padding: 1.5rem 0;
1022
+ }
1023
+
1024
+ .display-3 {
1025
+ font-size: 1.75rem;
1026
+ }
1027
+
1028
+ .card-body {
1029
+ padding: 1.5rem;
1030
+ }
1031
+
1032
+ .feature-card-enhanced, .use-case-card, .tech-spec-card {
1033
+ padding: 1.5rem;
1034
+ }
1035
+
1036
+ .stat-number {
1037
+ font-size: 2.5rem;
1038
+ }
1039
+
1040
+ .hero-main-icon {
1041
+ width: 80px;
1042
+ height: 80px;
1043
+ font-size: 2rem;
1044
+ }
1045
+
1046
+ .pulse-ring {
1047
+ width: 100px;
1048
+ height: 100px;
1049
+ }
1050
+ }
1051
+
1052
+ /* Enhanced Accessibility */
1053
+ .btn:focus,
1054
+ .form-control:focus,
1055
+ .form-select:focus,
1056
+ .form-check-input:focus {
1057
+ outline: 3px solid rgba(99, 102, 241, 0.3);
1058
+ outline-offset: 2px;
1059
+ }
1060
+
1061
+ .btn:focus-visible,
1062
+ .form-control:focus-visible,
1063
+ .form-select:focus-visible {
1064
+ outline: 3px solid var(--primary-color);
1065
+ outline-offset: 2px;
1066
+ }
1067
+
1068
+ /* Skip to content link for screen readers */
1069
+ .skip-link {
1070
+ position: absolute;
1071
+ top: -40px;
1072
+ left: 6px;
1073
+ background: var(--primary-color);
1074
+ color: white;
1075
+ padding: 8px;
1076
+ text-decoration: none;
1077
+ border-radius: 4px;
1078
+ z-index: 1000;
1079
+ }
1080
+
1081
+ .skip-link:focus {
1082
+ top: 6px;
1083
+ }
1084
+
1085
+ /* Enhanced Animation Classes */
1086
+ .fade-in {
1087
+ animation: fadeIn 0.6s cubic-bezier(0.4, 0, 0.2, 1);
1088
+ }
1089
+
1090
+ @keyframes fadeIn {
1091
+ from {
1092
+ opacity: 0;
1093
+ transform: translateY(10px);
1094
+ }
1095
+ to {
1096
+ opacity: 1;
1097
+ transform: translateY(0);
1098
+ }
1099
+ }
1100
+
1101
+ .slide-up {
1102
+ animation: slideUp 0.6s cubic-bezier(0.4, 0, 0.2, 1);
1103
+ }
1104
+
1105
+ @keyframes slideUp {
1106
+ from {
1107
+ opacity: 0;
1108
+ transform: translateY(30px);
1109
+ }
1110
+ to {
1111
+ opacity: 1;
1112
+ transform: translateY(0);
1113
+ }
1114
+ }
1115
+
1116
+ .scale-in {
1117
+ animation: scaleIn 0.5s cubic-bezier(0.4, 0, 0.2, 1);
1118
+ }
1119
+
1120
+ @keyframes scaleIn {
1121
+ from {
1122
+ opacity: 0;
1123
+ transform: scale(0.9);
1124
+ }
1125
+ to {
1126
+ opacity: 1;
1127
+ transform: scale(1);
1128
+ }
1129
+ }
1130
+
1131
+ /* Enhanced Utility Classes */
1132
+ .text-gradient {
1133
+ background: var(--gradient-primary);
1134
+ -webkit-background-clip: text;
1135
+ -webkit-text-fill-color: transparent;
1136
+ background-clip: text;
1137
+ }
1138
+
1139
+ .text-gradient-secondary {
1140
+ background: var(--gradient-secondary);
1141
+ -webkit-background-clip: text;
1142
+ -webkit-text-fill-color: transparent;
1143
+ background-clip: text;
1144
+ }
1145
+
1146
+ .shadow-custom {
1147
+ box-shadow: var(--box-shadow);
1148
+ }
1149
+
1150
+ .shadow-lg-custom {
1151
+ box-shadow: var(--box-shadow-lg);
1152
+ }
1153
+
1154
+ .shadow-xl-custom {
1155
+ box-shadow: var(--box-shadow-xl);
1156
+ }
1157
+
1158
+ .border-radius-custom {
1159
+ border-radius: var(--border-radius);
1160
+ }
1161
+
1162
+ .bg-gradient-primary {
1163
+ background: var(--gradient-primary);
1164
+ }
1165
+
1166
+ .bg-gradient-secondary {
1167
+ background: var(--gradient-secondary);
1168
+ }
1169
+
1170
+ .bg-gradient-accent {
1171
+ background: var(--gradient-accent);
1172
+ }
1173
+
1174
+ /* Enhanced Progress Indicators */
1175
+ .progress-custom {
1176
+ height: 10px;
1177
+ border-radius: var(--border-radius-sm);
1178
+ background-color: #e2e8f0;
1179
+ overflow: hidden;
1180
+ box-shadow: inset 0 1px 3px rgba(0, 0, 0, 0.1);
1181
+ }
1182
+
1183
+ .progress-bar-custom {
1184
+ height: 100%;
1185
+ background: var(--gradient-primary);
1186
+ transition: width 0.6s cubic-bezier(0.4, 0, 0.2, 1);
1187
+ position: relative;
1188
+ overflow: hidden;
1189
+ }
1190
+
1191
+ .progress-bar-custom::after {
1192
+ content: '';
1193
+ position: absolute;
1194
+ top: 0;
1195
+ left: 0;
1196
+ right: 0;
1197
+ bottom: 0;
1198
+ background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent);
1199
+ animation: progress-shimmer 2s infinite;
1200
+ }
1201
+
1202
+ @keyframes progress-shimmer {
1203
+ 0% { transform: translateX(-100%); }
1204
+ 100% { transform: translateX(100%); }
1205
+ }
1206
+
1207
+ /* Enhanced Tooltip */
1208
+ .tooltip-inner {
1209
+ background-color: var(--dark-color);
1210
+ border-radius: var(--border-radius-sm);
1211
+ font-size: 0.875rem;
1212
+ padding: 0.5rem 0.75rem;
1213
+ box-shadow: var(--box-shadow);
1214
+ }
1215
+
1216
+ /* Enhanced Custom Scrollbar */
1217
+ ::-webkit-scrollbar {
1218
+ width: 10px;
1219
+ height: 10px;
1220
+ }
1221
+
1222
+ ::-webkit-scrollbar-track {
1223
+ background: var(--light-gray);
1224
+ border-radius: var(--border-radius-sm);
1225
+ }
1226
+
1227
+ ::-webkit-scrollbar-thumb {
1228
+ background: var(--gradient-primary);
1229
+ border-radius: var(--border-radius-sm);
1230
+ border: 2px solid var(--light-gray);
1231
+ }
1232
+
1233
+ ::-webkit-scrollbar-thumb:hover {
1234
+ background: var(--gradient-secondary);
1235
+ }
1236
+
1237
+ ::-webkit-scrollbar-corner {
1238
+ background: var(--light-gray);
1239
+ }
1240
+
1241
+ /* Print Styles */
1242
+ @media print {
1243
+ .navbar, .footer, .hero-scroll-indicator, .floating-icon-container {
1244
+ display: none !important;
1245
+ }
1246
+
1247
+ .hero-section {
1248
+ background: white !important;
1249
+ color: black !important;
1250
+ padding: 1rem 0 !important;
1251
+ }
1252
+
1253
+ .card {
1254
+ box-shadow: none !important;
1255
+ border: 1px solid #ddd !important;
1256
+ }
1257
+
1258
+ .btn {
1259
+ border: 1px solid #ddd !important;
1260
+ background: white !important;
1261
+ color: black !important;
1262
+ }
1263
+ }
1264
+
1265
+ /* Playground-Specific Styles */
1266
+ .playground-visual {
1267
+ position: relative;
1268
+ display: flex;
1269
+ justify-content: center;
1270
+ align-items: center;
1271
+ height: 200px;
1272
+ }
1273
+
1274
+ .playground-icon {
1275
+ width: 100px;
1276
+ height: 100px;
1277
+ background: rgba(255, 255, 255, 0.15);
1278
+ border-radius: 50%;
1279
+ display: flex;
1280
+ align-items: center;
1281
+ justify-content: center;
1282
+ font-size: 2.5rem;
1283
+ color: white;
1284
+ backdrop-filter: blur(20px);
1285
+ border: 2px solid rgba(255, 255, 255, 0.3);
1286
+ position: relative;
1287
+ }
1288
+
1289
+ .audio-player-container {
1290
+ border: 2px solid #e2e8f0;
1291
+ transition: var(--transition);
1292
+ }
1293
+
1294
+ .audio-player-container:hover {
1295
+ border-color: var(--primary-color);
1296
+ box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
1297
+ }
1298
+
1299
+ .stat-item {
1300
+ padding: 1rem;
1301
+ text-align: center;
1302
+ }
1303
+
1304
+ .stat-item i {
1305
+ font-size: 1.5rem;
1306
+ margin-bottom: 0.5rem;
1307
+ display: block;
1308
+ }
1309
+
1310
+ .stat-value {
1311
+ font-size: 1.25rem;
1312
+ font-weight: 700;
1313
+ color: var(--dark-color);
1314
+ margin-bottom: 0.25rem;
1315
+ }
1316
+
1317
+ .stat-label {
1318
+ font-size: 0.875rem;
1319
+ color: var(--text-muted);
1320
+ font-weight: 500;
1321
+ }
1322
+
1323
+ .card-header {
1324
+ border-bottom: none;
1325
+ border-radius: var(--border-radius) var(--border-radius) 0 0 !important;
1326
+ }
1327
+
1328
+ /* Enhanced Form Controls for Playground */
1329
+ .playground .form-control,
1330
+ .playground .form-select {
1331
+ border: 2px solid #e2e8f0;
1332
+ border-radius: var(--border-radius-sm);
1333
+ padding: 1rem;
1334
+ font-size: 1rem;
1335
+ transition: var(--transition);
1336
+ }
1337
+
1338
+ .playground .form-control:focus,
1339
+ .playground .form-select:focus {
1340
+ border-color: var(--primary-color);
1341
+ box-shadow: 0 0 0 4px rgba(99, 102, 241, 0.1);
1342
+ transform: translateY(-1px);
1343
+ }
1344
+
1345
+ .playground .btn-group .btn {
1346
+ border-radius: var(--border-radius-sm);
1347
+ }
1348
+
1349
+ .playground .btn-group .btn:first-child {
1350
+ border-top-right-radius: 0;
1351
+ border-bottom-right-radius: 0;
1352
+ }
1353
+
1354
+ .playground .btn-group .btn:last-child {
1355
+ border-top-left-radius: 0;
1356
+ border-bottom-left-radius: 0;
1357
+ }
1358
+
1359
+ /* Audio Player Enhancements */
1360
+ audio::-webkit-media-controls-panel {
1361
+ background-color: var(--light-gray);
1362
+ border-radius: var(--border-radius-sm);
1363
+ }
1364
+
1365
+ audio::-webkit-media-controls-play-button,
1366
+ audio::-webkit-media-controls-pause-button {
1367
+ background-color: var(--primary-color);
1368
+ border-radius: 50%;
1369
+ }
1370
+
1371
+ audio::-webkit-media-controls-timeline {
1372
+ background-color: var(--light-gray);
1373
+ border-radius: var(--border-radius-sm);
1374
+ }
1375
+
1376
+ audio::-webkit-media-controls-current-time-display,
1377
+ audio::-webkit-media-controls-time-remaining-display {
1378
+ color: var(--text-color);
1379
+ font-weight: 500;
1380
+ }
1381
+
1382
+ /* Reduced Motion Support */
1383
+ @media (prefers-reduced-motion: reduce) {
1384
+ *,
1385
+ *::before,
1386
+ *::after {
1387
+ animation-duration: 0.01ms !important;
1388
+ animation-iteration-count: 1 !important;
1389
+ transition-duration: 0.01ms !important;
1390
+ }
1391
+
1392
+ .hero-background-animation,
1393
+ .floating-icon,
1394
+ .pulse-ring,
1395
+ .hero-scroll-indicator,
1396
+ .playground-icon {
1397
+ animation: none !important;
1398
+ }
1399
+ }
ttsfm-web/static/js/i18n.js ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // JavaScript Internationalization Support for TTSFM
2
+
3
+ // Translation data - this will be populated by the server
4
+ window.i18nData = window.i18nData || {};
5
+
6
+ // Current locale
7
+ window.currentLocale = document.documentElement.lang || 'en';
8
+
9
+ // Translation function
10
+ function _(key, params = {}) {
11
+ const keys = key.split('.');
12
+ let value = window.i18nData;
13
+
14
+ // Navigate through the nested object
15
+ for (const k of keys) {
16
+ if (value && typeof value === 'object' && k in value) {
17
+ value = value[k];
18
+ } else {
19
+ // Fallback to key if translation not found
20
+ return key;
21
+ }
22
+ }
23
+
24
+ // If we found a string, apply parameters
25
+ if (typeof value === 'string') {
26
+ return formatString(value, params);
27
+ }
28
+
29
+ // Fallback to key
30
+ return key;
31
+ }
32
+
33
+ // Format string with parameters
34
+ function formatString(str, params) {
35
+ return str.replace(/\{(\w+)\}/g, (match, key) => {
36
+ return params.hasOwnProperty(key) ? params[key] : match;
37
+ });
38
+ }
39
+
40
+ // Load translations from server
41
+ async function loadTranslations() {
42
+ try {
43
+ const response = await fetch(`/api/translations/${window.currentLocale}`);
44
+ if (response.ok) {
45
+ window.i18nData = await response.json();
46
+ }
47
+ } catch (error) {
48
+ console.warn('Failed to load translations:', error);
49
+ }
50
+ }
51
+
52
+ // Sample texts for different languages
53
+ const sampleTexts = {
54
+ en: {
55
+ welcome: "Welcome to TTSFM! This is a free text-to-speech service that converts your text into high-quality audio using advanced AI technology.",
56
+ story: "Once upon a time, in a digital world far away, there lived a small Python package that could transform any text into beautiful speech. This package was called TTSFM, and it brought joy to developers everywhere.",
57
+ technical: "TTSFM is a Python client for text-to-speech APIs that provides both synchronous and asynchronous interfaces. It supports multiple voices and audio formats, making it perfect for various applications.",
58
+ multilingual: "TTSFM supports multiple languages and voices, allowing you to create diverse audio content for global audiences. The service is completely free and requires no API keys.",
59
+ long: "This is a longer text sample designed to test the auto-combine feature of TTSFM. When text exceeds the maximum length limit, TTSFM automatically splits it into smaller chunks, generates audio for each chunk, and then seamlessly combines them into a single audio file. This process is completely transparent to the user and ensures that you can convert text of any length without worrying about technical limitations. The resulting audio maintains consistent quality and natural flow throughout the entire content."
60
+ },
61
+ zh: {
62
+ welcome: "欢迎使用TTSFM!这是一个免费的文本转语音服务,使用先进的AI技术将您的文本转换为高质量音频。",
63
+ story: "很久很久以前,在一个遥远的数字世界里,住着一个小小的Python包,它能够将任何文本转换成美妙的语音。这个包叫做TTSFM,它为世界各地的开发者带来了快乐。",
64
+ technical: "TTSFM是一个用于文本转语音API的Python客户端,提供同步和异步接口。它支持多种声音和音频格式,非常适合各种应用。",
65
+ multilingual: "TTSFM支持多种语言和声音,让您能够为全球受众创建多样化的音频内容。该服务完全免费,无需API密钥。",
66
+ long: "这是一个较长的文本示例,用于测试TTSFM的自动合并功能。当文本超过最大长度限制时,TTSFM会自动将其分割成较小的片段,为每个片段生成音频,然后无缝地将它们合并成一个音频文件。这个过程对用户完全透明,确保您可以转换任何长度的文本,而无需担心技术限制。生成的音频在整个内容中保持一致的质量和自然的流畅性。"
67
+ }
68
+ };
69
+
70
+ // Get sample text for current locale
71
+ function getSampleText(type) {
72
+ const locale = window.currentLocale;
73
+ const texts = sampleTexts[locale] || sampleTexts.en;
74
+ return texts[type] || texts.welcome;
75
+ }
76
+
77
+ // Error messages
78
+ const errorMessages = {
79
+ en: {
80
+ empty_text: "Please enter some text to convert.",
81
+ generation_failed: "Failed to generate speech. Please try again.",
82
+ network_error: "Network error. Please check your connection and try again.",
83
+ invalid_format: "Invalid audio format selected.",
84
+ invalid_voice: "Invalid voice selected.",
85
+ text_too_long: "Text is too long. Please reduce the length or enable auto-combine.",
86
+ server_error: "Server error. Please try again later."
87
+ },
88
+ zh: {
89
+ empty_text: "请输入要转换的文本。",
90
+ generation_failed: "语音生成失败。请重试。",
91
+ network_error: "网络错误。请检查您的连接并重��。",
92
+ invalid_format: "选择的音频格式无效。",
93
+ invalid_voice: "选择的声音无效。",
94
+ text_too_long: "文本太长。请减少长度或启用自动合并。",
95
+ server_error: "服务器错误。请稍后重试。"
96
+ }
97
+ };
98
+
99
+ // Success messages
100
+ const successMessages = {
101
+ en: {
102
+ generation_complete: "Speech generated successfully!",
103
+ text_copied: "Text copied to clipboard!",
104
+ download_started: "Download started!"
105
+ },
106
+ zh: {
107
+ generation_complete: "语音生成成功!",
108
+ text_copied: "文本已复制到剪贴板!",
109
+ download_started: "下载已开始!"
110
+ }
111
+ };
112
+
113
+ // Get error message
114
+ function getErrorMessage(key) {
115
+ const locale = window.currentLocale;
116
+ const messages = errorMessages[locale] || errorMessages.en;
117
+ return messages[key] || key;
118
+ }
119
+
120
+ // Get success message
121
+ function getSuccessMessage(key) {
122
+ const locale = window.currentLocale;
123
+ const messages = successMessages[locale] || successMessages.en;
124
+ return messages[key] || key;
125
+ }
126
+
127
+ // Format file size
128
+ function formatFileSize(bytes) {
129
+ if (bytes === 0) return '0 Bytes';
130
+
131
+ const k = 1024;
132
+ const sizes = window.currentLocale === 'zh'
133
+ ? ['字节', 'KB', 'MB', 'GB']
134
+ : ['Bytes', 'KB', 'MB', 'GB'];
135
+
136
+ const i = Math.floor(Math.log(bytes) / Math.log(k));
137
+ return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
138
+ }
139
+
140
+ // Format duration
141
+ function formatDuration(seconds) {
142
+ if (isNaN(seconds) || seconds < 0) {
143
+ return window.currentLocale === 'zh' ? '未知' : 'Unknown';
144
+ }
145
+
146
+ const minutes = Math.floor(seconds / 60);
147
+ const remainingSeconds = Math.floor(seconds % 60);
148
+
149
+ if (minutes > 0) {
150
+ return window.currentLocale === 'zh'
151
+ ? `${minutes}分${remainingSeconds}秒`
152
+ : `${minutes}m ${remainingSeconds}s`;
153
+ } else {
154
+ return window.currentLocale === 'zh'
155
+ ? `${remainingSeconds}秒`
156
+ : `${remainingSeconds}s`;
157
+ }
158
+ }
159
+
160
+ // Update UI text based on current locale
161
+ function updateUIText() {
162
+ // Update button texts
163
+ const generateBtn = document.getElementById('generate-btn');
164
+ if (generateBtn && !generateBtn.disabled) {
165
+ generateBtn.innerHTML = window.currentLocale === 'zh'
166
+ ? '<i class="fas fa-magic me-2"></i>生成语音'
167
+ : '<i class="fas fa-magic me-2"></i>Generate Speech';
168
+ }
169
+
170
+ // Update other dynamic text elements
171
+ const charCountElement = document.querySelector('#char-count');
172
+ if (charCountElement) {
173
+ const count = charCountElement.textContent;
174
+ const parent = charCountElement.parentElement;
175
+ if (parent) {
176
+ // Escape HTML characters to prevent XSS
177
+ const escapedCount = count.replace(/&/g, '&amp;')
178
+ .replace(/</g, '&lt;')
179
+ .replace(/>/g, '&gt;')
180
+ .replace(/"/g, '&quot;')
181
+ .replace(/'/g, '&#x27;');
182
+
183
+ parent.innerHTML = window.currentLocale === 'zh'
184
+ ? `<i class="fas fa-keyboard me-1"></i><span id="char-count">${escapedCount}</span> 字符`
185
+ : `<i class="fas fa-keyboard me-1"></i><span id="char-count">${escapedCount}</span> characters`;
186
+ }
187
+ }
188
+ }
189
+
190
+ // Initialize i18n
191
+ function initI18n() {
192
+ // Load translations if needed
193
+ loadTranslations();
194
+
195
+ // Update UI text
196
+ updateUIText();
197
+
198
+ // Listen for language changes
199
+ document.addEventListener('languageChanged', function(event) {
200
+ window.currentLocale = event.detail.locale;
201
+ loadTranslations().then(() => {
202
+ updateUIText();
203
+ });
204
+ });
205
+ }
206
+
207
+ // Export functions for global use
208
+ window._ = _;
209
+ window.getSampleText = getSampleText;
210
+ window.getErrorMessage = getErrorMessage;
211
+ window.getSuccessMessage = getSuccessMessage;
212
+ window.formatFileSize = formatFileSize;
213
+ window.formatDuration = formatDuration;
214
+ window.initI18n = initI18n;
215
+
216
+ // Auto-initialize when DOM is ready
217
+ if (document.readyState === 'loading') {
218
+ document.addEventListener('DOMContentLoaded', initI18n);
219
+ } else {
220
+ initI18n();
221
+ }
ttsfm-web/static/js/playground-enhanced-fixed.js ADDED
@@ -0,0 +1,712 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // TTSFM Enhanced Playground with WebSocket Streaming Support - Fixed Version
2
+
3
+ // Global variables
4
+ let currentAudioBlob = null;
5
+ let currentFormat = 'mp3';
6
+ let batchResults = [];
7
+ let wsClient = null;
8
+ let streamingMode = false;
9
+ let currentStreamRequest = null;
10
+
11
+ // Initialize playground
12
+ document.addEventListener('DOMContentLoaded', function() {
13
+ initializePlayground();
14
+ initializeWebSocket();
15
+ });
16
+
17
+ // Initialize WebSocket client
18
+ function initializeWebSocket() {
19
+ // Check if Socket.IO is available
20
+ if (typeof io === 'undefined') {
21
+ console.warn('Socket.IO not loaded. WebSocket streaming will be disabled.');
22
+ return;
23
+ }
24
+
25
+ // Initialize WebSocket client
26
+ wsClient = new WebSocketTTSClient({
27
+ socketUrl: window.location.origin,
28
+ debug: true,
29
+ onConnect: () => {
30
+ console.log('WebSocket connected');
31
+ updateStreamingStatus('connected');
32
+ },
33
+ onDisconnect: () => {
34
+ console.log('WebSocket disconnected');
35
+ updateStreamingStatus('disconnected');
36
+ },
37
+ onError: (error) => {
38
+ console.error('WebSocket error:', error);
39
+ updateStreamingStatus('error');
40
+ }
41
+ });
42
+ }
43
+
44
+ // Update streaming status indicator
45
+ function updateStreamingStatus(status) {
46
+ const indicator = document.getElementById('streaming-indicator');
47
+ if (!indicator) return;
48
+
49
+ indicator.className = 'streaming-status';
50
+ switch(status) {
51
+ case 'connected':
52
+ indicator.classList.add('connected');
53
+ indicator.innerHTML = '<i class="fas fa-bolt"></i> Streaming Ready';
54
+ enableStreamingMode(true);
55
+ break;
56
+ case 'disconnected':
57
+ indicator.classList.add('disconnected');
58
+ indicator.innerHTML = '<i class="fas fa-plug"></i> Streaming Offline';
59
+ enableStreamingMode(false);
60
+ break;
61
+ case 'error':
62
+ indicator.classList.add('error');
63
+ indicator.innerHTML = '<i class="fas fa-exclamation-triangle"></i> Connection Error';
64
+ enableStreamingMode(false);
65
+ break;
66
+ case 'streaming':
67
+ indicator.classList.add('streaming');
68
+ indicator.innerHTML = '<i class="fas fa-stream"></i> Streaming...';
69
+ break;
70
+ }
71
+ }
72
+
73
+ // Enable/disable streaming mode
74
+ function enableStreamingMode(enabled) {
75
+ const streamToggle = document.getElementById('stream-mode-toggle');
76
+ if (streamToggle) {
77
+ streamToggle.disabled = !enabled;
78
+ if (!enabled && streamingMode) {
79
+ streamingMode = false;
80
+ streamToggle.checked = false;
81
+ }
82
+ }
83
+ }
84
+
85
+ // Check authentication status
86
+ async function checkAuthStatus() {
87
+ try {
88
+ const response = await fetch('/api/auth-status');
89
+ const data = await response.json();
90
+
91
+ const apiKeySection = document.getElementById('api-key-section');
92
+ if (apiKeySection) {
93
+ if (data.api_key_required) {
94
+ apiKeySection.style.display = 'block';
95
+ const apiKeyInput = document.getElementById('api-key-input');
96
+ if (apiKeyInput) {
97
+ apiKeyInput.required = true;
98
+ }
99
+ } else {
100
+ apiKeySection.style.display = 'none';
101
+ }
102
+ }
103
+ } catch (error) {
104
+ console.warn('Could not check auth status:', error);
105
+ }
106
+ }
107
+
108
+ function initializePlayground() {
109
+ console.log('Initializing enhanced playground...');
110
+ checkAuthStatus();
111
+ loadVoices();
112
+ loadFormats();
113
+ updateCharCount();
114
+ setupEventListeners();
115
+ setupStreamingControls();
116
+ console.log('Enhanced playground initialization complete');
117
+ }
118
+
119
+ function setupStreamingControls() {
120
+ // Add streaming mode toggle
121
+ const generateButton = document.getElementById('generate-btn');
122
+ if (generateButton && generateButton.parentElement) {
123
+ const streamingControls = document.createElement('div');
124
+ streamingControls.className = 'streaming-controls mt-3';
125
+ streamingControls.innerHTML = `
126
+ <div class="form-check form-switch">
127
+ <input class="form-check-input" type="checkbox" id="stream-mode-toggle" disabled>
128
+ <label class="form-check-label" for="stream-mode-toggle">
129
+ <i class="fas fa-bolt me-1"></i>
130
+ Enable WebSocket Streaming
131
+ <small class="text-muted">(Real-time audio chunks)</small>
132
+ </label>
133
+ </div>
134
+ <div id="streaming-indicator" class="streaming-status mt-2"></div>
135
+ `;
136
+ generateButton.parentElement.appendChild(streamingControls);
137
+
138
+ // Add toggle event listener
139
+ const toggle = document.getElementById('stream-mode-toggle');
140
+ if (toggle) {
141
+ toggle.addEventListener('change', (e) => {
142
+ streamingMode = e.target.checked;
143
+ console.log('Streaming mode:', streamingMode ? 'ON' : 'OFF');
144
+
145
+ // Update button text
146
+ const btnText = generateButton.querySelector('.btn-text');
147
+ if (btnText) {
148
+ if (streamingMode) {
149
+ btnText.innerHTML = '<i class="fas fa-bolt me-2"></i>Stream Speech';
150
+ } else {
151
+ btnText.innerHTML = '<i class="fas fa-magic me-2"></i>' +
152
+ (window.currentLocale === 'zh' ? '生成语音' : 'Generate Speech');
153
+ }
154
+ }
155
+ });
156
+ }
157
+ }
158
+
159
+ // Add streaming progress section and error message div
160
+ const audioResult = document.getElementById('audio-result');
161
+ if (audioResult && audioResult.parentElement) {
162
+ // Add error message div
163
+ const errorDiv = document.createElement('div');
164
+ errorDiv.id = 'error-message';
165
+ errorDiv.className = 'alert alert-danger';
166
+ errorDiv.style.display = 'none';
167
+ audioResult.parentElement.insertBefore(errorDiv, audioResult);
168
+
169
+ // Add loading section
170
+ const loadingDiv = document.createElement('div');
171
+ loadingDiv.id = 'loading-section';
172
+ loadingDiv.className = 'text-center';
173
+ loadingDiv.style.display = 'none';
174
+ loadingDiv.innerHTML = `
175
+ <div class="spinner-border text-primary" role="status">
176
+ <span class="visually-hidden">Loading...</span>
177
+ </div>
178
+ <p class="mt-2">Generating speech...</p>
179
+ `;
180
+ audioResult.parentElement.insertBefore(loadingDiv, audioResult);
181
+
182
+ // Add progress section
183
+ const progressSection = document.createElement('div');
184
+ progressSection.id = 'streaming-progress';
185
+ progressSection.className = 'streaming-progress-section';
186
+ progressSection.style.display = 'none';
187
+ progressSection.innerHTML = `
188
+ <div class="card border-primary">
189
+ <div class="card-body">
190
+ <h5 class="card-title">
191
+ <i class="fas fa-stream me-2"></i>Streaming Progress
192
+ </h5>
193
+ <div class="progress mb-3" style="height: 25px;">
194
+ <div class="progress-bar progress-bar-striped progress-bar-animated"
195
+ id="stream-progress-bar"
196
+ role="progressbar"
197
+ style="width: 0%">
198
+ <span id="stream-progress-text">0%</span>
199
+ </div>
200
+ </div>
201
+ <div class="row text-center">
202
+ <div class="col-md-4">
203
+ <h6>Chunks</h6>
204
+ <p class="h5"><span id="chunks-count">0</span> / <span id="total-chunks">0</span></p>
205
+ </div>
206
+ <div class="col-md-4">
207
+ <h6>Data</h6>
208
+ <p class="h5" id="data-transferred">0 KB</p>
209
+ </div>
210
+ <div class="col-md-4">
211
+ <h6>Time</h6>
212
+ <p class="h5" id="stream-time">0.0s</p>
213
+ </div>
214
+ </div>
215
+ <div id="chunks-visualization" class="chunks-visual mt-3"></div>
216
+ </div>
217
+ </div>
218
+ `;
219
+ audioResult.parentElement.insertBefore(progressSection, audioResult);
220
+ }
221
+ }
222
+
223
+ function setupEventListeners() {
224
+ console.log('Setting up event listeners...');
225
+
226
+ // Form and input events
227
+ const textInput = document.getElementById('text-input');
228
+ if (textInput) {
229
+ textInput.addEventListener('input', updateCharCount);
230
+ }
231
+
232
+ // Form submit
233
+ const form = document.getElementById('tts-form');
234
+ if (form) {
235
+ form.addEventListener('submit', function(event) {
236
+ event.preventDefault();
237
+ event.stopPropagation();
238
+
239
+ if (streamingMode && wsClient && wsClient.isConnected()) {
240
+ generateSpeechStreaming(event);
241
+ } else {
242
+ generateSpeech(event);
243
+ }
244
+
245
+ return false;
246
+ });
247
+ }
248
+
249
+ // Download button
250
+ const downloadBtn = document.getElementById('download-btn');
251
+ if (downloadBtn) {
252
+ downloadBtn.addEventListener('click', downloadAudio);
253
+ }
254
+ }
255
+
256
+ // Generate speech using WebSocket streaming
257
+ async function generateSpeechStreaming(event) {
258
+ event.preventDefault();
259
+
260
+ const text = document.getElementById('text-input').value.trim();
261
+ const voice = document.getElementById('voice-select').value;
262
+ const format = document.getElementById('format-select').value;
263
+
264
+ if (!text) {
265
+ showError('Please enter some text to convert');
266
+ return;
267
+ }
268
+
269
+ // Reset UI
270
+ hideError();
271
+ hideResults();
272
+ disableForm();
273
+
274
+ // Show streaming progress
275
+ const progressSection = document.getElementById('streaming-progress');
276
+ if (progressSection) progressSection.style.display = 'block';
277
+
278
+ // Reset progress
279
+ updateStreamingProgress(0, 0, 0);
280
+ const chunksViz = document.getElementById('chunks-visualization');
281
+ if (chunksViz) chunksViz.innerHTML = '';
282
+
283
+ // Update status
284
+ updateStreamingStatus('streaming');
285
+
286
+ const startTime = Date.now();
287
+ let audioChunks = [];
288
+
289
+ try {
290
+ const result = await wsClient.generateSpeech(text, {
291
+ voice: voice,
292
+ format: format,
293
+ chunkSize: 512,
294
+ onStart: (data) => {
295
+ currentStreamRequest = data.request_id;
296
+ console.log('Streaming started:', data);
297
+ },
298
+ onProgress: (progress) => {
299
+ updateStreamingProgress(
300
+ progress.progress,
301
+ progress.chunksCompleted,
302
+ progress.totalChunks
303
+ );
304
+
305
+ const elapsed = (Date.now() - startTime) / 1000;
306
+ const timeEl = document.getElementById('stream-time');
307
+ if (timeEl) timeEl.textContent = `${elapsed.toFixed(1)}s`;
308
+ },
309
+ onChunk: (chunk) => {
310
+ // Visualize chunk
311
+ const chunksViz = document.getElementById('chunks-visualization');
312
+ if (chunksViz) {
313
+ const chunkViz = document.createElement('div');
314
+ chunkViz.className = 'chunk-indicator';
315
+ chunkViz.title = `Chunk ${chunk.chunkIndex + 1} - ${(chunk.audioData.byteLength / 1024).toFixed(1)}KB`;
316
+ chunkViz.innerHTML = `<i class="fas fa-music"></i>`;
317
+ chunksViz.appendChild(chunkViz);
318
+ }
319
+
320
+ // Update data transferred
321
+ const dataEl = document.getElementById('data-transferred');
322
+ if (dataEl) {
323
+ const currentData = parseFloat(dataEl.textContent) || 0;
324
+ const newData = currentData + (chunk.audioData.byteLength / 1024);
325
+ dataEl.textContent = `${newData.toFixed(1)} KB`;
326
+ }
327
+
328
+ audioChunks.push(chunk);
329
+ },
330
+ onComplete: (result) => {
331
+ console.log('Streaming complete:', result);
332
+
333
+ // Create blob from audio data
334
+ currentAudioBlob = new Blob([result.audioData], { type: `audio/${result.format}` });
335
+ currentFormat = result.format;
336
+
337
+ // Show results
338
+ showResults(currentAudioBlob, result.format);
339
+
340
+ // Update final stats
341
+ const totalTime = (Date.now() - startTime) / 1000;
342
+ showStreamingStats({
343
+ chunks: result.chunks.length,
344
+ totalSize: (result.audioData.byteLength / 1024).toFixed(1),
345
+ totalTime: totalTime.toFixed(2),
346
+ format: result.format
347
+ });
348
+ },
349
+ onError: (error) => {
350
+ showError(`Streaming error: ${error.message}`);
351
+ enableForm();
352
+ if (progressSection) progressSection.style.display = 'none';
353
+ }
354
+ });
355
+
356
+ } catch (error) {
357
+ showError(`Failed to stream speech: ${error.message}`);
358
+ enableForm();
359
+ if (progressSection) progressSection.style.display = 'none';
360
+ } finally {
361
+ updateStreamingStatus('connected');
362
+ currentStreamRequest = null;
363
+ }
364
+ }
365
+
366
+ function updateStreamingProgress(progress, chunks, totalChunks) {
367
+ const progressBar = document.getElementById('stream-progress-bar');
368
+ const progressText = document.getElementById('stream-progress-text');
369
+ const chunksCount = document.getElementById('chunks-count');
370
+ const totalChunksEl = document.getElementById('total-chunks');
371
+
372
+ if (progressBar) {
373
+ progressBar.style.width = `${progress}%`;
374
+ if (progressText) progressText.textContent = `${progress}%`;
375
+ }
376
+ if (chunksCount) chunksCount.textContent = chunks;
377
+ if (totalChunksEl) totalChunksEl.textContent = totalChunks;
378
+ }
379
+
380
+ function showStreamingStats(stats) {
381
+ const progressSection = document.getElementById('streaming-progress');
382
+ if (!progressSection) return;
383
+
384
+ const statsHtml = `
385
+ <div class="alert alert-success mt-3">
386
+ <h6><i class="fas fa-check-circle me-2"></i>Streaming Complete!</h6>
387
+ <div class="row mt-2">
388
+ <div class="col-md-3">
389
+ <strong>Chunks:</strong> ${stats.chunks}
390
+ </div>
391
+ <div class="col-md-3">
392
+ <strong>Total Size:</strong> ${stats.totalSize} KB
393
+ </div>
394
+ <div class="col-md-3">
395
+ <strong>Time:</strong> ${stats.totalTime}s
396
+ </div>
397
+ <div class="col-md-3">
398
+ <strong>Format:</strong> ${stats.format.toUpperCase()}
399
+ </div>
400
+ </div>
401
+ </div>
402
+ `;
403
+
404
+ const statsDiv = document.createElement('div');
405
+ statsDiv.innerHTML = statsHtml;
406
+ progressSection.appendChild(statsDiv);
407
+ }
408
+
409
+ // Load available voices
410
+ async function loadVoices() {
411
+ try {
412
+ const response = await fetch('/api/voices');
413
+ const data = await response.json();
414
+
415
+ const voiceSelect = document.getElementById('voice-select');
416
+ if (voiceSelect) {
417
+ voiceSelect.innerHTML = '';
418
+
419
+ data.voices.forEach(voice => {
420
+ const option = document.createElement('option');
421
+ option.value = voice.id;
422
+ option.textContent = voice.name;
423
+ if (voice.id === 'alloy') {
424
+ option.selected = true;
425
+ }
426
+ voiceSelect.appendChild(option);
427
+ });
428
+ }
429
+ } catch (error) {
430
+ console.error('Failed to load voices:', error);
431
+ }
432
+ }
433
+
434
+ // Load available formats
435
+ async function loadFormats() {
436
+ try {
437
+ const response = await fetch('/api/formats');
438
+ const data = await response.json();
439
+
440
+ const formatSelect = document.getElementById('format-select');
441
+ if (formatSelect) {
442
+ formatSelect.innerHTML = '';
443
+
444
+ data.formats.forEach(format => {
445
+ const option = document.createElement('option');
446
+ option.value = format.id;
447
+ option.textContent = `${format.name} - ${format.quality}`;
448
+ if (format.id === 'mp3') {
449
+ option.selected = true;
450
+ }
451
+ formatSelect.appendChild(option);
452
+ });
453
+ }
454
+ } catch (error) {
455
+ console.error('Failed to load formats:', error);
456
+ }
457
+ }
458
+
459
+ // Update character count
460
+ function updateCharCount() {
461
+ const textInput = document.getElementById('text-input');
462
+ const charCount = document.getElementById('char-count');
463
+ const maxLengthInput = document.getElementById('max-length-input');
464
+
465
+ if (textInput && charCount) {
466
+ const currentLength = textInput.value.length;
467
+ const maxLength = maxLengthInput ? parseInt(maxLengthInput.value) : 4096;
468
+
469
+ charCount.textContent = currentLength;
470
+
471
+ if (currentLength > maxLength) {
472
+ charCount.className = 'text-danger fw-bold';
473
+ } else if (currentLength > maxLength * 0.8) {
474
+ charCount.className = 'text-warning fw-bold';
475
+ } else {
476
+ charCount.className = '';
477
+ }
478
+ }
479
+ }
480
+
481
+ // Generate speech (original HTTP method)
482
+ async function generateSpeech(event) {
483
+ event.preventDefault();
484
+
485
+ const text = document.getElementById('text-input').value.trim();
486
+ const voice = document.getElementById('voice-select').value;
487
+ const format = document.getElementById('format-select').value;
488
+ const instructions = document.getElementById('instructions-input')?.value.trim() || '';
489
+ const apiKey = document.getElementById('api-key-input')?.value.trim() || '';
490
+
491
+ if (!text) {
492
+ showError('Please enter some text to convert');
493
+ return;
494
+ }
495
+
496
+ hideError();
497
+ hideResults();
498
+ showLoading();
499
+ disableForm();
500
+
501
+ try {
502
+ const headers = {
503
+ 'Content-Type': 'application/json'
504
+ };
505
+
506
+ if (apiKey) {
507
+ headers['Authorization'] = `Bearer ${apiKey}`;
508
+ }
509
+
510
+ const requestBody = {
511
+ text: text,
512
+ voice: voice,
513
+ format: format
514
+ };
515
+
516
+ if (instructions) {
517
+ requestBody.instructions = instructions;
518
+ }
519
+
520
+ const response = await fetch('/api/generate', {
521
+ method: 'POST',
522
+ headers: headers,
523
+ body: JSON.stringify(requestBody)
524
+ });
525
+
526
+ if (!response.ok) {
527
+ let errorMessage = `Error: ${response.status} ${response.statusText}`;
528
+ try {
529
+ const errorData = await response.json();
530
+ if (errorData.error?.message) {
531
+ errorMessage = errorData.error.message;
532
+ }
533
+ } catch (e) {
534
+ // Use default error message
535
+ }
536
+ throw new Error(errorMessage);
537
+ }
538
+
539
+ const blob = await response.blob();
540
+ currentAudioBlob = blob;
541
+ currentFormat = format;
542
+
543
+ showResults(blob, format);
544
+
545
+ } catch (error) {
546
+ showError(error.message);
547
+ } finally {
548
+ hideLoading();
549
+ enableForm();
550
+ }
551
+ }
552
+
553
+ // Show/hide functions
554
+ function showLoading() {
555
+ const loading = document.getElementById('loading-section');
556
+ if (loading) loading.style.display = 'block';
557
+ }
558
+
559
+ function hideLoading() {
560
+ const loading = document.getElementById('loading-section');
561
+ if (loading) loading.style.display = 'none';
562
+ }
563
+
564
+ function showResults(blob, format) {
565
+ const audioUrl = URL.createObjectURL(blob);
566
+ const audioPlayer = document.getElementById('audio-player');
567
+ if (audioPlayer) {
568
+ audioPlayer.src = audioUrl;
569
+ }
570
+
571
+ const audioResult = document.getElementById('audio-result');
572
+ if (audioResult) {
573
+ audioResult.classList.remove('d-none');
574
+ }
575
+
576
+ const downloadBtn = document.getElementById('download-btn');
577
+ if (downloadBtn) {
578
+ downloadBtn.disabled = false;
579
+ }
580
+
581
+ enableForm();
582
+ }
583
+
584
+ function hideResults() {
585
+ const audioResult = document.getElementById('audio-result');
586
+ if (audioResult) {
587
+ audioResult.classList.add('d-none');
588
+ }
589
+ }
590
+
591
+ function showError(message) {
592
+ const errorDiv = document.getElementById('error-message');
593
+ if (errorDiv) {
594
+ errorDiv.textContent = message;
595
+ errorDiv.style.display = 'block';
596
+ }
597
+ }
598
+
599
+ function hideError() {
600
+ const errorDiv = document.getElementById('error-message');
601
+ if (errorDiv) {
602
+ errorDiv.style.display = 'none';
603
+ }
604
+ }
605
+
606
+ function disableForm() {
607
+ const elements = ['generate-btn', 'text-input', 'voice-select', 'format-select'];
608
+ elements.forEach(id => {
609
+ const el = document.getElementById(id);
610
+ if (el) el.disabled = true;
611
+ });
612
+ }
613
+
614
+ function enableForm() {
615
+ const elements = ['generate-btn', 'text-input', 'voice-select', 'format-select'];
616
+ elements.forEach(id => {
617
+ const el = document.getElementById(id);
618
+ if (el) el.disabled = false;
619
+ });
620
+ }
621
+
622
+ // Download audio
623
+ function downloadAudio() {
624
+ if (!currentAudioBlob) return;
625
+
626
+ const url = URL.createObjectURL(currentAudioBlob);
627
+ const a = document.createElement('a');
628
+ a.href = url;
629
+ a.download = `tts_${Date.now()}.${currentFormat}`;
630
+ a.click();
631
+ URL.revokeObjectURL(url);
632
+ }
633
+
634
+ // Add CSS for streaming visualization
635
+ const style = document.createElement('style');
636
+ style.textContent = `
637
+ .streaming-controls {
638
+ padding: 15px;
639
+ background-color: #f8f9fa;
640
+ border-radius: 8px;
641
+ }
642
+
643
+ .streaming-status {
644
+ display: inline-block;
645
+ padding: 5px 10px;
646
+ border-radius: 20px;
647
+ font-size: 0.875rem;
648
+ font-weight: 500;
649
+ }
650
+
651
+ .streaming-status.connected {
652
+ background-color: #d4edda;
653
+ color: #155724;
654
+ }
655
+
656
+ .streaming-status.disconnected {
657
+ background-color: #f8d7da;
658
+ color: #721c24;
659
+ }
660
+
661
+ .streaming-status.error {
662
+ background-color: #fff3cd;
663
+ color: #856404;
664
+ }
665
+
666
+ .streaming-status.streaming {
667
+ background-color: #cce5ff;
668
+ color: #004085;
669
+ animation: pulse 1.5s infinite;
670
+ }
671
+
672
+ @keyframes pulse {
673
+ 0% { opacity: 1; }
674
+ 50% { opacity: 0.7; }
675
+ 100% { opacity: 1; }
676
+ }
677
+
678
+ .streaming-progress-section {
679
+ margin-bottom: 20px;
680
+ }
681
+
682
+ .chunks-visual {
683
+ display: flex;
684
+ flex-wrap: wrap;
685
+ gap: 5px;
686
+ }
687
+
688
+ .chunk-indicator {
689
+ width: 30px;
690
+ height: 30px;
691
+ background-color: #007bff;
692
+ color: white;
693
+ border-radius: 4px;
694
+ display: flex;
695
+ align-items: center;
696
+ justify-content: center;
697
+ font-size: 0.75rem;
698
+ animation: chunkAppear 0.3s ease-out;
699
+ }
700
+
701
+ @keyframes chunkAppear {
702
+ from {
703
+ transform: scale(0);
704
+ opacity: 0;
705
+ }
706
+ to {
707
+ transform: scale(1);
708
+ opacity: 1;
709
+ }
710
+ }
711
+ `;
712
+ document.head.appendChild(style);
ttsfm-web/static/js/playground.js CHANGED
@@ -1,745 +1,861 @@
1
- // TTSFM Playground JavaScript
2
-
3
- // Global variables
4
- let currentAudioBlob = null;
5
- let currentFormat = 'mp3';
6
- let batchResults = [];
7
-
8
- // Initialize playground
9
- document.addEventListener('DOMContentLoaded', function() {
10
- initializePlayground();
11
- });
12
-
13
- function initializePlayground() {
14
- loadVoices();
15
- loadFormats();
16
- updateCharCount();
17
- setupEventListeners();
18
-
19
- // Initialize tooltips if Bootstrap is available
20
- if (typeof bootstrap !== 'undefined') {
21
- const tooltipTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="tooltip"]'));
22
- tooltipTriggerList.map(function (tooltipTriggerEl) {
23
- return new bootstrap.Tooltip(tooltipTriggerEl);
24
- });
25
- }
26
- }
27
-
28
- function setupEventListeners() {
29
- // Form and input events
30
- document.getElementById('text-input').addEventListener('input', updateCharCount);
31
- document.getElementById('tts-form').addEventListener('submit', generateSpeech);
32
- document.getElementById('max-length-input').addEventListener('input', updateCharCount);
33
- document.getElementById('auto-split-check').addEventListener('change', updateGenerateButton);
34
-
35
- // Enhanced button events
36
- document.getElementById('validate-text-btn').addEventListener('click', validateText);
37
- document.getElementById('random-text-btn').addEventListener('click', loadRandomText);
38
- document.getElementById('download-btn').addEventListener('click', downloadAudio);
39
- document.getElementById('download-all-btn').addEventListener('click', downloadAllAudio);
40
-
41
- // New button events
42
- const clearTextBtn = document.getElementById('clear-text-btn');
43
- if (clearTextBtn) {
44
- clearTextBtn.addEventListener('click', clearText);
45
- }
46
-
47
-
48
-
49
- const resetFormBtn = document.getElementById('reset-form-btn');
50
- if (resetFormBtn) {
51
- resetFormBtn.addEventListener('click', resetForm);
52
- }
53
-
54
- const replayBtn = document.getElementById('replay-btn');
55
- if (replayBtn) {
56
- replayBtn.addEventListener('click', replayAudio);
57
- }
58
-
59
- const shareBtn = document.getElementById('share-btn');
60
- if (shareBtn) {
61
- shareBtn.addEventListener('click', shareAudio);
62
- }
63
-
64
- // Voice and format selection events
65
- document.getElementById('voice-select').addEventListener('change', updateVoiceInfo);
66
- document.getElementById('format-select').addEventListener('change', updateFormatInfo);
67
-
68
- // Example text buttons
69
- document.querySelectorAll('.use-example').forEach(button => {
70
- button.addEventListener('click', function() {
71
- document.getElementById('text-input').value = this.dataset.text;
72
- updateCharCount();
73
- // Add visual feedback
74
- this.classList.add('btn-success');
75
- setTimeout(() => {
76
- this.classList.remove('btn-success');
77
- this.classList.add('btn-outline-primary');
78
- }, 1000);
79
- });
80
- });
81
-
82
- // Keyboard shortcuts
83
- document.addEventListener('keydown', function(e) {
84
- // Ctrl/Cmd + Enter to generate speech
85
- if ((e.ctrlKey || e.metaKey) && e.key === 'Enter') {
86
- e.preventDefault();
87
- document.getElementById('generate-btn').click();
88
- }
89
-
90
- // Escape to clear results
91
- if (e.key === 'Escape') {
92
- clearResults();
93
- }
94
- });
95
- }
96
-
97
- async function loadVoices() {
98
- try {
99
- const response = await fetch('/api/voices');
100
- const data = await response.json();
101
-
102
- const select = document.getElementById('voice-select');
103
- select.innerHTML = '';
104
-
105
- data.voices.forEach(voice => {
106
- const option = document.createElement('option');
107
- option.value = voice.id;
108
- option.textContent = `${voice.name} - ${voice.description}`;
109
- select.appendChild(option);
110
- });
111
-
112
- // Select default voice
113
- select.value = 'alloy';
114
-
115
- } catch (error) {
116
- console.error('Failed to load voices:', error);
117
- console.log('Failed to load voices. Please refresh the page.');
118
- }
119
- }
120
-
121
- async function loadFormats() {
122
- try {
123
- const response = await fetch('/api/formats');
124
- const data = await response.json();
125
-
126
- const select = document.getElementById('format-select');
127
- select.innerHTML = '';
128
-
129
- data.formats.forEach(format => {
130
- const option = document.createElement('option');
131
- option.value = format.id;
132
- option.textContent = `${format.name} - ${format.description}`;
133
- select.appendChild(option);
134
- });
135
-
136
- // Select default format
137
- select.value = 'mp3';
138
- updateFormatInfo();
139
-
140
- } catch (error) {
141
- console.error('Failed to load formats:', error);
142
- console.log('Failed to load formats. Please refresh the page.');
143
- }
144
- }
145
-
146
- function updateCharCount() {
147
- const text = document.getElementById('text-input').value;
148
- const maxLength = parseInt(document.getElementById('max-length-input').value) || 4096;
149
- const charCount = text.length;
150
-
151
- document.getElementById('char-count').textContent = charCount.toLocaleString();
152
-
153
- // Update length status with better visual feedback
154
- const statusElement = document.getElementById('length-status');
155
- const percentage = (charCount / maxLength) * 100;
156
-
157
- if (charCount > maxLength) {
158
- statusElement.innerHTML = '<span class="badge bg-danger"><i class="fas fa-exclamation-triangle me-1"></i>Exceeds limit</span>';
159
- } else if (percentage > 80) {
160
- statusElement.innerHTML = '<span class="badge bg-warning"><i class="fas fa-exclamation me-1"></i>Near limit</span>';
161
- } else if (percentage > 50) {
162
- statusElement.innerHTML = '<span class="badge bg-info"><i class="fas fa-info me-1"></i>Good</span>';
163
- } else {
164
- statusElement.innerHTML = '<span class="badge bg-success"><i class="fas fa-check me-1"></i>OK</span>';
165
- }
166
-
167
- updateGenerateButton();
168
- }
169
-
170
- function updateGenerateButton() {
171
- const text = document.getElementById('text-input').value;
172
- const maxLength = parseInt(document.getElementById('max-length-input').value) || 4096;
173
- const autoSplit = document.getElementById('auto-split-check').checked;
174
- const generateBtn = document.getElementById('generate-btn');
175
- const btnText = generateBtn.querySelector('.btn-text');
176
-
177
- if (text.length > maxLength && autoSplit) {
178
- btnText.innerHTML = '<i class="fas fa-layer-group me-2"></i>Generate Speech (Batch Mode)';
179
- generateBtn.classList.add('btn-warning');
180
- generateBtn.classList.remove('btn-primary');
181
- } else {
182
- btnText.innerHTML = '<i class="fas fa-magic me-2"></i>Generate Speech';
183
- generateBtn.classList.add('btn-primary');
184
- generateBtn.classList.remove('btn-warning');
185
- }
186
- }
187
-
188
- async function validateText() {
189
- const text = document.getElementById('text-input').value.trim();
190
- const maxLength = parseInt(document.getElementById('max-length-input').value) || 4096;
191
-
192
- if (!text) {
193
- console.log('Please enter some text to validate');
194
- return;
195
- }
196
-
197
- const validateBtn = document.getElementById('validate-text-btn');
198
- setLoading(validateBtn, true);
199
-
200
- try {
201
- const response = await fetch('/api/validate-text', {
202
- method: 'POST',
203
- headers: { 'Content-Type': 'application/json' },
204
- body: JSON.stringify({ text, max_length: maxLength })
205
- });
206
-
207
- const data = await response.json();
208
- const resultDiv = document.getElementById('validation-result');
209
-
210
- if (data.is_valid) {
211
- resultDiv.innerHTML = `
212
- <div class="alert alert-success fade-in">
213
- <i class="fas fa-check-circle me-2"></i>
214
- <strong>Text is valid!</strong> (${data.text_length.toLocaleString()} characters)
215
- <div class="progress progress-custom mt-2">
216
- <div class="progress-bar-custom" style="width: ${(data.text_length / data.max_length) * 100}%"></div>
217
- </div>
218
- </div>
219
- `;
220
- } else {
221
- resultDiv.innerHTML = `
222
- <div class="alert alert-warning fade-in">
223
- <i class="fas fa-exclamation-triangle me-2"></i>
224
- <strong>Text exceeds limit!</strong> (${data.text_length.toLocaleString()}/${data.max_length.toLocaleString()} characters)
225
- <br><small class="mt-2 d-block">Suggested chunks: ${data.suggested_chunks}</small>
226
- <div class="mt-3">
227
- <strong>Preview of chunks:</strong>
228
- <div class="mt-2">
229
- ${data.chunk_preview.map((chunk, i) => `
230
- <div class="border rounded p-2 mb-2 bg-light">
231
- <small class="text-muted">Chunk ${i+1}:</small>
232
- <div class="small">${chunk}</div>
233
- </div>
234
- `).join('')}
235
- </div>
236
- <button class="btn btn-sm btn-outline-primary mt-2" onclick="enableAutoSplit()">
237
- <i class="fas fa-magic me-1"></i>Enable Auto-Split
238
- </button>
239
- </div>
240
- </div>
241
- `;
242
- }
243
-
244
- resultDiv.classList.remove('d-none');
245
- resultDiv.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
246
-
247
- } catch (error) {
248
- console.error('Validation failed:', error);
249
- console.log('Failed to validate text. Please try again.');
250
- } finally {
251
- setLoading(validateBtn, false);
252
- }
253
- }
254
-
255
- function enableAutoSplit() {
256
- document.getElementById('auto-split-check').checked = true;
257
- updateGenerateButton();
258
- console.log('Auto-split enabled! Click Generate Speech to process in batch mode.');
259
- }
260
-
261
- async function generateSpeech(event) {
262
- event.preventDefault();
263
-
264
- const button = document.getElementById('generate-btn');
265
- const audioResult = document.getElementById('audio-result');
266
- const batchResult = document.getElementById('batch-result');
267
-
268
- // Get form data
269
- const formData = getFormData();
270
-
271
- if (!validateFormData(formData)) {
272
- return;
273
- }
274
-
275
- // Check if we need batch processing
276
- const needsBatch = formData.text.length > formData.maxLength && formData.autoSplit;
277
-
278
- // Show loading state
279
- setLoading(button, true);
280
- clearResults();
281
-
282
- try {
283
- if (needsBatch) {
284
- await generateBatchSpeech(formData);
285
- } else {
286
- await generateSingleSpeech(formData);
287
- }
288
- } catch (error) {
289
- console.error('Generation failed:', error);
290
- console.log(`Failed to generate speech: ${error.message}`);
291
- } finally {
292
- setLoading(button, false);
293
- }
294
- }
295
-
296
- function getFormData() {
297
- return {
298
- text: document.getElementById('text-input').value.trim(),
299
- voice: document.getElementById('voice-select').value,
300
- format: document.getElementById('format-select').value,
301
- instructions: document.getElementById('instructions-input').value.trim(),
302
- maxLength: parseInt(document.getElementById('max-length-input').value) || 4096,
303
- validateLength: document.getElementById('validate-length-check').checked,
304
- autoSplit: document.getElementById('auto-split-check').checked
305
- };
306
- }
307
-
308
- function validateFormData(formData) {
309
- if (!formData.text || !formData.voice || !formData.format) {
310
- console.log('Please fill in all required fields');
311
- return false;
312
- }
313
-
314
- if (formData.text.length > formData.maxLength && formData.validateLength && !formData.autoSplit) {
315
- console.log(`Text is too long (${formData.text.length} characters). Enable auto-split or reduce text length.`);
316
- return false;
317
- }
318
-
319
- return true;
320
- }
321
-
322
- function clearResults() {
323
- document.getElementById('audio-result').classList.add('d-none');
324
- document.getElementById('batch-result').classList.add('d-none');
325
- document.getElementById('validation-result').classList.add('d-none');
326
- }
327
-
328
- // Utility functions
329
- function setLoading(button, loading) {
330
- if (loading) {
331
- button.classList.add('loading');
332
- button.disabled = true;
333
- } else {
334
- button.classList.remove('loading');
335
- button.disabled = false;
336
- }
337
- }
338
-
339
-
340
-
341
- async function generateSingleSpeech(formData) {
342
- const audioResult = document.getElementById('audio-result');
343
-
344
- const response = await fetch('/api/generate', {
345
- method: 'POST',
346
- headers: { 'Content-Type': 'application/json' },
347
- body: JSON.stringify({
348
- text: formData.text,
349
- voice: formData.voice,
350
- format: formData.format,
351
- instructions: formData.instructions || undefined,
352
- max_length: formData.maxLength,
353
- validate_length: formData.validateLength
354
- })
355
- });
356
-
357
- if (!response.ok) {
358
- const errorData = await response.json();
359
- throw new Error(errorData.error || `HTTP ${response.status}`);
360
- }
361
-
362
- // Get audio data
363
- const audioBlob = await response.blob();
364
- currentAudioBlob = audioBlob;
365
- currentFormat = formData.format;
366
-
367
- // Create audio URL and setup player
368
- const audioUrl = URL.createObjectURL(audioBlob);
369
- const audioPlayer = document.getElementById('audio-player');
370
- audioPlayer.src = audioUrl;
371
-
372
- // Use enhanced display function
373
- displayAudioResult(audioBlob, formData.format, formData.voice, formData.text);
374
-
375
- console.log('Speech generated successfully! Click play to listen.');
376
-
377
- // Auto-play if user prefers
378
- if (localStorage.getItem('autoPlay') === 'true') {
379
- audioPlayer.play().catch(() => {
380
- // Auto-play blocked, that's fine
381
- });
382
- }
383
- }
384
-
385
- async function generateBatchSpeech(formData) {
386
- const batchResult = document.getElementById('batch-result');
387
-
388
- const response = await fetch('/api/generate-batch', {
389
- method: 'POST',
390
- headers: { 'Content-Type': 'application/json' },
391
- body: JSON.stringify({
392
- text: formData.text,
393
- voice: formData.voice,
394
- format: formData.format,
395
- instructions: formData.instructions || undefined,
396
- max_length: formData.maxLength,
397
- preserve_words: true
398
- })
399
- });
400
-
401
- if (!response.ok) {
402
- const errorData = await response.json();
403
- throw new Error(errorData.error || `HTTP ${response.status}`);
404
- }
405
-
406
- const data = await response.json();
407
- batchResults = data.results;
408
-
409
- // Update batch summary
410
- const summaryDiv = document.getElementById('batch-summary');
411
- summaryDiv.innerHTML = `
412
- <i class="fas fa-layer-group me-2"></i>
413
- <strong>Batch Processing Complete!</strong>
414
- Generated ${data.successful_chunks} of ${data.total_chunks} audio chunks successfully.
415
- ${data.successful_chunks < data.total_chunks ?
416
- `<br><small class="text-warning">⚠️ ${data.total_chunks - data.successful_chunks} chunks failed to generate.</small>` :
417
- '<br><small class="text-success">✅ All chunks generated successfully!</small>'
418
- }
419
- `;
420
-
421
- // Display chunks
422
- displayBatchChunks(data.results, formData.format);
423
-
424
- // Show batch result with animation
425
- batchResult.classList.remove('d-none');
426
- batchResult.classList.add('fade-in');
427
-
428
- console.log(`Batch processing completed! Generated ${data.successful_chunks} audio files.`);
429
- }
430
-
431
- function displayBatchChunks(results, format) {
432
- const chunksDiv = document.getElementById('batch-chunks');
433
- chunksDiv.innerHTML = '';
434
-
435
- results.forEach((result, index) => {
436
- const chunkDiv = document.createElement('div');
437
- chunkDiv.className = 'col-md-6 col-lg-4 mb-3';
438
-
439
- if (result.audio_data) {
440
- // Convert base64 to blob
441
- const audioBlob = base64ToBlob(result.audio_data, result.content_type);
442
- const audioUrl = URL.createObjectURL(audioBlob);
443
-
444
- chunkDiv.innerHTML = `
445
- <div class="card batch-chunk-card h-100">
446
- <div class="card-body">
447
- <div class="d-flex justify-content-between align-items-start mb-2">
448
- <h6 class="card-title mb-0">
449
- <i class="fas fa-music me-1"></i>Chunk ${result.chunk_index}
450
- </h6>
451
- <span class="badge bg-success">
452
- <i class="fas fa-check me-1"></i>Success
453
- </span>
454
- </div>
455
- <p class="card-text small text-muted mb-3">${result.chunk_text}</p>
456
- <audio controls class="w-100 mb-3" preload="metadata">
457
- <source src="${audioUrl}" type="${result.content_type}">
458
- Your browser does not support audio playback.
459
- </audio>
460
- <div class="d-flex justify-content-between align-items-center">
461
- <small class="text-muted">
462
- <i class="fas fa-file-audio me-1"></i>
463
- ${(result.size / 1024).toFixed(1)} KB
464
- </small>
465
- <button class="btn btn-sm btn-outline-primary download-chunk"
466
- data-url="${audioUrl}"
467
- data-filename="chunk_${result.chunk_index}.${result.format}"
468
- title="Download this chunk">
469
- <i class="fas fa-download"></i>
470
- </button>
471
- </div>
472
- </div>
473
- </div>
474
- `;
475
- } else {
476
- chunkDiv.innerHTML = `
477
- <div class="card border-danger h-100">
478
- <div class="card-body">
479
- <div class="d-flex justify-content-between align-items-start mb-2">
480
- <h6 class="card-title mb-0 text-danger">
481
- <i class="fas fa-exclamation-triangle me-1"></i>Chunk ${result.chunk_index}
482
- </h6>
483
- <span class="badge bg-danger">
484
- <i class="fas fa-times me-1"></i>Failed
485
- </span>
486
- </div>
487
- <p class="card-text small text-muted mb-3">${result.chunk_text}</p>
488
- <div class="alert alert-danger small mb-0">
489
- <i class="fas fa-exclamation-circle me-1"></i>
490
- ${result.error}
491
- </div>
492
- </div>
493
- </div>
494
- `;
495
- }
496
-
497
- chunksDiv.appendChild(chunkDiv);
498
- });
499
-
500
- // Add download event listeners
501
- document.querySelectorAll('.download-chunk').forEach(btn => {
502
- btn.addEventListener('click', function() {
503
- const url = this.dataset.url;
504
- const filename = this.dataset.filename;
505
- downloadFromUrl(url, filename);
506
-
507
- // Visual feedback
508
- const icon = this.querySelector('i');
509
- icon.className = 'fas fa-check';
510
- setTimeout(() => {
511
- icon.className = 'fas fa-download';
512
- }, 1000);
513
- });
514
- });
515
- }
516
-
517
- function downloadAudio() {
518
- if (!currentAudioBlob) {
519
- console.log('No audio to download');
520
- return;
521
- }
522
-
523
- const url = URL.createObjectURL(currentAudioBlob);
524
- const timestamp = new Date().toISOString().slice(0, 19).replace(/:/g, '-');
525
- downloadFromUrl(url, `ttsfm-speech-${timestamp}.${currentFormat}`);
526
- URL.revokeObjectURL(url);
527
- }
528
-
529
- function downloadAllAudio() {
530
- const downloadButtons = document.querySelectorAll('.download-chunk');
531
- if (downloadButtons.length === 0) {
532
- console.log('No batch audio files to download');
533
- return;
534
- }
535
-
536
- console.log(`Starting download of ${downloadButtons.length} files...`);
537
-
538
- downloadButtons.forEach((btn, index) => {
539
- setTimeout(() => {
540
- btn.click();
541
- }, index * 500); // Stagger downloads to avoid browser limits
542
- });
543
- }
544
-
545
- function base64ToBlob(base64, contentType) {
546
- const byteCharacters = atob(base64);
547
- const byteNumbers = new Array(byteCharacters.length);
548
- for (let i = 0; i < byteCharacters.length; i++) {
549
- byteNumbers[i] = byteCharacters.charCodeAt(i);
550
- }
551
- const byteArray = new Uint8Array(byteNumbers);
552
- return new Blob([byteArray], { type: contentType });
553
- }
554
-
555
- function downloadFromUrl(url, filename) {
556
- const a = document.createElement('a');
557
- a.href = url;
558
- a.download = filename;
559
- a.style.display = 'none';
560
- document.body.appendChild(a);
561
- a.click();
562
- document.body.removeChild(a);
563
- }
564
-
565
- // New enhanced functions
566
- function clearText() {
567
- document.getElementById('text-input').value = '';
568
- updateCharCount();
569
- clearResults();
570
- console.log('Text cleared successfully');
571
- }
572
-
573
- function loadRandomText() {
574
- const randomTexts = [
575
- // News & Information
576
- "Breaking news: Scientists have discovered a revolutionary new method for generating incredibly natural synthetic speech using advanced neural networks and machine learning algorithms.",
577
- "Weather update: Today will be partly cloudy with temperatures reaching 75 degrees Fahrenheit. Light winds from the southwest at 5 to 10 miles per hour.",
578
- "Technology report: The latest advancements in artificial intelligence are revolutionizing how we interact with digital devices and services.",
579
-
580
- // Educational & Informative
581
- "The human brain contains approximately 86 billion neurons, each connected to thousands of others, creating a complex network that enables consciousness, memory, and thought.",
582
- "Photosynthesis is the process by which plants convert sunlight, carbon dioxide, and water into glucose and oxygen, forming the foundation of most life on Earth.",
583
- "The speed of light in a vacuum is exactly 299,792,458 meters per second, making it one of the fundamental constants of physics.",
584
-
585
- // Creative & Storytelling
586
- "Once upon a time, in a land far away, there lived a wise old wizard who could speak to the stars and understand their ancient secrets.",
587
- "The mysterious lighthouse stood alone on the rocky cliff, its beacon cutting through the fog like a sword of light, guiding lost ships safely home.",
588
- "In the depths of the enchanted forest, where sunbeams danced through emerald leaves, a young adventurer discovered a hidden path to destiny.",
589
-
590
- // Business & Professional
591
- "Our quarterly results demonstrate strong growth across all market segments, with revenue increasing by 23% compared to the same period last year.",
592
- "The new product launch exceeded expectations, capturing 15% market share within the first six months and establishing our brand as an industry leader.",
593
- "We are committed to sustainable business practices that benefit our customers, employees, and the environment for generations to come.",
594
-
595
- // Technical & Programming
596
- "The TTSFM package provides a comprehensive API for text-to-speech generation with support for multiple voices and audio formats.",
597
- "Machine learning algorithms process vast amounts of data to identify patterns and make predictions with remarkable accuracy.",
598
- "Cloud computing has transformed how businesses store, process, and access their data, enabling scalability and flexibility like never before.",
599
-
600
- // Conversational & Casual
601
- "Welcome to TTSFM! Experience the future of text-to-speech technology with our premium AI voices.",
602
- "Good morning! Today is a beautiful day to learn something new and explore the possibilities of text-to-speech technology.",
603
- "Have you ever wondered what it would be like if your computer could speak with perfect human-like intonation and emotion?"
604
- ];
605
-
606
- const randomText = randomTexts[Math.floor(Math.random() * randomTexts.length)];
607
- document.getElementById('text-input').value = randomText;
608
- updateCharCount();
609
- console.log('Random text loaded successfully');
610
- }
611
-
612
-
613
-
614
- function resetForm() {
615
- // Reset form to default values
616
- document.getElementById('text-input').value = 'Welcome to TTSFM! Experience the future of text-to-speech technology with our premium AI voices. Generate natural, expressive speech for any application.';
617
- document.getElementById('voice-select').value = 'alloy';
618
- document.getElementById('format-select').value = 'mp3';
619
- document.getElementById('instructions-input').value = '';
620
- document.getElementById('max-length-input').value = '4096';
621
- document.getElementById('validate-length-check').checked = true;
622
- document.getElementById('auto-split-check').checked = false;
623
-
624
- updateCharCount();
625
- updateGenerateButton();
626
- clearResults();
627
- console.log('Form reset to default values');
628
- }
629
-
630
- function replayAudio() {
631
- const audioPlayer = document.getElementById('audio-player');
632
- if (audioPlayer && audioPlayer.src) {
633
- audioPlayer.currentTime = 0;
634
- audioPlayer.play().catch(() => {
635
- console.log('Unable to replay audio. Please check your browser settings.');
636
- });
637
- }
638
- }
639
-
640
- function shareAudio() {
641
- if (navigator.share && currentAudioBlob) {
642
- const file = new File([currentAudioBlob], `ttsfm-speech.${currentFormat}`, {
643
- type: `audio/${currentFormat}`
644
- });
645
-
646
- navigator.share({
647
- title: 'TTSFM Generated Speech',
648
- text: 'Check out this speech generated with TTSFM!',
649
- files: [file]
650
- }).catch(() => {
651
- // Fallback to copying link
652
- copyAudioLink();
653
- });
654
- } else {
655
- copyAudioLink();
656
- }
657
- }
658
-
659
- function copyAudioLink() {
660
- const audioPlayer = document.getElementById('audio-player');
661
- if (audioPlayer && audioPlayer.src) {
662
- navigator.clipboard.writeText(audioPlayer.src).then(() => {
663
- console.log('Audio link copied to clipboard!');
664
- }).catch(() => {
665
- console.log('Unable to copy link. Please try downloading the audio instead.');
666
- });
667
- }
668
- }
669
-
670
- function updateVoiceInfo() {
671
- const voiceSelect = document.getElementById('voice-select');
672
- const previewBtn = document.getElementById('preview-voice-btn');
673
-
674
- if (voiceSelect.value) {
675
- previewBtn.disabled = false;
676
- previewBtn.onclick = () => previewVoice(voiceSelect.value);
677
- } else {
678
- previewBtn.disabled = true;
679
- }
680
- }
681
-
682
- function updateFormatInfo() {
683
- const formatSelect = document.getElementById('format-select');
684
- const formatInfo = document.getElementById('format-info');
685
-
686
- const formatDescriptions = {
687
- 'mp3': '🎵 MP3 - Good quality, small file size. Best for web and general use.',
688
- 'opus': '📻 OPUS - Excellent quality, small file size. Best for streaming and VoIP.',
689
- 'aac': '📱 AAC - Good quality, medium file size. Best for Apple devices and streaming.',
690
- 'flac': '💿 FLAC - Lossless quality, large file size. Best for archival and high-quality audio.',
691
- 'wav': '🎧 WAV - Lossless quality, large file size. Best for professional audio production.',
692
- 'pcm': '🔊 PCM - Raw audio data, large file size. Best for audio processing.'
693
- };
694
-
695
- if (formatInfo && formatSelect.value) {
696
- formatInfo.textContent = formatDescriptions[formatSelect.value] || 'High-quality audio format';
697
- }
698
- }
699
-
700
- function previewVoice(voiceId) {
701
- // This would typically play a short preview of the voice
702
- console.log(`Voice preview for ${voiceId} - Feature coming soon!`);
703
- }
704
-
705
- // Enhanced audio result display
706
- function displayAudioResult(audioBlob, format, voice, text) {
707
- const audioResult = document.getElementById('audio-result');
708
- const audioPlayer = document.getElementById('audio-player');
709
- const audioInfo = document.getElementById('audio-info');
710
-
711
- // Create audio URL and setup player
712
- const audioUrl = URL.createObjectURL(audioBlob);
713
- audioPlayer.src = audioUrl;
714
-
715
- // Update audio stats
716
- const sizeKB = (audioBlob.size / 1024).toFixed(1);
717
- document.getElementById('audio-size').textContent = `${sizeKB} KB`;
718
- document.getElementById('audio-format').textContent = format.toUpperCase();
719
- document.getElementById('audio-voice').textContent = voice.charAt(0).toUpperCase() + voice.slice(1);
720
-
721
- // Update audio info
722
- audioInfo.innerHTML = `
723
- <i class="fas fa-check-circle text-success me-1"></i>
724
- Generated successfully • ${sizeKB} KB • ${format.toUpperCase()}
725
- `;
726
-
727
- // Show result with animation
728
- audioResult.classList.remove('d-none');
729
- audioResult.classList.add('fade-in');
730
-
731
- // Update duration when metadata loads
732
- audioPlayer.addEventListener('loadedmetadata', function() {
733
- const duration = Math.round(audioPlayer.duration);
734
- document.getElementById('audio-duration').textContent = `${duration}s`;
735
- }, { once: true });
736
-
737
- // Scroll to result
738
- audioResult.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
739
- }
740
-
741
- // Export functions for use in HTML
742
- window.enableAutoSplit = enableAutoSplit;
743
- window.clearText = clearText;
744
- window.loadRandomText = loadRandomText;
745
- window.resetForm = resetForm;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // TTSFM Playground JavaScript
2
+
3
+ // Global variables
4
+ let currentAudioBlob = null;
5
+ let currentFormat = 'mp3';
6
+ let batchResults = [];
7
+
8
+ // Initialize playground
9
+ document.addEventListener('DOMContentLoaded', function() {
10
+ initializePlayground();
11
+ });
12
+
13
+ // Check authentication status and show/hide API key field
14
+ async function checkAuthStatus() {
15
+ try {
16
+ const response = await fetch('/api/auth-status');
17
+ const data = await response.json();
18
+
19
+ const apiKeySection = document.getElementById('api-key-section');
20
+ if (apiKeySection) {
21
+ if (data.api_key_required) {
22
+ // Show API key field and mark as required
23
+ apiKeySection.style.display = 'block';
24
+ const apiKeyInput = document.getElementById('api-key-input');
25
+ const label = apiKeySection.querySelector('label');
26
+
27
+ if (apiKeyInput) {
28
+ apiKeyInput.required = true;
29
+ apiKeyInput.placeholder = 'Enter your API key (required)';
30
+ }
31
+
32
+ if (label) {
33
+ label.innerHTML = '<i class="fas fa-key me-2"></i>' + (window.currentLocale === 'zh' ? 'API密钥(必需)' : 'API Key (Required)');
34
+ }
35
+
36
+ // Update form text
37
+ const formText = apiKeySection.querySelector('.form-text');
38
+ if (formText) {
39
+ formText.innerHTML = '<i class="fas fa-exclamation-triangle me-1 text-warning"></i>API key protection is enabled - this field is required';
40
+ }
41
+ } else {
42
+ // Hide API key field or mark as optional
43
+ apiKeySection.style.display = 'none';
44
+ }
45
+ }
46
+ } catch (error) {
47
+ console.warn('Could not check auth status:', error);
48
+ // If we can't check, assume API key might be required and show the field
49
+ const apiKeySection = document.getElementById('api-key-section');
50
+ if (apiKeySection) {
51
+ apiKeySection.style.display = 'block';
52
+ }
53
+ }
54
+ }
55
+
56
+ function initializePlayground() {
57
+ console.log('Initializing playground...');
58
+ checkAuthStatus();
59
+ loadVoices();
60
+ loadFormats();
61
+ updateCharCount();
62
+ setupEventListeners();
63
+ console.log('Playground initialization complete');
64
+
65
+ // Initialize tooltips if Bootstrap is available
66
+ if (typeof bootstrap !== 'undefined') {
67
+ const tooltipTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="tooltip"]'));
68
+ tooltipTriggerList.map(function (tooltipTriggerEl) {
69
+ return new bootstrap.Tooltip(tooltipTriggerEl);
70
+ });
71
+ }
72
+ }
73
+
74
+ function setupEventListeners() {
75
+ console.log('Setting up event listeners...');
76
+
77
+ // Form and input events
78
+ const textInput = document.getElementById('text-input');
79
+ if (textInput) {
80
+ textInput.addEventListener('input', updateCharCount);
81
+ console.log('Text input event listener added');
82
+ } else {
83
+ console.error('Text input element not found!');
84
+ }
85
+
86
+ // Add form submit event listener with better error handling
87
+ const form = document.getElementById('tts-form');
88
+ if (form) {
89
+ form.addEventListener('submit', function(event) {
90
+ console.log('Form submit event triggered');
91
+ event.preventDefault(); // Prevent default form submission
92
+ event.stopPropagation(); // Stop event bubbling
93
+ generateSpeech(event);
94
+ return false; // Additional prevention
95
+ });
96
+ } else {
97
+ console.error('TTS form not found!');
98
+ }
99
+
100
+ const maxLengthInput = document.getElementById('max-length-input');
101
+ if (maxLengthInput) {
102
+ maxLengthInput.addEventListener('input', updateCharCount);
103
+ console.log('Max length input event listener added');
104
+ } else {
105
+ console.error('Max length input element not found!');
106
+ }
107
+
108
+ const autoCombineCheck = document.getElementById('auto-combine-check');
109
+ if (autoCombineCheck) {
110
+ autoCombineCheck.addEventListener('change', updateAutoCombineStatus);
111
+ }
112
+
113
+ // Enhanced button events
114
+ const validateBtn = document.getElementById('validate-text-btn');
115
+ if (validateBtn) {
116
+ validateBtn.addEventListener('click', validateText);
117
+ console.log('Validate button event listener added');
118
+ } else {
119
+ console.error('Validate button not found!');
120
+ }
121
+
122
+ const randomBtn = document.getElementById('random-text-btn');
123
+ if (randomBtn) {
124
+ randomBtn.addEventListener('click', loadRandomText);
125
+ console.log('Random text button event listener added');
126
+ } else {
127
+ console.error('Random text button not found!');
128
+ }
129
+
130
+ const downloadBtn = document.getElementById('download-btn');
131
+ if (downloadBtn) {
132
+ downloadBtn.addEventListener('click', downloadAudio);
133
+ console.log('Download button event listener added');
134
+ } else {
135
+ console.error('Download button not found!');
136
+ }
137
+
138
+ // Add direct click event listener for generate button as backup
139
+ const generateBtn = document.getElementById('generate-btn');
140
+ if (generateBtn) {
141
+ generateBtn.addEventListener('click', function(event) {
142
+ console.log('Generate button clicked directly');
143
+ event.preventDefault();
144
+ event.stopPropagation();
145
+ generateSpeech(event);
146
+ return false;
147
+ });
148
+ }
149
+
150
+ // New button events
151
+ const clearTextBtn = document.getElementById('clear-text-btn');
152
+ if (clearTextBtn) {
153
+ clearTextBtn.addEventListener('click', clearText);
154
+ }
155
+
156
+
157
+
158
+ const resetFormBtn = document.getElementById('reset-form-btn');
159
+ if (resetFormBtn) {
160
+ resetFormBtn.addEventListener('click', resetForm);
161
+ }
162
+
163
+ const replayBtn = document.getElementById('replay-btn');
164
+ if (replayBtn) {
165
+ replayBtn.addEventListener('click', replayAudio);
166
+ }
167
+
168
+ const shareBtn = document.getElementById('share-btn');
169
+ if (shareBtn) {
170
+ shareBtn.addEventListener('click', shareAudio);
171
+ }
172
+
173
+ // API Key visibility toggle
174
+ const toggleApiKeyBtn = document.getElementById('toggle-api-key-visibility');
175
+ if (toggleApiKeyBtn) {
176
+ toggleApiKeyBtn.addEventListener('click', toggleApiKeyVisibility);
177
+ }
178
+
179
+ // Voice and format selection events
180
+ const voiceSelect = document.getElementById('voice-select');
181
+ if (voiceSelect) {
182
+ voiceSelect.addEventListener('change', updateVoiceInfo);
183
+ console.log('Voice select event listener added');
184
+ } else {
185
+ console.error('Voice select element not found!');
186
+ }
187
+
188
+ const formatSelect = document.getElementById('format-select');
189
+ if (formatSelect) {
190
+ formatSelect.addEventListener('change', updateFormatInfo);
191
+ console.log('Format select event listener added');
192
+ } else {
193
+ console.error('Format select element not found!');
194
+ }
195
+
196
+ // Example text buttons
197
+ document.querySelectorAll('.use-example').forEach(button => {
198
+ button.addEventListener('click', function() {
199
+ document.getElementById('text-input').value = this.dataset.text;
200
+ updateCharCount();
201
+ // Add visual feedback
202
+ this.classList.add('btn-success');
203
+ setTimeout(() => {
204
+ this.classList.remove('btn-success');
205
+ this.classList.add('btn-outline-primary');
206
+ }, 1000);
207
+ });
208
+ });
209
+
210
+ // Keyboard shortcuts
211
+ document.addEventListener('keydown', function(e) {
212
+ // Ctrl/Cmd + Enter to generate speech
213
+ if ((e.ctrlKey || e.metaKey) && e.key === 'Enter') {
214
+ e.preventDefault();
215
+ document.getElementById('generate-btn').click();
216
+ }
217
+
218
+ // Escape to clear results
219
+ if (e.key === 'Escape') {
220
+ clearResults();
221
+ }
222
+ });
223
+
224
+ // Initialize auto-combine status
225
+ updateAutoCombineStatus();
226
+ }
227
+
228
+ async function loadVoices() {
229
+ try {
230
+ // Prepare headers for API key if available (OpenAI compatible format)
231
+ const headers = {};
232
+ const apiKeyInput = document.getElementById('api-key-input');
233
+ if (apiKeyInput && apiKeyInput.value.trim()) {
234
+ headers['Authorization'] = `Bearer ${apiKeyInput.value.trim()}`;
235
+ }
236
+
237
+ const response = await fetch('/api/voices', { headers });
238
+ const data = await response.json();
239
+
240
+ const select = document.getElementById('voice-select');
241
+ select.innerHTML = '';
242
+
243
+ data.voices.forEach(voice => {
244
+ const option = document.createElement('option');
245
+ option.value = voice.id;
246
+ option.textContent = `${voice.name} - ${voice.description}`;
247
+ select.appendChild(option);
248
+ });
249
+
250
+ // Select default voice
251
+ select.value = 'alloy';
252
+
253
+ } catch (error) {
254
+ console.error('Failed to load voices:', error);
255
+ console.log('Failed to load voices. Please refresh the page.');
256
+ }
257
+ }
258
+
259
+ async function loadFormats() {
260
+ try {
261
+ // Prepare headers for API key if available (OpenAI compatible format)
262
+ const headers = {};
263
+ const apiKeyInput = document.getElementById('api-key-input');
264
+ if (apiKeyInput && apiKeyInput.value.trim()) {
265
+ headers['Authorization'] = `Bearer ${apiKeyInput.value.trim()}`;
266
+ }
267
+
268
+ const response = await fetch('/api/formats', { headers });
269
+ const data = await response.json();
270
+
271
+ const select = document.getElementById('format-select');
272
+ select.innerHTML = '';
273
+
274
+ data.formats.forEach(format => {
275
+ const option = document.createElement('option');
276
+ option.value = format.id;
277
+ option.textContent = `${format.name} - ${format.description}`;
278
+ select.appendChild(option);
279
+ });
280
+
281
+ // Select default format
282
+ select.value = 'mp3';
283
+ updateFormatInfo();
284
+
285
+ } catch (error) {
286
+ console.error('Failed to load formats:', error);
287
+ console.log('Failed to load formats. Please refresh the page.');
288
+ }
289
+ }
290
+
291
+ function updateCharCount() {
292
+ const textInput = document.getElementById('text-input');
293
+ const maxLengthInput = document.getElementById('max-length-input');
294
+ const charCountElement = document.getElementById('char-count');
295
+
296
+ if (!textInput || !maxLengthInput || !charCountElement) {
297
+ console.warn('Required elements not found for updateCharCount');
298
+ return;
299
+ }
300
+
301
+ const text = textInput.value;
302
+ const maxLength = parseInt(maxLengthInput.value) || 4096;
303
+ const charCount = text.length;
304
+
305
+ charCountElement.textContent = charCount.toLocaleString();
306
+
307
+ // Update length status with better visual feedback
308
+ const statusElement = document.getElementById('length-status');
309
+ if (statusElement) {
310
+ const percentage = (charCount / maxLength) * 100;
311
+
312
+ if (charCount > maxLength) {
313
+ statusElement.innerHTML = '<span class="badge bg-danger"><i class="fas fa-exclamation-triangle me-1"></i>Exceeds limit</span>';
314
+ } else if (percentage > 80) {
315
+ statusElement.innerHTML = '<span class="badge bg-warning"><i class="fas fa-exclamation me-1"></i>Near limit</span>';
316
+ } else if (percentage > 50) {
317
+ statusElement.innerHTML = '<span class="badge bg-info"><i class="fas fa-info me-1"></i>Good</span>';
318
+ } else {
319
+ statusElement.innerHTML = '<span class="badge bg-success"><i class="fas fa-check me-1"></i>OK</span>';
320
+ }
321
+ }
322
+
323
+ updateGenerateButton();
324
+ updateAutoCombineStatus();
325
+ }
326
+
327
+ function updateGenerateButton() {
328
+ const text = document.getElementById('text-input').value;
329
+ const maxLength = parseInt(document.getElementById('max-length-input').value) || 4096;
330
+ const autoCombineCheck = document.getElementById('auto-combine-check');
331
+ const autoCombine = autoCombineCheck ? autoCombineCheck.checked : false;
332
+ const generateBtn = document.getElementById('generate-btn');
333
+
334
+ if (!generateBtn) {
335
+ console.warn('Generate button not found');
336
+ return;
337
+ }
338
+
339
+ const btnText = generateBtn.querySelector('.btn-text');
340
+
341
+ if (!btnText) {
342
+ console.warn('Button text element not found');
343
+ return;
344
+ }
345
+
346
+ if (text.length > maxLength && autoCombine) {
347
+ btnText.innerHTML = '<i class="fas fa-magic me-2"></i>Generate Speech (Auto-Combine)';
348
+ generateBtn.classList.add('btn-warning');
349
+ generateBtn.classList.remove('btn-primary');
350
+ } else {
351
+ btnText.innerHTML = '<i class="fas fa-magic me-2"></i>Generate Speech';
352
+ generateBtn.classList.add('btn-primary');
353
+ generateBtn.classList.remove('btn-warning');
354
+ }
355
+ }
356
+
357
+ async function validateText() {
358
+ const text = document.getElementById('text-input').value.trim();
359
+ const maxLength = parseInt(document.getElementById('max-length-input').value) || 4096;
360
+
361
+ if (!text) {
362
+ console.log('Please enter some text to validate');
363
+ return;
364
+ }
365
+
366
+ const validateBtn = document.getElementById('validate-text-btn');
367
+ setLoading(validateBtn, true);
368
+
369
+ try {
370
+ const response = await fetch('/api/validate-text', {
371
+ method: 'POST',
372
+ headers: { 'Content-Type': 'application/json' },
373
+ body: JSON.stringify({ text, max_length: maxLength })
374
+ });
375
+
376
+ const data = await response.json();
377
+ const resultDiv = document.getElementById('validation-result');
378
+
379
+ if (data.is_valid) {
380
+ resultDiv.innerHTML = `
381
+ <div class="alert alert-success fade-in">
382
+ <i class="fas fa-check-circle me-2"></i>
383
+ <strong>Text is valid!</strong> (${data.text_length.toLocaleString()} characters)
384
+ <div class="progress progress-custom mt-2">
385
+ <div class="progress-bar-custom" style="width: ${(data.text_length / data.max_length) * 100}%"></div>
386
+ </div>
387
+ </div>
388
+ `;
389
+ } else {
390
+ resultDiv.innerHTML = `
391
+ <div class="alert alert-warning fade-in">
392
+ <i class="fas fa-exclamation-triangle me-2"></i>
393
+ <strong>Text exceeds limit!</strong> (${data.text_length.toLocaleString()}/${data.max_length.toLocaleString()} characters)
394
+ <br><small class="mt-2 d-block">Suggested chunks: ${data.suggested_chunks}</small>
395
+ <div class="mt-3">
396
+ <strong>Preview of chunks:</strong>
397
+ <div class="mt-2">
398
+ ${data.chunk_preview.map((chunk, i) => `
399
+ <div class="border rounded p-2 mb-2 bg-light">
400
+ <small class="text-muted">Chunk ${i+1}:</small>
401
+ <div class="small">${chunk}</div>
402
+ </div>
403
+ `).join('')}
404
+ </div>
405
+
406
+ </div>
407
+ </div>
408
+ `;
409
+ }
410
+
411
+ resultDiv.classList.remove('d-none');
412
+ resultDiv.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
413
+
414
+ } catch (error) {
415
+ console.error('Validation failed:', error);
416
+ console.log('Failed to validate text. Please try again.');
417
+ } finally {
418
+ setLoading(validateBtn, false);
419
+ }
420
+ }
421
+
422
+
423
+
424
+ function updateAutoCombineStatus() {
425
+ const autoCombineCheck = document.getElementById('auto-combine-check');
426
+ const statusBadge = document.getElementById('auto-combine-status');
427
+ const textInput = document.getElementById('text-input');
428
+ const maxLength = parseInt(document.getElementById('max-length-input').value) || 4096;
429
+
430
+ if (!autoCombineCheck || !statusBadge) return;
431
+
432
+ const isAutoCombineEnabled = autoCombineCheck.checked;
433
+ const textLength = textInput.value.length;
434
+ const isLongText = textLength > maxLength;
435
+
436
+ // Show/hide status badge
437
+ if (isAutoCombineEnabled && isLongText) {
438
+ statusBadge.classList.remove('d-none');
439
+ statusBadge.classList.add('bg-success');
440
+ statusBadge.classList.remove('bg-warning');
441
+ statusBadge.innerHTML = '<i class="fas fa-magic me-1"></i>Auto-combine enabled';
442
+ } else if (!isAutoCombineEnabled && isLongText) {
443
+ statusBadge.classList.remove('d-none');
444
+ statusBadge.classList.add('bg-warning');
445
+ statusBadge.classList.remove('bg-success');
446
+ statusBadge.innerHTML = '<i class="fas fa-exclamation-triangle me-1"></i>Long text detected';
447
+ } else {
448
+ statusBadge.classList.add('d-none');
449
+ }
450
+
451
+ // Remove the recursive call to updateCharCount() - this was causing infinite recursion
452
+ }
453
+
454
+ async function generateSpeech(event) {
455
+ console.log('generateSpeech function called');
456
+
457
+ // Prevent default form submission behavior
458
+ if (event) {
459
+ event.preventDefault();
460
+ event.stopPropagation();
461
+ }
462
+
463
+ const button = document.getElementById('generate-btn');
464
+ const audioResult = document.getElementById('audio-result');
465
+
466
+ // Get form data
467
+ const formData = getFormData();
468
+
469
+ if (!validateFormData(formData)) {
470
+ console.log('Form validation failed');
471
+ return false;
472
+ }
473
+
474
+ // Show loading state
475
+ setLoading(button, true);
476
+ clearResults();
477
+
478
+ try {
479
+ console.log('Starting speech generation...');
480
+ // Always use the unified endpoint with auto-combine
481
+ await generateUnifiedSpeech(formData);
482
+ console.log('Speech generation completed successfully');
483
+ } catch (error) {
484
+ console.error('Generation failed:', error);
485
+ console.log(`Failed to generate speech: ${error.message}`);
486
+ } finally {
487
+ setLoading(button, false);
488
+ }
489
+
490
+ return false; // Ensure form doesn't submit
491
+ }
492
+
493
+ function getFormData() {
494
+ return {
495
+ text: document.getElementById('text-input').value.trim(),
496
+ voice: document.getElementById('voice-select').value,
497
+ format: document.getElementById('format-select').value,
498
+ instructions: document.getElementById('instructions-input').value.trim(),
499
+ maxLength: parseInt(document.getElementById('max-length-input').value) || 4096,
500
+ validateLength: document.getElementById('validate-length-check').checked,
501
+ autoCombine: document.getElementById('auto-combine-check').checked,
502
+ apiKey: document.getElementById('api-key-input').value.trim()
503
+ };
504
+ }
505
+
506
+ function validateFormData(formData) {
507
+ if (!formData.text || !formData.voice || !formData.format) {
508
+ console.log('Please fill in all required fields');
509
+ return false;
510
+ }
511
+
512
+ if (formData.text.length > formData.maxLength && formData.validateLength && !formData.autoCombine) {
513
+ console.log(`Text is too long (${formData.text.length} characters). Enable auto-combine or reduce text length.`);
514
+ return false;
515
+ }
516
+
517
+ return true;
518
+ }
519
+
520
+ function clearResults() {
521
+ document.getElementById('audio-result').classList.add('d-none');
522
+ const batchResult = document.getElementById('batch-result');
523
+ if (batchResult) {
524
+ batchResult.classList.add('d-none');
525
+ }
526
+ document.getElementById('validation-result').classList.add('d-none');
527
+ }
528
+
529
+ // Utility functions
530
+ function setLoading(button, loading) {
531
+ if (loading) {
532
+ button.classList.add('loading');
533
+ button.disabled = true;
534
+ } else {
535
+ button.classList.remove('loading');
536
+ button.disabled = false;
537
+ }
538
+ }
539
+
540
+
541
+
542
+ // New unified function using OpenAI-compatible endpoint with auto-combine
543
+ async function generateUnifiedSpeech(formData) {
544
+ const audioResult = document.getElementById('audio-result');
545
+
546
+ // Prepare headers
547
+ const headers = { 'Content-Type': 'application/json' };
548
+
549
+ // Add API key if provided (OpenAI compatible format)
550
+ if (formData.apiKey) {
551
+ headers['Authorization'] = `Bearer ${formData.apiKey}`;
552
+ }
553
+
554
+ const response = await fetch('/v1/audio/speech', {
555
+ method: 'POST',
556
+ headers: headers,
557
+ body: JSON.stringify({
558
+ model: 'gpt-4o-mini-tts',
559
+ input: formData.text,
560
+ voice: formData.voice,
561
+ response_format: formData.format,
562
+ instructions: formData.instructions || undefined,
563
+ auto_combine: formData.autoCombine,
564
+ max_length: formData.maxLength
565
+ })
566
+ });
567
+
568
+ if (!response.ok) {
569
+ const errorData = await response.json();
570
+ const errorMessage = errorData.error?.message || errorData.error || `HTTP ${response.status}`;
571
+ throw new Error(errorMessage);
572
+ }
573
+
574
+ // Get audio data
575
+ const audioBlob = await response.blob();
576
+ currentAudioBlob = audioBlob;
577
+ currentFormat = formData.format;
578
+
579
+ // Create audio URL and setup player
580
+ const audioUrl = URL.createObjectURL(audioBlob);
581
+ const audioPlayer = document.getElementById('audio-player');
582
+ audioPlayer.src = audioUrl;
583
+
584
+ // Get response headers for enhanced display
585
+ const chunksCount = response.headers.get('X-Chunks-Combined') || '1';
586
+ const autoCombineUsed = response.headers.get('X-Auto-Combine') === 'true';
587
+ const originalLength = response.headers.get('X-Original-Text-Length');
588
+
589
+ // Use enhanced display function with new metadata
590
+ displayAudioResult(audioBlob, formData.format, formData.voice, formData.text, {
591
+ chunksCount,
592
+ autoCombineUsed,
593
+ originalLength
594
+ });
595
+
596
+ console.log('Speech generated successfully! Click play to listen.');
597
+ if (autoCombineUsed && chunksCount > 1) {
598
+ console.log(`Auto-combine feature combined ${chunksCount} chunks into a single audio file.`);
599
+ }
600
+
601
+ // Auto-play if user prefers
602
+ if (localStorage.getItem('autoPlay') === 'true') {
603
+ audioPlayer.play().catch(() => {
604
+ // Auto-play blocked, that's fine
605
+ });
606
+ }
607
+ }
608
+
609
+ // Legacy function for backward compatibility
610
+ async function generateSingleSpeech(formData) {
611
+ // Use the new unified function
612
+ await generateUnifiedSpeech(formData);
613
+ }
614
+
615
+
616
+
617
+
618
+
619
+ function downloadAudio() {
620
+ if (!currentAudioBlob) {
621
+ console.log('No audio to download');
622
+ return;
623
+ }
624
+
625
+ const url = URL.createObjectURL(currentAudioBlob);
626
+ const timestamp = new Date().toISOString().slice(0, 19).replace(/:/g, '-');
627
+ downloadFromUrl(url, `ttsfm-speech-${timestamp}.${currentFormat}`);
628
+ URL.revokeObjectURL(url);
629
+ }
630
+
631
+
632
+
633
+ function downloadFromUrl(url, filename) {
634
+ const a = document.createElement('a');
635
+ a.href = url;
636
+ a.download = filename;
637
+ a.style.display = 'none';
638
+ document.body.appendChild(a);
639
+ a.click();
640
+ document.body.removeChild(a);
641
+ }
642
+
643
+ // New enhanced functions
644
+ function clearText() {
645
+ document.getElementById('text-input').value = '';
646
+ updateCharCount();
647
+ clearResults();
648
+ console.log('Text cleared successfully');
649
+ }
650
+
651
+ function loadRandomText() {
652
+ const randomTexts = [
653
+ // News & Information
654
+ "Breaking news: Scientists have discovered a revolutionary new method for generating incredibly natural synthetic speech using advanced neural networks and machine learning algorithms.",
655
+ "Weather update: Today will be partly cloudy with temperatures reaching 75 degrees Fahrenheit. Light winds from the southwest at 5 to 10 miles per hour.",
656
+ "Technology report: The latest advancements in artificial intelligence are revolutionizing how we interact with digital devices and services.",
657
+
658
+ // Educational & Informative
659
+ "The human brain contains approximately 86 billion neurons, each connected to thousands of others, creating a complex network that enables consciousness, memory, and thought.",
660
+ "Photosynthesis is the process by which plants convert sunlight, carbon dioxide, and water into glucose and oxygen, forming the foundation of most life on Earth.",
661
+ "The speed of light in a vacuum is exactly 299,792,458 meters per second, making it one of the fundamental constants of physics.",
662
+
663
+ // Creative & Storytelling
664
+ "Once upon a time, in a land far away, there lived a wise old wizard who could speak to the stars and understand their ancient secrets.",
665
+ "The mysterious lighthouse stood alone on the rocky cliff, its beacon cutting through the fog like a sword of light, guiding lost ships safely home.",
666
+ "In the depths of the enchanted forest, where sunbeams danced through emerald leaves, a young adventurer discovered a hidden path to destiny.",
667
+
668
+ // Business & Professional
669
+ "Our quarterly results demonstrate strong growth across all market segments, with revenue increasing by 23% compared to the same period last year.",
670
+ "The new product launch exceeded expectations, capturing 15% market share within the first six months and establishing our brand as an industry leader.",
671
+ "We are committed to sustainable business practices that benefit our customers, employees, and the environment for generations to come.",
672
+
673
+ // Technical & Programming
674
+ "The TTSFM package provides a comprehensive API for text-to-speech generation with support for multiple voices and audio formats.",
675
+ "Machine learning algorithms process vast amounts of data to identify patterns and make predictions with remarkable accuracy.",
676
+ "Cloud computing has transformed how businesses store, process, and access their data, enabling scalability and flexibility like never before.",
677
+
678
+ // Conversational & Casual
679
+ "Welcome to TTSFM! Experience the future of text-to-speech technology with our premium AI voices.",
680
+ "Good morning! Today is a beautiful day to learn something new and explore the possibilities of text-to-speech technology.",
681
+ "Have you ever wondered what it would be like if your computer could speak with perfect human-like intonation and emotion?"
682
+ ];
683
+
684
+ const randomText = randomTexts[Math.floor(Math.random() * randomTexts.length)];
685
+ document.getElementById('text-input').value = randomText;
686
+ updateCharCount();
687
+ console.log('Random text loaded successfully');
688
+ }
689
+
690
+
691
+
692
+ function resetForm() {
693
+ // Reset form to default values
694
+ document.getElementById('text-input').value = 'Welcome to TTSFM! Experience the future of text-to-speech technology with our premium AI voices. Generate natural, expressive speech for any application.';
695
+ document.getElementById('voice-select').value = 'alloy';
696
+ document.getElementById('format-select').value = 'mp3';
697
+ document.getElementById('instructions-input').value = '';
698
+ document.getElementById('max-length-input').value = '4096';
699
+ document.getElementById('validate-length-check').checked = true;
700
+ const autoCombineCheck = document.getElementById('auto-combine-check');
701
+ if (autoCombineCheck) {
702
+ autoCombineCheck.checked = true;
703
+ }
704
+
705
+ updateCharCount();
706
+ updateGenerateButton();
707
+ clearResults();
708
+ console.log('Form reset to default values');
709
+ }
710
+
711
+ function replayAudio() {
712
+ const audioPlayer = document.getElementById('audio-player');
713
+ if (audioPlayer && audioPlayer.src) {
714
+ audioPlayer.currentTime = 0;
715
+ audioPlayer.play().catch(() => {
716
+ console.log('Unable to replay audio. Please check your browser settings.');
717
+ });
718
+ }
719
+ }
720
+
721
+ function shareAudio() {
722
+ if (navigator.share && currentAudioBlob) {
723
+ const file = new File([currentAudioBlob], `ttsfm-speech.${currentFormat}`, {
724
+ type: `audio/${currentFormat}`
725
+ });
726
+
727
+ navigator.share({
728
+ title: 'TTSFM Generated Speech',
729
+ text: 'Check out this speech generated with TTSFM!',
730
+ files: [file]
731
+ }).catch(() => {
732
+ // Fallback to copying link
733
+ copyAudioLink();
734
+ });
735
+ } else {
736
+ copyAudioLink();
737
+ }
738
+ }
739
+
740
+ function copyAudioLink() {
741
+ const audioPlayer = document.getElementById('audio-player');
742
+ if (audioPlayer && audioPlayer.src) {
743
+ navigator.clipboard.writeText(audioPlayer.src).then(() => {
744
+ console.log('Audio link copied to clipboard!');
745
+ }).catch(() => {
746
+ console.log('Unable to copy link. Please try downloading the audio instead.');
747
+ });
748
+ }
749
+ }
750
+
751
+ function updateVoiceInfo() {
752
+ const voiceSelect = document.getElementById('voice-select');
753
+ const previewBtn = document.getElementById('preview-voice-btn');
754
+
755
+ if (voiceSelect.value) {
756
+ previewBtn.disabled = false;
757
+ previewBtn.onclick = () => previewVoice(voiceSelect.value);
758
+ } else {
759
+ previewBtn.disabled = true;
760
+ }
761
+ }
762
+
763
+ function updateFormatInfo() {
764
+ const formatSelect = document.getElementById('format-select');
765
+ const formatInfo = document.getElementById('format-info');
766
+
767
+ const formatDescriptions = {
768
+ 'mp3': '🎵 MP3 - Good quality, small file size. Best for web and general use.',
769
+ 'opus': '📻 OPUS - Excellent quality, small file size. Best for streaming and VoIP.',
770
+ 'aac': '📱 AAC - Good quality, medium file size. Best for Apple devices and streaming.',
771
+ 'flac': '💿 FLAC - Lossless quality, large file size. Best for archival and high-quality audio.',
772
+ 'wav': '🎧 WAV - Lossless quality, large file size. Best for professional audio production.',
773
+ 'pcm': '🔊 PCM - Raw audio data, large file size. Best for audio processing.'
774
+ };
775
+
776
+ if (formatInfo && formatSelect.value) {
777
+ formatInfo.textContent = formatDescriptions[formatSelect.value] || 'High-quality audio format';
778
+ }
779
+ }
780
+
781
+ function previewVoice(voiceId) {
782
+ // This would typically play a short preview of the voice
783
+ console.log(`Voice preview for ${voiceId} - Feature coming soon!`);
784
+ }
785
+
786
+ // Enhanced audio result display with auto-combine metadata
787
+ function displayAudioResult(audioBlob, format, voice, text, metadata = {}) {
788
+ const audioResult = document.getElementById('audio-result');
789
+ const audioPlayer = document.getElementById('audio-player');
790
+ const audioInfo = document.getElementById('audio-info');
791
+
792
+ // Create audio URL and setup player
793
+ const audioUrl = URL.createObjectURL(audioBlob);
794
+ audioPlayer.src = audioUrl;
795
+
796
+ // Update audio stats
797
+ const sizeKB = (audioBlob.size / 1024).toFixed(1);
798
+ document.getElementById('audio-size').textContent = `${sizeKB} KB`;
799
+ document.getElementById('audio-format').textContent = format.toUpperCase();
800
+ document.getElementById('audio-voice').textContent = voice.charAt(0).toUpperCase() + voice.slice(1);
801
+
802
+ // Update audio info safely without innerHTML
803
+ // Clear existing content
804
+ audioInfo.textContent = '';
805
+
806
+ // Create and append icon element
807
+ const icon = document.createElement('i');
808
+ icon.className = 'fas fa-check-circle text-success me-1';
809
+ audioInfo.appendChild(icon);
810
+
811
+ // Create info text with auto-combine details
812
+ let infoText = `Generated successfully • ${sizeKB} KB • ${format.toUpperCase()}`;
813
+
814
+ if (metadata.autoCombineUsed && metadata.chunksCount > 1) {
815
+ infoText += ` • Auto-combined ${metadata.chunksCount} chunks`;
816
+
817
+ // Add a special badge for auto-combine
818
+ const badge = document.createElement('span');
819
+ badge.className = 'badge bg-primary ms-2';
820
+ badge.innerHTML = '<i class="fas fa-magic me-1"></i>Auto-combined';
821
+ audioInfo.appendChild(document.createTextNode(infoText));
822
+ audioInfo.appendChild(badge);
823
+ } else {
824
+ // Create and append text content (safely escaped)
825
+ const textNode = document.createTextNode(infoText);
826
+ audioInfo.appendChild(textNode);
827
+ }
828
+
829
+ // Show result with animation
830
+ audioResult.classList.remove('d-none');
831
+ audioResult.classList.add('fade-in');
832
+
833
+ // Update duration when metadata loads
834
+ audioPlayer.addEventListener('loadedmetadata', function() {
835
+ const duration = Math.round(audioPlayer.duration);
836
+ document.getElementById('audio-duration').textContent = `${duration}s`;
837
+ }, { once: true });
838
+
839
+ // Scroll to result
840
+ audioResult.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
841
+ }
842
+
843
+ // API Key visibility toggle function
844
+ function toggleApiKeyVisibility() {
845
+ const apiKeyInput = document.getElementById('api-key-input');
846
+ const eyeIcon = document.getElementById('api-key-eye-icon');
847
+
848
+ if (apiKeyInput.type === 'password') {
849
+ apiKeyInput.type = 'text';
850
+ eyeIcon.className = 'fas fa-eye-slash';
851
+ } else {
852
+ apiKeyInput.type = 'password';
853
+ eyeIcon.className = 'fas fa-eye';
854
+ }
855
+ }
856
+
857
+ // Export functions for use in HTML
858
+ window.clearText = clearText;
859
+ window.loadRandomText = loadRandomText;
860
+ window.resetForm = resetForm;
861
+ window.toggleApiKeyVisibility = toggleApiKeyVisibility;
ttsfm-web/static/js/websocket-tts.js ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * WebSocket TTS Streaming Client
3
+ *
4
+ * Because apparently HTTP requests are so 2023.
5
+ * Now we need real-time streaming for everything.
6
+ */
7
+
8
+ class WebSocketTTSClient {
9
+ constructor(options = {}) {
10
+ this.socketUrl = options.socketUrl || window.location.origin;
11
+ this.socket = null;
12
+ this.activeRequests = new Map();
13
+ this.reconnectAttempts = 0;
14
+ this.maxReconnectAttempts = options.maxReconnectAttempts || 5;
15
+ this.reconnectDelay = options.reconnectDelay || 1000;
16
+ this.debug = options.debug || false;
17
+
18
+ // Audio context for seamless playback
19
+ this.audioContext = null;
20
+ this.audioQueue = new Map(); // request_id -> audio chunks
21
+
22
+ // Event handlers
23
+ this.onConnect = options.onConnect || (() => {});
24
+ this.onDisconnect = options.onDisconnect || (() => {});
25
+ this.onError = options.onError || ((error) => console.error('WebSocket error:', error));
26
+
27
+ // Initialize
28
+ this.connect();
29
+ }
30
+
31
+ connect() {
32
+ if (this.socket && this.socket.connected) {
33
+ this.log('Already connected');
34
+ return;
35
+ }
36
+
37
+ this.log('Connecting to WebSocket server...');
38
+
39
+ // Initialize Socket.IO connection
40
+ this.socket = io(this.socketUrl, {
41
+ transports: ['websocket', 'polling'],
42
+ reconnection: true,
43
+ reconnectionAttempts: this.maxReconnectAttempts,
44
+ reconnectionDelay: this.reconnectDelay
45
+ });
46
+
47
+ // Set up event handlers
48
+ this.setupEventHandlers();
49
+ }
50
+
51
+ setupEventHandlers() {
52
+ // Connection events
53
+ this.socket.on('connect', () => {
54
+ this.log('Connected to WebSocket server');
55
+ this.reconnectAttempts = 0;
56
+ this.onConnect();
57
+ });
58
+
59
+ this.socket.on('disconnect', (reason) => {
60
+ this.log('Disconnected from WebSocket server:', reason);
61
+ this.onDisconnect(reason);
62
+ });
63
+
64
+ this.socket.on('connect_error', (error) => {
65
+ this.log('Connection error:', error);
66
+ this.reconnectAttempts++;
67
+ this.onError({
68
+ type: 'connection_error',
69
+ message: error.message,
70
+ attempts: this.reconnectAttempts
71
+ });
72
+ });
73
+
74
+ // TTS streaming events
75
+ this.socket.on('connected', (data) => {
76
+ this.log('Session established:', data.session_id);
77
+ });
78
+
79
+ this.socket.on('stream_started', (data) => {
80
+ this.log('Stream started:', data.request_id);
81
+ const request = this.activeRequests.get(data.request_id);
82
+ if (request && request.onStart) {
83
+ request.onStart(data);
84
+ }
85
+ });
86
+
87
+ this.socket.on('audio_chunk', (data) => {
88
+ this.handleAudioChunk(data);
89
+ });
90
+
91
+ this.socket.on('stream_progress', (data) => {
92
+ this.handleProgress(data);
93
+ });
94
+
95
+ this.socket.on('stream_complete', (data) => {
96
+ this.handleStreamComplete(data);
97
+ });
98
+
99
+ this.socket.on('stream_error', (data) => {
100
+ this.handleStreamError(data);
101
+ });
102
+ }
103
+
104
+ /**
105
+ * Generate speech with real-time streaming
106
+ */
107
+ generateSpeech(text, options = {}) {
108
+ return new Promise((resolve, reject) => {
109
+ if (!this.socket || !this.socket.connected) {
110
+ reject(new Error('WebSocket not connected'));
111
+ return;
112
+ }
113
+
114
+ const requestId = this.generateRequestId();
115
+ const audioChunks = [];
116
+
117
+ // Store request info
118
+ this.activeRequests.set(requestId, {
119
+ resolve,
120
+ reject,
121
+ audioChunks,
122
+ options,
123
+ startTime: Date.now(),
124
+ onStart: options.onStart,
125
+ onProgress: options.onProgress,
126
+ onChunk: options.onChunk,
127
+ onComplete: options.onComplete,
128
+ onError: options.onError
129
+ });
130
+
131
+ // Initialize audio queue for this request
132
+ this.audioQueue.set(requestId, []);
133
+
134
+ // Emit generation request
135
+ this.socket.emit('generate_stream', {
136
+ request_id: requestId,
137
+ text: text,
138
+ voice: options.voice || 'alloy',
139
+ format: options.format || 'mp3',
140
+ chunk_size: options.chunkSize || 1024
141
+ });
142
+
143
+ this.log('Requested speech generation:', requestId);
144
+ });
145
+ }
146
+
147
+ handleAudioChunk(data) {
148
+ const request = this.activeRequests.get(data.request_id);
149
+ if (!request) {
150
+ this.log('Received chunk for unknown request:', data.request_id);
151
+ return;
152
+ }
153
+
154
+ // Convert hex string back to binary
155
+ const audioData = this.hexToArrayBuffer(data.audio_data);
156
+
157
+ // Store chunk
158
+ request.audioChunks.push({
159
+ index: data.chunk_index,
160
+ data: audioData,
161
+ duration: data.duration,
162
+ format: data.format
163
+ });
164
+
165
+ // Add to audio queue for streaming playback
166
+ const queue = this.audioQueue.get(data.request_id);
167
+ if (queue) {
168
+ queue.push(audioData);
169
+ }
170
+
171
+ // Call chunk handler if provided
172
+ if (request.onChunk) {
173
+ request.onChunk({
174
+ chunkIndex: data.chunk_index,
175
+ totalChunks: data.total_chunks,
176
+ audioData: audioData,
177
+ duration: data.duration,
178
+ text: data.chunk_text
179
+ });
180
+ }
181
+
182
+ this.log(`Received chunk ${data.chunk_index + 1}/${data.total_chunks} for request ${data.request_id}`);
183
+ }
184
+
185
+ handleProgress(data) {
186
+ const request = this.activeRequests.get(data.request_id);
187
+ if (request && request.onProgress) {
188
+ request.onProgress({
189
+ progress: data.progress,
190
+ chunksCompleted: data.chunks_completed,
191
+ totalChunks: data.total_chunks,
192
+ status: data.status
193
+ });
194
+ }
195
+ }
196
+
197
+ handleStreamComplete(data) {
198
+ const request = this.activeRequests.get(data.request_id);
199
+ if (!request) {
200
+ this.log('Completion for unknown request:', data.request_id);
201
+ return;
202
+ }
203
+
204
+ // Sort chunks by index
205
+ request.audioChunks.sort((a, b) => a.index - b.index);
206
+
207
+ // Combine all audio chunks
208
+ const combinedAudio = this.combineAudioChunks(request.audioChunks);
209
+
210
+ const result = {
211
+ requestId: data.request_id,
212
+ audioData: combinedAudio,
213
+ chunks: request.audioChunks,
214
+ duration: request.audioChunks.reduce((sum, chunk) => sum + chunk.duration, 0),
215
+ generationTime: Date.now() - request.startTime,
216
+ format: request.audioChunks[0]?.format || 'mp3'
217
+ };
218
+
219
+ // Call complete handler
220
+ if (request.onComplete) {
221
+ request.onComplete(result);
222
+ }
223
+
224
+ // Resolve promise
225
+ request.resolve(result);
226
+
227
+ // Cleanup
228
+ this.activeRequests.delete(data.request_id);
229
+ this.audioQueue.delete(data.request_id);
230
+
231
+ this.log('Stream completed:', data.request_id);
232
+ }
233
+
234
+ handleStreamError(data) {
235
+ const request = this.activeRequests.get(data.request_id);
236
+ if (!request) {
237
+ this.log('Error for unknown request:', data.request_id);
238
+ return;
239
+ }
240
+
241
+ const error = new Error(data.error);
242
+ error.requestId = data.request_id;
243
+ error.timestamp = data.timestamp;
244
+
245
+ // Call error handler
246
+ if (request.onError) {
247
+ request.onError(error);
248
+ }
249
+
250
+ // Reject promise
251
+ request.reject(error);
252
+
253
+ // Cleanup
254
+ this.activeRequests.delete(data.request_id);
255
+ this.audioQueue.delete(data.request_id);
256
+
257
+ this.log('Stream error:', data.request_id, data.error);
258
+ }
259
+
260
+ /**
261
+ * Cancel an active stream
262
+ */
263
+ cancelStream(requestId) {
264
+ if (!this.socket || !this.socket.connected) {
265
+ throw new Error('WebSocket not connected');
266
+ }
267
+
268
+ this.socket.emit('cancel_stream', { request_id: requestId });
269
+
270
+ // Clean up local state
271
+ const request = this.activeRequests.get(requestId);
272
+ if (request) {
273
+ request.reject(new Error('Stream cancelled by user'));
274
+ this.activeRequests.delete(requestId);
275
+ this.audioQueue.delete(requestId);
276
+ }
277
+ }
278
+
279
+ /**
280
+ * Combine audio chunks into a single buffer
281
+ */
282
+ combineAudioChunks(chunks) {
283
+ if (chunks.length === 0) return new ArrayBuffer(0);
284
+
285
+ // Calculate total size
286
+ const totalSize = chunks.reduce((sum, chunk) => sum + chunk.data.byteLength, 0);
287
+
288
+ // Create combined buffer
289
+ const combined = new ArrayBuffer(totalSize);
290
+ const view = new Uint8Array(combined);
291
+
292
+ let offset = 0;
293
+ for (const chunk of chunks) {
294
+ view.set(new Uint8Array(chunk.data), offset);
295
+ offset += chunk.data.byteLength;
296
+ }
297
+
298
+ return combined;
299
+ }
300
+
301
+ /**
302
+ * Play audio directly (experimental streaming playback)
303
+ */
304
+ async playAudioStream(requestId) {
305
+ if (!this.audioContext) {
306
+ this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
307
+ }
308
+
309
+ const queue = this.audioQueue.get(requestId);
310
+ if (!queue) {
311
+ throw new Error('No audio queue found for request');
312
+ }
313
+
314
+ // This is a simplified version - real implementation would need
315
+ // proper audio decoding and buffering for seamless playback
316
+ this.log('Streaming audio playback not fully implemented yet');
317
+ }
318
+
319
+ /**
320
+ * Utility functions
321
+ */
322
+ hexToArrayBuffer(hex) {
323
+ const bytes = new Uint8Array(hex.length / 2);
324
+ for (let i = 0; i < hex.length; i += 2) {
325
+ bytes[i / 2] = parseInt(hex.substr(i, 2), 16);
326
+ }
327
+ return bytes.buffer;
328
+ }
329
+
330
+ generateRequestId() {
331
+ return `req_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
332
+ }
333
+
334
+ log(...args) {
335
+ if (this.debug) {
336
+ console.log('[WebSocketTTS]', ...args);
337
+ }
338
+ }
339
+
340
+ /**
341
+ * Get connection status
342
+ */
343
+ isConnected() {
344
+ return this.socket && this.socket.connected;
345
+ }
346
+
347
+ /**
348
+ * Disconnect from server
349
+ */
350
+ disconnect() {
351
+ if (this.socket) {
352
+ this.socket.disconnect();
353
+ this.socket = null;
354
+ }
355
+
356
+ // Clear all active requests
357
+ for (const [requestId, request] of this.activeRequests) {
358
+ request.reject(new Error('Client disconnected'));
359
+ }
360
+ this.activeRequests.clear();
361
+ this.audioQueue.clear();
362
+ }
363
+ }
364
+
365
+ // Export for use
366
+ window.WebSocketTTSClient = WebSocketTTSClient;
ttsfm-web/templates/base.html CHANGED
@@ -1,356 +1,363 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <!-- Cronitor RUM -->
5
- <script async src="https://rum.cronitor.io/script.js"></script>
6
- <script>
7
- window.cronitor = window.cronitor || function() { (window.cronitor.q = window.cronitor.q || []).push(arguments); };
8
- cronitor('config', { clientKey: 'bdc4a3faf9c16d842b5099e1a0e3ba6f' });
9
- </script>
10
-
11
- <meta charset="UTF-8">
12
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
13
- <title>{% block title %}TTSFM - Text-to-Speech{% endblock %}</title>
14
-
15
- <!-- Bootstrap CSS -->
16
- <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
17
-
18
- <!-- Font Awesome -->
19
- <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
20
-
21
- <!-- Google Fonts -->
22
- <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
23
-
24
- <!-- Custom CSS -->
25
- <link href="{{ url_for('static', filename='css/style.css') }}" rel="stylesheet">
26
-
27
- <!-- Additional Performance Optimizations -->
28
- <link rel="preconnect" href="https://fonts.googleapis.com">
29
- <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
30
-
31
- <!-- Favicon -->
32
- <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>🎤</text></svg>">
33
-
34
- <!-- Meta tags for better SEO and social sharing -->
35
- <meta name="description" content="TTSFM - A Python client for text-to-speech APIs. Simple to use with support for multiple voices and audio formats.">
36
- <meta name="keywords" content="text-to-speech, TTS, python, API, voice synthesis, audio generation">
37
- <meta name="author" content="TTSFM">
38
-
39
- <!-- Open Graph / Facebook -->
40
- <meta property="og:type" content="website">
41
- <meta property="og:url" content="{{ request.url }}">
42
- <meta property="og:title" content="{% block og_title %}TTSFM - Python Text-to-Speech Client{% endblock %}">
43
- <meta property="og:description" content="A Python client for text-to-speech APIs. Simple to use with support for multiple voices and audio formats.">
44
-
45
- <!-- Twitter -->
46
- <meta property="twitter:card" content="summary">
47
- <meta property="twitter:url" content="{{ request.url }}">
48
- <meta property="twitter:title" content="{% block twitter_title %}TTSFM - Python Text-to-Speech Client{% endblock %}">
49
- <meta property="twitter:description" content="A Python client for text-to-speech APIs. Simple to use with support for multiple voices and audio formats.">
50
-
51
- {% block extra_css %}{% endblock %}
52
- </head>
53
- <body>
54
- <!-- Skip to content link for accessibility -->
55
- <a href="#main-content" class="skip-link">Skip to main content</a>
56
-
57
- <!-- Clean Navigation -->
58
- <nav class="navbar navbar-expand-lg fixed-top" style="background-color: rgba(255, 255, 255, 0.95); backdrop-filter: blur(10px); border-bottom: 1px solid #e5e7eb;">
59
- <div class="container">
60
- <a class="navbar-brand" href="{{ url_for('index') }}">
61
- <i class="fas fa-microphone-alt me-2"></i>
62
- <span class="fw-bold">TTSFM</span>
63
- <span class="badge bg-primary ms-2 small">v3.0</span>
64
- </a>
65
-
66
- <button class="navbar-toggler border-0" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
67
- <span class="navbar-toggler-icon"></span>
68
- </button>
69
-
70
- <div class="collapse navbar-collapse" id="navbarNav">
71
- <ul class="navbar-nav me-auto">
72
- <li class="nav-item">
73
- <a class="nav-link" href="{{ url_for('index') }}" aria-label="Home page">
74
- <i class="fas fa-home me-1"></i>Home
75
- </a>
76
- </li>
77
- <li class="nav-item">
78
- <a class="nav-link" href="{{ url_for('playground') }}" aria-label="Interactive playground">
79
- <i class="fas fa-play me-1"></i>Playground
80
- </a>
81
- </li>
82
- <li class="nav-item">
83
- <a class="nav-link" href="{{ url_for('docs') }}" aria-label="API documentation">
84
- <i class="fas fa-book me-1"></i>Documentation
85
- </a>
86
- </li>
87
- </ul>
88
-
89
- <ul class="navbar-nav">
90
- <li class="nav-item">
91
- <span class="navbar-text d-flex align-items-center">
92
- <span id="status-indicator" class="status-indicator status-offline" aria-hidden="true"></span>
93
- <span id="status-text" class="small">Checking...</span>
94
- </span>
95
- </li>
96
- <li class="nav-item ms-2">
97
- <a class="btn btn-outline-primary btn-sm" href="https://github.com/dbccccccc/ttsfm" target="_blank" rel="noopener noreferrer" aria-label="View source code on GitHub">
98
- <i class="fab fa-github me-1"></i>GitHub
99
- </a>
100
- </li>
101
- </ul>
102
- </div>
103
- </div>
104
- </nav>
105
-
106
- <!-- Main Content -->
107
- <main id="main-content" style="padding-top: 76px;">
108
- {% block content %}{% endblock %}
109
- </main>
110
-
111
- <!-- Simplified Footer -->
112
- <footer class="footer py-4" style="background-color: #f8fafc; border-top: 1px solid #e5e7eb;" role="contentinfo">
113
- <div class="container">
114
- <div class="row align-items-center">
115
- <div class="col-md-6">
116
- <div class="d-flex align-items-center mb-2 mb-md-0">
117
- <i class="fas fa-microphone-alt me-2 text-primary"></i>
118
- <strong class="text-dark">TTSFM</strong>
119
- <span class="ms-2 text-muted">Free Text-to-Speech for Python</span>
120
- </div>
121
- </div>
122
- <div class="col-md-6 text-md-end">
123
- <div class="d-flex justify-content-md-end gap-3">
124
- <a href="{{ url_for('playground') }}" class="text-decoration-none" style="color: #6b7280;">
125
- <i class="fas fa-play me-1"></i>Demo
126
- </a>
127
- <a href="{{ url_for('docs') }}" class="text-decoration-none" style="color: #6b7280;">
128
- <i class="fas fa-book me-1"></i>Docs
129
- </a>
130
- <a href="https://github.com/dbccccccc/ttsfm" class="text-decoration-none" style="color: #6b7280;" target="_blank" rel="noopener noreferrer">
131
- <i class="fab fa-github me-1"></i>GitHub
132
- </a>
133
- </div>
134
- </div>
135
- </div>
136
- <hr class="my-3" style="border-color: #e5e7eb;">
137
- <div class="row align-items-center">
138
- <div class="col-md-6">
139
- <small class="text-muted">&copy; 2024 TTSFM. MIT License.</small>
140
- </div>
141
- <div class="col-md-6 text-md-end">
142
- <small class="text-muted">
143
- <span id="footer-status" class="d-inline-flex align-items-center">
144
- <span class="status-indicator status-offline me-2"></span>
145
- Status: <span id="footer-status-text" class="ms-1">Checking...</span>
146
- </span>
147
- </small>
148
- </div>
149
- </div>
150
- </div>
151
- </footer>
152
-
153
- <!-- Bootstrap JS -->
154
- <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
155
-
156
- <!-- Enhanced Common JavaScript -->
157
- <script>
158
- // Enhanced service status checking
159
- async function checkStatus() {
160
- try {
161
- const response = await fetch('/api/health');
162
- const data = await response.json();
163
-
164
- const indicator = document.getElementById('status-indicator');
165
- const text = document.getElementById('status-text');
166
- const footerIndicator = document.querySelector('#footer-status .status-indicator');
167
- const footerText = document.getElementById('footer-status-text');
168
-
169
- if (response.ok && data.status === 'healthy') {
170
- // Update navbar status
171
- indicator.className = 'status-indicator status-online';
172
- text.textContent = 'Online';
173
-
174
- // Update footer status
175
- if (footerIndicator) footerIndicator.className = 'status-indicator status-online';
176
- if (footerText) footerText.textContent = 'Online';
177
- } else {
178
- // Update navbar status
179
- indicator.className = 'status-indicator status-offline';
180
- text.textContent = 'Offline';
181
-
182
- // Update footer status
183
- if (footerIndicator) footerIndicator.className = 'status-indicator status-offline';
184
- if (footerText) footerText.textContent = 'Offline';
185
- }
186
- } catch (error) {
187
- // Update navbar status
188
- const indicator = document.getElementById('status-indicator');
189
- const text = document.getElementById('status-text');
190
- indicator.className = 'status-indicator status-offline';
191
- text.textContent = 'Offline';
192
-
193
- // Update footer status
194
- const footerIndicator = document.querySelector('#footer-status .status-indicator');
195
- const footerText = document.getElementById('footer-status-text');
196
- if (footerIndicator) footerIndicator.className = 'status-indicator status-offline';
197
- if (footerText) footerText.textContent = 'Offline';
198
- }
199
- }
200
-
201
- // Enhanced page initialization
202
- document.addEventListener('DOMContentLoaded', function() {
203
- // Check status immediately and periodically
204
- checkStatus();
205
- setInterval(checkStatus, 30000); // Check every 30 seconds
206
-
207
- // Initialize tooltips
208
- if (typeof bootstrap !== 'undefined') {
209
- const tooltipTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="tooltip"]'));
210
- tooltipTriggerList.map(function (tooltipTriggerEl) {
211
- return new bootstrap.Tooltip(tooltipTriggerEl);
212
- });
213
- }
214
-
215
- // Add smooth scrolling for anchor links
216
- document.querySelectorAll('a[href^="#"]').forEach(anchor => {
217
- anchor.addEventListener('click', function (e) {
218
- const target = document.querySelector(this.getAttribute('href'));
219
- if (target) {
220
- e.preventDefault();
221
- target.scrollIntoView({
222
- behavior: 'smooth',
223
- block: 'start'
224
- });
225
- }
226
- });
227
- });
228
-
229
- // Add fade-in animation to main content
230
- const mainContent = document.querySelector('main');
231
- if (mainContent) {
232
- mainContent.classList.add('fade-in');
233
- }
234
-
235
- // Add loading states to external links
236
- document.querySelectorAll('a[target="_blank"]').forEach(link => {
237
- link.addEventListener('click', function() {
238
- this.style.opacity = '0.7';
239
- setTimeout(() => {
240
- this.style.opacity = '1';
241
- }, 1000);
242
- });
243
- });
244
- });
245
-
246
- // Enhanced utility function to show loading state
247
- function setLoading(button, loading) {
248
- if (loading) {
249
- button.classList.add('loading');
250
- button.disabled = true;
251
- button.style.cursor = 'wait';
252
- } else {
253
- button.classList.remove('loading');
254
- button.disabled = false;
255
- button.style.cursor = 'pointer';
256
- }
257
- }
258
-
259
- // Enhanced utility function to show alerts
260
- function showAlert(message, type = 'info', duration = 5000) {
261
- const alertDiv = document.createElement('div');
262
- alertDiv.className = `alert alert-${type} alert-dismissible fade show fade-in`;
263
- alertDiv.style.position = 'relative';
264
- alertDiv.style.zIndex = '1050';
265
- alertDiv.innerHTML = `
266
- <i class="fas fa-${getAlertIcon(type)} me-2"></i>
267
- ${message}
268
- <button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
269
- `;
270
-
271
- // Find the best container to insert the alert
272
- const container = document.querySelector('main .container') || document.querySelector('.container') || document.body;
273
- if (container) {
274
- container.insertBefore(alertDiv, container.firstChild);
275
-
276
- // Auto-dismiss after specified duration
277
- setTimeout(() => {
278
- if (alertDiv.parentNode) {
279
- alertDiv.classList.remove('show');
280
- setTimeout(() => {
281
- if (alertDiv.parentNode) {
282
- alertDiv.remove();
283
- }
284
- }, 150);
285
- }
286
- }, duration);
287
-
288
- // Scroll to alert if it's not visible
289
- alertDiv.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
290
- }
291
- }
292
-
293
- // Helper function to get appropriate icon for alert type
294
- function getAlertIcon(type) {
295
- const icons = {
296
- 'success': 'check-circle',
297
- 'danger': 'exclamation-triangle',
298
- 'warning': 'exclamation-triangle',
299
- 'info': 'info-circle',
300
- 'primary': 'info-circle'
301
- };
302
- return icons[type] || 'info-circle';
303
- }
304
-
305
- // Enhanced error handling for fetch requests
306
- async function safeFetch(url, options = {}) {
307
- try {
308
- const response = await fetch(url, options);
309
- if (!response.ok) {
310
- throw new Error(`HTTP ${response.status}: ${response.statusText}`);
311
- }
312
- return response;
313
- } catch (error) {
314
- console.error('Fetch error:', error);
315
- showAlert(`Network error: ${error.message}`, 'danger');
316
- throw error;
317
- }
318
- }
319
-
320
- // Performance monitoring
321
- window.addEventListener('load', function() {
322
- // Log page load time
323
- const loadTime = performance.now();
324
- console.log(`Page loaded in ${Math.round(loadTime)}ms`);
325
-
326
- // Check for slow loading resources
327
- if (loadTime > 3000) {
328
- console.warn('Page load time is slow. Consider optimizing resources.');
329
- }
330
- });
331
-
332
- // Keyboard shortcuts
333
- document.addEventListener('keydown', function(e) {
334
- // Alt + H for home
335
- if (e.altKey && e.key === 'h') {
336
- e.preventDefault();
337
- window.location.href = '{{ url_for("index") }}';
338
- }
339
-
340
- // Alt + P for playground
341
- if (e.altKey && e.key === 'p') {
342
- e.preventDefault();
343
- window.location.href = '{{ url_for("playground") }}';
344
- }
345
-
346
- // Alt + D for docs
347
- if (e.altKey && e.key === 'd') {
348
- e.preventDefault();
349
- window.location.href = '{{ url_for("docs") }}';
350
- }
351
- });
352
- </script>
353
-
354
- {% block extra_js %}{% endblock %}
355
- </body>
356
- </html>
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="{{ get_locale() }}">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>{% block title %}TTSFM - {{ _('nav.home') }}{% endblock %}</title>
7
+
8
+ <!-- Bootstrap CSS -->
9
+ <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
10
+
11
+ <!-- Font Awesome -->
12
+ <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
13
+
14
+ <!-- Google Fonts -->
15
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
16
+
17
+ <!-- Custom CSS -->
18
+ <link href="{{ url_for('static', filename='css/style.css') }}" rel="stylesheet">
19
+
20
+ <!-- Additional Performance Optimizations -->
21
+ <link rel="preconnect" href="https://fonts.googleapis.com">
22
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
23
+
24
+ <!-- Favicon -->
25
+ <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>🎤</text></svg>">
26
+
27
+ <!-- Meta tags for better SEO and social sharing -->
28
+ <meta name="description" content="TTSFM - A Python client for text-to-speech APIs. Simple to use with support for multiple voices and audio formats.">
29
+ <meta name="keywords" content="text-to-speech, TTS, python, API, voice synthesis, audio generation">
30
+ <meta name="author" content="TTSFM">
31
+
32
+ <!-- Open Graph / Facebook -->
33
+ <meta property="og:type" content="website">
34
+ <meta property="og:url" content="{{ request.url }}">
35
+ <meta property="og:title" content="{% block og_title %}TTSFM - Python Text-to-Speech Client{% endblock %}">
36
+ <meta property="og:description" content="A Python client for text-to-speech APIs. Simple to use with support for multiple voices and audio formats.">
37
+
38
+ <!-- Twitter -->
39
+ <meta property="twitter:card" content="summary">
40
+ <meta property="twitter:url" content="{{ request.url }}">
41
+ <meta property="twitter:title" content="{% block twitter_title %}TTSFM - Python Text-to-Speech Client{% endblock %}">
42
+ <meta property="twitter:description" content="A Python client for text-to-speech APIs. Simple to use with support for multiple voices and audio formats.">
43
+
44
+ {% block extra_css %}{% endblock %}
45
+
46
+ <!-- Language button styling -->
47
+ <style>
48
+ /* Language dropdown button styling */
49
+ #languageDropdown {
50
+ border-color: #6c757d;
51
+ color: #6c757d;
52
+ transition: all 0.2s ease-in-out;
53
+ font-size: 0.875rem;
54
+ }
55
+
56
+ #languageDropdown:hover {
57
+ border-color: #495057;
58
+ color: #495057;
59
+ background-color: #f8f9fa;
60
+ }
61
+
62
+ #languageDropdown:focus {
63
+ box-shadow: 0 0 0 0.2rem rgba(108, 117, 125, 0.25);
64
+ }
65
+
66
+ /* Responsive language button */
67
+ @media (max-width: 576px) {
68
+ #languageDropdown {
69
+ font-size: 0.75rem;
70
+ padding: 0.25rem 0.5rem;
71
+ }
72
+ }
73
+
74
+ /* Ensure consistent button heights */
75
+ .navbar-nav .btn {
76
+ display: inline-flex;
77
+ align-items: center;
78
+ }
79
+ </style>
80
+ </head>
81
+ <body>
82
+ <!-- Skip to content link for accessibility -->
83
+ <a href="#main-content" class="skip-link">Skip to main content</a>
84
+
85
+ <!-- Clean Navigation -->
86
+ <nav class="navbar navbar-expand-lg fixed-top" style="background-color: rgba(255, 255, 255, 0.95); backdrop-filter: blur(10px); border-bottom: 1px solid #e5e7eb;">
87
+ <div class="container">
88
+ <a class="navbar-brand" href="{{ url_for('index') }}">
89
+ <i class="fas fa-microphone-alt me-2"></i>
90
+ <span class="fw-bold">TTSFM</span>
91
+ <span class="badge bg-primary ms-2 small">v3.2.2</span>
92
+ </a>
93
+
94
+ <button class="navbar-toggler border-0" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
95
+ <span class="navbar-toggler-icon"></span>
96
+ </button>
97
+
98
+ <div class="collapse navbar-collapse" id="navbarNav">
99
+ <ul class="navbar-nav me-auto">
100
+ <li class="nav-item">
101
+ <a class="nav-link" href="{{ url_for('index') }}" aria-label="{{ _('nav.home') }}">
102
+ <i class="fas fa-home me-1"></i>{{ _('nav.home') }}
103
+ </a>
104
+ </li>
105
+ <li class="nav-item">
106
+ <a class="nav-link" href="{{ url_for('playground') }}" aria-label="{{ _('nav.playground') }}">
107
+ <i class="fas fa-play me-1"></i>{{ _('nav.playground') }}
108
+ </a>
109
+ </li>
110
+ <li class="nav-item">
111
+ <a class="nav-link" href="{{ url_for('docs') }}" aria-label="{{ _('nav.documentation') }}">
112
+ <i class="fas fa-book me-1"></i>{{ _('nav.documentation') }}
113
+ </a>
114
+ </li>
115
+ </ul>
116
+
117
+ <ul class="navbar-nav">
118
+ <li class="nav-item">
119
+ <span class="navbar-text d-flex align-items-center">
120
+ <span id="status-indicator" class="status-indicator status-offline" aria-hidden="true"></span>
121
+ <span id="status-text" class="small">{{ _('nav.status_checking') }}</span>
122
+ </span>
123
+ </li>
124
+ <li class="nav-item dropdown ms-3">
125
+ <button class="btn btn-outline-secondary btn-sm dropdown-toggle" type="button" id="languageDropdown" data-bs-toggle="dropdown" aria-expanded="false" title="{{ _('common.language') }}">
126
+ {% if get_locale() == 'zh' %}🇨🇳 中文{% else %}🇺🇸 English{% endif %}
127
+ </button>
128
+ <ul class="dropdown-menu" aria-labelledby="languageDropdown">
129
+ {% for lang_code, lang_name in get_supported_languages().items() %}
130
+ <li>
131
+ <a class="dropdown-item{% if get_locale() == lang_code %} active{% endif %}"
132
+ href="{{ url_for('set_language', lang_code=lang_code) }}">
133
+ {% if lang_code == 'en' %}🇺🇸{% elif lang_code == 'zh' %}🇨🇳{% endif %} {{ lang_name }}
134
+ </a>
135
+ </li>
136
+ {% endfor %}
137
+ </ul>
138
+ </li>
139
+ <li class="nav-item ms-3">
140
+ <a class="btn btn-outline-primary btn-sm" href="https://github.com/dbccccccc/ttsfm" target="_blank" rel="noopener noreferrer" aria-label="{{ _('nav.github') }}">
141
+ <i class="fab fa-github me-1"></i>{{ _('nav.github') }}
142
+ </a>
143
+ </li>
144
+ </ul>
145
+ </div>
146
+ </div>
147
+ </nav>
148
+
149
+ <!-- Main Content -->
150
+ <main id="main-content" style="padding-top: 76px;">
151
+ {% block content %}{% endblock %}
152
+ </main>
153
+
154
+ <!-- Simplified Footer -->
155
+ <footer class="footer py-3" style="background-color: #f9fafb; border-top: 1px solid #e5e7eb;" role="contentinfo">
156
+ <div class="container">
157
+ <div class="row align-items-center">
158
+ <div class="col-md-6">
159
+ <div class="d-flex align-items-center">
160
+ <i class="fas fa-microphone-alt me-2 text-primary"></i>
161
+ <strong class="text-dark">TTSFM</strong>
162
+ <span class="ms-2 text-muted">v3.2.2</span>
163
+ </div>
164
+ </div>
165
+ <div class="col-md-6 text-md-end">
166
+ <small class="text-muted">
167
+ {{ _('home.footer_copyright') }} •
168
+ <a href="{{ url_for('docs') }}" class="text-decoration-none text-muted">{{ _('nav.documentation') }}</a> •
169
+ <a href="https://github.com/dbccccccc/ttsfm" class="text-decoration-none text-muted" target="_blank">{{ _('nav.github') }}</a>
170
+ </small>
171
+ </div>
172
+ </div>
173
+ </div>
174
+ </footer>
175
+
176
+ <!-- Bootstrap JS -->
177
+ <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
178
+
179
+ <!-- Internationalization Support -->
180
+ <script src="{{ url_for('static', filename='js/i18n.js') }}"></script>
181
+
182
+ <!-- Enhanced Common JavaScript -->
183
+ <script>
184
+ // Enhanced service status checking
185
+ async function checkStatus() {
186
+ try {
187
+ const response = await fetch('/api/health');
188
+ const data = await response.json();
189
+
190
+ const indicator = document.getElementById('status-indicator');
191
+ const text = document.getElementById('status-text');
192
+
193
+ if (response.ok && data.status === 'healthy') {
194
+ indicator.className = 'status-indicator status-online';
195
+ text.textContent = '{{ _("nav.status_online") }}';
196
+ } else {
197
+ indicator.className = 'status-indicator status-offline';
198
+ text.textContent = '{{ _("nav.status_offline") }}';
199
+ }
200
+ } catch (error) {
201
+ const indicator = document.getElementById('status-indicator');
202
+ const text = document.getElementById('status-text');
203
+ indicator.className = 'status-indicator status-offline';
204
+ text.textContent = '{{ _("nav.status_offline") }}';
205
+ }
206
+ }
207
+
208
+ // Enhanced page initialization
209
+ document.addEventListener('DOMContentLoaded', function() {
210
+ // Check status immediately and periodically
211
+ checkStatus();
212
+ setInterval(checkStatus, 30000); // Check every 30 seconds
213
+
214
+ // Initialize tooltips
215
+ if (typeof bootstrap !== 'undefined') {
216
+ const tooltipTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="tooltip"]'));
217
+ tooltipTriggerList.map(function (tooltipTriggerEl) {
218
+ return new bootstrap.Tooltip(tooltipTriggerEl);
219
+ });
220
+ }
221
+
222
+ // Add smooth scrolling for anchor links
223
+ document.querySelectorAll('a[href^="#"]').forEach(anchor => {
224
+ anchor.addEventListener('click', function (e) {
225
+ const target = document.querySelector(this.getAttribute('href'));
226
+ if (target) {
227
+ e.preventDefault();
228
+ target.scrollIntoView({
229
+ behavior: 'smooth',
230
+ block: 'start'
231
+ });
232
+ }
233
+ });
234
+ });
235
+
236
+ // Add fade-in animation to main content
237
+ const mainContent = document.querySelector('main');
238
+ if (mainContent) {
239
+ mainContent.classList.add('fade-in');
240
+ }
241
+
242
+ // Add loading states to external links
243
+ document.querySelectorAll('a[target="_blank"]').forEach(link => {
244
+ link.addEventListener('click', function() {
245
+ this.style.opacity = '0.7';
246
+ setTimeout(() => {
247
+ this.style.opacity = '1';
248
+ }, 1000);
249
+ });
250
+ });
251
+ });
252
+
253
+ // Enhanced utility function to show loading state
254
+ function setLoading(button, loading) {
255
+ if (loading) {
256
+ button.classList.add('loading');
257
+ button.disabled = true;
258
+ button.style.cursor = 'wait';
259
+ } else {
260
+ button.classList.remove('loading');
261
+ button.disabled = false;
262
+ button.style.cursor = 'pointer';
263
+ }
264
+ }
265
+
266
+ // Enhanced utility function to show alerts
267
+ function showAlert(message, type = 'info', duration = 5000) {
268
+ const alertDiv = document.createElement('div');
269
+ alertDiv.className = `alert alert-${type} alert-dismissible fade show fade-in`;
270
+ alertDiv.style.position = 'relative';
271
+ alertDiv.style.zIndex = '1050';
272
+ alertDiv.innerHTML = `
273
+ <i class="fas fa-${getAlertIcon(type)} me-2"></i>
274
+ ${message}
275
+ <button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
276
+ `;
277
+
278
+ // Find the best container to insert the alert
279
+ const container = document.querySelector('main .container') || document.querySelector('.container') || document.body;
280
+ if (container) {
281
+ container.insertBefore(alertDiv, container.firstChild);
282
+
283
+ // Auto-dismiss after specified duration
284
+ setTimeout(() => {
285
+ if (alertDiv.parentNode) {
286
+ alertDiv.classList.remove('show');
287
+ setTimeout(() => {
288
+ if (alertDiv.parentNode) {
289
+ alertDiv.remove();
290
+ }
291
+ }, 150);
292
+ }
293
+ }, duration);
294
+
295
+ // Scroll to alert if it's not visible
296
+ alertDiv.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
297
+ }
298
+ }
299
+
300
+ // Helper function to get appropriate icon for alert type
301
+ function getAlertIcon(type) {
302
+ const icons = {
303
+ 'success': 'check-circle',
304
+ 'danger': 'exclamation-triangle',
305
+ 'warning': 'exclamation-triangle',
306
+ 'info': 'info-circle',
307
+ 'primary': 'info-circle'
308
+ };
309
+ return icons[type] || 'info-circle';
310
+ }
311
+
312
+ // Enhanced error handling for fetch requests
313
+ async function safeFetch(url, options = {}) {
314
+ try {
315
+ const response = await fetch(url, options);
316
+ if (!response.ok) {
317
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
318
+ }
319
+ return response;
320
+ } catch (error) {
321
+ console.error('Fetch error:', error);
322
+ showAlert(`Network error: ${error.message}`, 'danger');
323
+ throw error;
324
+ }
325
+ }
326
+
327
+ // Performance monitoring
328
+ window.addEventListener('load', function() {
329
+ // Log page load time
330
+ const loadTime = performance.now();
331
+ console.log(`Page loaded in ${Math.round(loadTime)}ms`);
332
+
333
+ // Check for slow loading resources
334
+ if (loadTime > 3000) {
335
+ console.warn('Page load time is slow. Consider optimizing resources.');
336
+ }
337
+ });
338
+
339
+ // Keyboard shortcuts
340
+ document.addEventListener('keydown', function(e) {
341
+ // Alt + H for home
342
+ if (e.altKey && e.key === 'h') {
343
+ e.preventDefault();
344
+ window.location.href = '{{ url_for("index") }}';
345
+ }
346
+
347
+ // Alt + P for playground
348
+ if (e.altKey && e.key === 'p') {
349
+ e.preventDefault();
350
+ window.location.href = '{{ url_for("playground") }}';
351
+ }
352
+
353
+ // Alt + D for docs
354
+ if (e.altKey && e.key === 'd') {
355
+ e.preventDefault();
356
+ window.location.href = '{{ url_for("docs") }}';
357
+ }
358
+ });
359
+ </script>
360
+
361
+ {% block extra_js %}{% endblock %}
362
+ </body>
363
+ </html>
ttsfm-web/templates/docs.html CHANGED
@@ -1,369 +1,734 @@
1
- {% extends "base.html" %}
2
-
3
- {% block title %}TTSFM API Documentation{% endblock %}
4
-
5
- {% block extra_css %}
6
- <style>
7
- .code-block {
8
- background-color: #f8f9fa;
9
- border: 1px solid #e9ecef;
10
- border-radius: 0.375rem;
11
- padding: 1rem;
12
- margin: 1rem 0;
13
- overflow-x: auto;
14
- }
15
-
16
- .endpoint-card {
17
- border-left: 4px solid #007bff;
18
- margin-bottom: 2rem;
19
- }
20
-
21
- .method-badge {
22
- font-size: 0.75rem;
23
- padding: 0.25rem 0.5rem;
24
- border-radius: 0.25rem;
25
- font-weight: bold;
26
- margin-right: 0.5rem;
27
- }
28
-
29
- .method-get { background-color: #28a745; color: white; }
30
- .method-post { background-color: #007bff; color: white; }
31
- .method-put { background-color: #ffc107; color: black; }
32
- .method-delete { background-color: #dc3545; color: white; }
33
-
34
- .response-example {
35
- background-color: #f1f3f4;
36
- border-radius: 0.375rem;
37
- padding: 1rem;
38
- margin-top: 1rem;
39
- }
40
-
41
- .toc {
42
- position: sticky;
43
- top: 2rem;
44
- max-height: calc(100vh - 4rem);
45
- overflow-y: auto;
46
- }
47
-
48
- .toc a {
49
- color: #6c757d;
50
- text-decoration: none;
51
- display: block;
52
- padding: 0.25rem 0;
53
- border-left: 2px solid transparent;
54
- padding-left: 1rem;
55
- }
56
-
57
- .toc a:hover, .toc a.active {
58
- color: #007bff;
59
- border-left-color: #007bff;
60
- }
61
- </style>
62
- {% endblock %}
63
-
64
- {% block content %}
65
- <div class="container py-5">
66
- <div class="row">
67
- <div class="col-12 text-center mb-5">
68
- <h1 class="display-4 fw-bold">
69
- <i class="fas fa-book me-3"></i>API Documentation
70
- </h1>
71
- <p class="lead text-muted">
72
- Complete reference for the TTSFM Text-to-Speech API
73
- </p>
74
- </div>
75
- </div>
76
-
77
- <div class="row">
78
- <!-- Table of Contents -->
79
- <div class="col-lg-3">
80
- <div class="toc">
81
- <h5 class="fw-bold mb-3">Contents</h5>
82
- <a href="#overview">Overview</a>
83
- <a href="#authentication">Authentication</a>
84
- <a href="#text-validation">Text Validation</a>
85
- <a href="#endpoints">API Endpoints</a>
86
- <a href="#voices">Voices</a>
87
- <a href="#formats">Audio Formats</a>
88
- <a href="#generate">Generate Speech</a>
89
- <a href="#batch">Batch Processing</a>
90
- <a href="#status">Status & Health</a>
91
- <a href="#errors">Error Handling</a>
92
- <a href="#examples">Code Examples</a>
93
- <a href="#python-package">Python Package</a>
94
- </div>
95
- </div>
96
-
97
- <!-- Documentation Content -->
98
- <div class="col-lg-9">
99
- <!-- Overview -->
100
- <section id="overview" class="mb-5">
101
- <h2 class="fw-bold mb-3">Overview</h2>
102
- <p>
103
- The TTSFM API provides a modern, OpenAI-compatible interface for text-to-speech generation.
104
- It supports multiple voices, audio formats, and includes advanced features like text length
105
- validation and batch processing.
106
- </p>
107
-
108
- <div class="alert alert-info">
109
- <i class="fas fa-info-circle me-2"></i>
110
- <strong>Base URL:</strong> <code>{{ request.url_root }}api/</code>
111
- </div>
112
-
113
- <h4>Key Features</h4>
114
- <ul>
115
- <li>11 different voice options</li>
116
- <li>Multiple audio formats (MP3, WAV, OPUS, etc.)</li>
117
- <li>Text length validation (4096 character limit)</li>
118
- <li>Automatic text splitting for long content</li>
119
- <li>Batch processing capabilities</li>
120
- <li>Real-time status monitoring</li>
121
- </ul>
122
- </section>
123
-
124
- <!-- Authentication -->
125
- <section id="authentication" class="mb-5">
126
- <h2 class="fw-bold mb-3">Authentication</h2>
127
- <p>
128
- Currently, the API supports optional API key authentication. If configured,
129
- include your API key in the request headers.
130
- </p>
131
-
132
- <div class="code-block">
133
- <pre><code>Authorization: Bearer YOUR_API_KEY</code></pre>
134
- </div>
135
- </section>
136
-
137
- <!-- Text Validation -->
138
- <section id="text-validation" class="mb-5">
139
- <h2 class="fw-bold mb-3">Text Length Validation</h2>
140
- <p>
141
- TTSFM includes built-in text length validation to ensure compatibility with TTS models.
142
- The default maximum length is 4096 characters, but this can be customized.
143
- </p>
144
-
145
- <div class="alert alert-warning">
146
- <i class="fas fa-exclamation-triangle me-2"></i>
147
- <strong>Important:</strong> Text exceeding the maximum length will be rejected unless
148
- validation is disabled or the text is split into chunks.
149
- </div>
150
-
151
- <h4>Validation Options</h4>
152
- <ul>
153
- <li><code>max_length</code>: Maximum allowed characters (default: 4096)</li>
154
- <li><code>validate_length</code>: Enable/disable validation (default: true)</li>
155
- <li><code>preserve_words</code>: Avoid splitting words when chunking (default: true)</li>
156
- </ul>
157
- </section>
158
-
159
- <!-- API Endpoints -->
160
- <section id="endpoints" class="mb-5">
161
- <h2 class="fw-bold mb-3">API Endpoints</h2>
162
-
163
- <!-- Voices Endpoint -->
164
- <div class="card endpoint-card" id="voices">
165
- <div class="card-body">
166
- <h4 class="card-title">
167
- <span class="method-badge method-get">GET</span>
168
- /api/voices
169
- </h4>
170
- <p class="card-text">Get list of available voices.</p>
171
-
172
- <h6>Response Example:</h6>
173
- <div class="response-example">
174
- <pre><code>{
175
- "voices": [
176
- {
177
- "id": "alloy",
178
- "name": "Alloy",
179
- "description": "Alloy voice"
180
- },
181
- {
182
- "id": "echo",
183
- "name": "Echo",
184
- "description": "Echo voice"
185
- }
186
- ],
187
- "count": 6
188
- }</code></pre>
189
- </div>
190
- </div>
191
- </div>
192
-
193
- <!-- Formats Endpoint -->
194
- <div class="card endpoint-card" id="formats">
195
- <div class="card-body">
196
- <h4 class="card-title">
197
- <span class="method-badge method-get">GET</span>
198
- /api/formats
199
- </h4>
200
- <p class="card-text">Get list of supported audio formats.</p>
201
-
202
- <h6>Response Example:</h6>
203
- <div class="response-example">
204
- <pre><code>{
205
- "formats": [
206
- {
207
- "id": "mp3",
208
- "name": "MP3",
209
- "mime_type": "audio/mp3",
210
- "description": "MP3 audio format"
211
- }
212
- ],
213
- "count": 6
214
- }</code></pre>
215
- </div>
216
- </div>
217
- </div>
218
-
219
- <!-- Text Validation Endpoint -->
220
- <div class="card endpoint-card">
221
- <div class="card-body">
222
- <h4 class="card-title">
223
- <span class="method-badge method-post">POST</span>
224
- /api/validate-text
225
- </h4>
226
- <p class="card-text">Validate text length and get splitting suggestions.</p>
227
-
228
- <h6>Request Body:</h6>
229
- <div class="code-block">
230
- <pre><code>{
231
- "text": "Your text to validate",
232
- "max_length": 4096
233
- }</code></pre>
234
- </div>
235
-
236
- <h6>Response Example:</h6>
237
- <div class="response-example">
238
- <pre><code>{
239
- "text_length": 5000,
240
- "max_length": 4096,
241
- "is_valid": false,
242
- "needs_splitting": true,
243
- "suggested_chunks": 2,
244
- "chunk_preview": [
245
- "First chunk preview...",
246
- "Second chunk preview..."
247
- ]
248
- }</code></pre>
249
- </div>
250
- </div>
251
- </div>
252
-
253
- <!-- Generate Speech Endpoint -->
254
- <div class="card endpoint-card" id="generate">
255
- <div class="card-body">
256
- <h4 class="card-title">
257
- <span class="method-badge method-post">POST</span>
258
- /api/generate
259
- </h4>
260
- <p class="card-text">Generate speech from text.</p>
261
-
262
- <h6>Request Body:</h6>
263
- <div class="code-block">
264
- <pre><code>{
265
- "text": "Hello, world!",
266
- "voice": "alloy",
267
- "format": "mp3",
268
- "instructions": "Speak cheerfully",
269
- "max_length": 4096,
270
- "validate_length": true
271
- }</code></pre>
272
- </div>
273
-
274
- <h6>Parameters:</h6>
275
- <ul>
276
- <li><code>text</code> (required): Text to convert to speech</li>
277
- <li><code>voice</code> (optional): Voice ID (default: "alloy")</li>
278
- <li><code>format</code> (optional): Audio format (default: "mp3")</li>
279
- <li><code>instructions</code> (optional): Voice modulation instructions</li>
280
- <li><code>max_length</code> (optional): Maximum text length (default: 4096)</li>
281
- <li><code>validate_length</code> (optional): Enable validation (default: true)</li>
282
- </ul>
283
-
284
- <h6>Response:</h6>
285
- <p>Returns audio file with appropriate Content-Type header.</p>
286
- </div>
287
- </div>
288
-
289
- <!-- Batch Processing Endpoint -->
290
- <div class="card endpoint-card" id="batch">
291
- <div class="card-body">
292
- <h4 class="card-title">
293
- <span class="method-badge method-post">POST</span>
294
- /api/generate-batch
295
- </h4>
296
- <p class="card-text">Generate speech from long text by automatically splitting into chunks.</p>
297
-
298
- <h6>Request Body:</h6>
299
- <div class="code-block">
300
- <pre><code>{
301
- "text": "Very long text that exceeds the limit...",
302
- "voice": "alloy",
303
- "format": "mp3",
304
- "max_length": 4096,
305
- "preserve_words": true
306
- }</code></pre>
307
- </div>
308
-
309
- <h6>Response Example:</h6>
310
- <div class="response-example">
311
- <pre><code>{
312
- "total_chunks": 3,
313
- "successful_chunks": 3,
314
- "results": [
315
- {
316
- "chunk_index": 1,
317
- "chunk_text": "First chunk text...",
318
- "audio_data": "base64_encoded_audio",
319
- "content_type": "audio/mp3",
320
- "size": 12345,
321
- "format": "mp3"
322
- }
323
- ]
324
- }</code></pre>
325
- </div>
326
- </div>
327
- </div>
328
- </section>
329
- </div>
330
- </div>
331
- </div>
332
- {% endblock %}
333
-
334
- {% block extra_js %}
335
- <script>
336
- // Smooth scrolling for TOC links
337
- document.querySelectorAll('.toc a').forEach(link => {
338
- link.addEventListener('click', function(e) {
339
- e.preventDefault();
340
- const target = document.querySelector(this.getAttribute('href'));
341
- if (target) {
342
- target.scrollIntoView({ behavior: 'smooth' });
343
-
344
- // Update active link
345
- document.querySelectorAll('.toc a').forEach(l => l.classList.remove('active'));
346
- this.classList.add('active');
347
- }
348
- });
349
- });
350
-
351
- // Highlight current section in TOC
352
- window.addEventListener('scroll', function() {
353
- const sections = document.querySelectorAll('section[id]');
354
- const scrollPos = window.scrollY + 100;
355
-
356
- sections.forEach(section => {
357
- const top = section.offsetTop;
358
- const bottom = top + section.offsetHeight;
359
- const id = section.getAttribute('id');
360
- const link = document.querySelector(`.toc a[href="#${id}"]`);
361
-
362
- if (scrollPos >= top && scrollPos < bottom) {
363
- document.querySelectorAll('.toc a').forEach(l => l.classList.remove('active'));
364
- if (link) link.classList.add('active');
365
- }
366
- });
367
- });
368
- </script>
369
- {% endblock %}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}TTSFM {{ _('docs.title') }}{% endblock %}
4
+
5
+ {% block extra_css %}
6
+ <style>
7
+ .code-block {
8
+ background-color: #f8f9fa;
9
+ border: 1px solid #e9ecef;
10
+ border-radius: 0.375rem;
11
+ padding: 1rem;
12
+ margin: 1rem 0;
13
+ overflow-x: auto;
14
+ }
15
+
16
+ .endpoint-card {
17
+ border-left: 4px solid #007bff;
18
+ margin-bottom: 2rem;
19
+ }
20
+
21
+ .method-badge {
22
+ font-size: 0.75rem;
23
+ padding: 0.25rem 0.5rem;
24
+ border-radius: 0.25rem;
25
+ font-weight: bold;
26
+ margin-right: 0.5rem;
27
+ }
28
+
29
+ .method-get { background-color: #28a745; color: white; }
30
+ .method-post { background-color: #007bff; color: white; }
31
+ .method-put { background-color: #ffc107; color: black; }
32
+ .method-delete { background-color: #dc3545; color: white; }
33
+
34
+ .response-example {
35
+ background-color: #f1f3f4;
36
+ border-radius: 0.375rem;
37
+ padding: 1rem;
38
+ margin-top: 1rem;
39
+ }
40
+
41
+ .toc {
42
+ position: sticky;
43
+ top: 2rem;
44
+ max-height: calc(100vh - 4rem);
45
+ overflow-y: auto;
46
+ }
47
+
48
+ .toc a {
49
+ color: #6c757d;
50
+ text-decoration: none;
51
+ display: block;
52
+ padding: 0.25rem 0;
53
+ border-left: 2px solid transparent;
54
+ padding-left: 1rem;
55
+ }
56
+
57
+ .toc a:hover, .toc a.active {
58
+ color: #007bff;
59
+ border-left-color: #007bff;
60
+ }
61
+ </style>
62
+ {% endblock %}
63
+
64
+ {% block content %}
65
+ <div class="container py-5">
66
+ <div class="row">
67
+ <div class="col-12 text-center mb-5">
68
+ <h1 class="display-4 fw-bold">
69
+ <i class="fas fa-book me-3 text-primary"></i>{{ _('docs.title') }}
70
+ </h1>
71
+ <p class="lead text-muted">
72
+ {{ _('docs.subtitle') }}
73
+ </p>
74
+ </div>
75
+ </div>
76
+
77
+ <div class="row">
78
+ <!-- Table of Contents -->
79
+ <div class="col-lg-3">
80
+ <div class="toc">
81
+ <h5 class="fw-bold mb-3">{{ _('docs.contents') }}</h5>
82
+ <a href="#overview">{{ _('docs.overview') }}</a>
83
+ <a href="#authentication">{{ _('docs.authentication') }}</a>
84
+ <a href="#text-validation">{{ _('docs.text_validation') }}</a>
85
+ <a href="#endpoints">{{ _('docs.endpoints') }}</a>
86
+ <a href="#voices">{{ _('docs.voices') }}</a>
87
+ <a href="#formats">{{ _('docs.formats') }}</a>
88
+ <a href="#generate">{{ _('docs.generate') }}</a>
89
+ <a href="#combined">{{ _('docs.combined') }}</a>
90
+ <a href="#status">{{ _('docs.status') }}</a>
91
+ <a href="#errors">{{ _('docs.errors') }}</a>
92
+ <a href="#examples">{{ _('docs.examples') }}</a>
93
+ <a href="#python-package">{{ _('docs.python_package') }}</a>
94
+ <a href="#websocket">WebSocket Streaming</a>
95
+ </div>
96
+ </div>
97
+
98
+ <!-- Documentation Content -->
99
+ <div class="col-lg-9">
100
+ <!-- Overview -->
101
+ <section id="overview" class="mb-5">
102
+ <h2 class="fw-bold mb-3">{{ _('docs.overview_title') }}</h2>
103
+ <p>
104
+ {{ _('docs.overview_desc') }}
105
+ </p>
106
+
107
+ <div class="alert alert-info">
108
+ <i class="fas fa-info-circle me-2"></i>
109
+ <strong>{{ _('docs.base_url') }}</strong> <code>{{ request.url_root }}api/</code>
110
+ </div>
111
+
112
+ <h4>{{ _('docs.key_features') }}</h4>
113
+ <ul>
114
+ <li><strong>🎤 {{ _('docs.feature_voices') }}</strong></li>
115
+ <li><strong>🎵 {{ _('docs.feature_formats') }}</strong></li>
116
+ <li><strong>🤖 {{ _('docs.feature_openai') }}</strong></li>
117
+ <li><strong>✨ {{ _('docs.feature_auto_combine') }}</strong></li>
118
+ <li><strong>📊 {{ _('docs.feature_validation') }}</strong></li>
119
+ <li><strong>📈 {{ _('docs.feature_monitoring') }}</strong></li>
120
+ </ul>
121
+
122
+ <div class="alert alert-success">
123
+ <i class="fas fa-star me-2"></i>
124
+ <strong>{{ _('docs.new_version') }}</strong> {{ _('docs.new_version_desc') }}
125
+ </div>
126
+ </section>
127
+
128
+ <!-- Authentication -->
129
+ <section id="authentication" class="mb-5">
130
+ <h2 class="fw-bold mb-3">{{ _('docs.authentication_title') }}</h2>
131
+ <p>
132
+ {{ _('docs.authentication_desc') }}
133
+ </p>
134
+
135
+ <div class="code-block">
136
+ <pre><code>Authorization: Bearer YOUR_API_KEY</code></pre>
137
+ </div>
138
+ </section>
139
+
140
+ <!-- Text Validation -->
141
+ <section id="text-validation" class="mb-5">
142
+ <h2 class="fw-bold mb-3">{{ _('docs.text_validation_title') }}</h2>
143
+ <p>
144
+ {{ _('docs.text_validation_desc') }}
145
+ </p>
146
+
147
+ <div class="alert alert-warning">
148
+ <i class="fas fa-exclamation-triangle me-2"></i>
149
+ <strong>{{ _('docs.important') }}</strong> {{ _('docs.text_validation_warning') }}
150
+ </div>
151
+
152
+ <h4>{{ _('docs.validation_options') }}</h4>
153
+ <ul>
154
+ <li><code>max_length</code>: {{ _('docs.max_length_option') }}</li>
155
+ <li><code>validate_length</code>: {{ _('docs.validate_length_option') }}</li>
156
+ <li><code>preserve_words</code>: {{ _('docs.preserve_words_option') }}</li>
157
+ </ul>
158
+ </section>
159
+
160
+ <!-- API Endpoints -->
161
+ <section id="endpoints" class="mb-5">
162
+ <h2 class="fw-bold mb-3">{{ _('docs.endpoints_title') }}</h2>
163
+
164
+ <!-- Voices Endpoint -->
165
+ <div class="card endpoint-card" id="voices">
166
+ <div class="card-body">
167
+ <h4 class="card-title">
168
+ <span class="method-badge method-get">GET</span>
169
+ /api/voices
170
+ </h4>
171
+ <p class="card-text">{{ _('docs.get_voices_desc') }}</p>
172
+
173
+ <h6>{{ _('docs.response_example') }}</h6>
174
+ <div class="response-example">
175
+ <pre><code>{
176
+ "voices": [
177
+ {
178
+ "id": "alloy",
179
+ "name": "Alloy",
180
+ "description": "Alloy voice"
181
+ },
182
+ {
183
+ "id": "echo",
184
+ "name": "Echo",
185
+ "description": "Echo voice"
186
+ }
187
+ ],
188
+ "count": 6
189
+ }</code></pre>
190
+ </div>
191
+ </div>
192
+ </div>
193
+
194
+ <!-- Formats Endpoint -->
195
+ <div class="card endpoint-card" id="formats">
196
+ <div class="card-body">
197
+ <h4 class="card-title">
198
+ <span class="method-badge method-get">GET</span>
199
+ /api/formats
200
+ </h4>
201
+ <p class="card-text">Get available audio formats for speech generation.</p>
202
+
203
+ <h6>Available Formats</h6>
204
+ <p>We support multiple format requests, but internally:</p>
205
+ <ul>
206
+ <li><strong>mp3</strong> - Returns actual MP3 format</li>
207
+ <li><strong>All other formats</strong> (opus, aac, flac, wav, pcm) - Mapped to WAV format</li>
208
+ </ul>
209
+
210
+ <div class="alert alert-info">
211
+ <i class="fas fa-info-circle me-2"></i>
212
+ <strong>Note:</strong> When you request opus, aac, flac, wav, or pcm, you'll receive WAV audio data.
213
+ </div>
214
+
215
+ <h6>{{ _('docs.response_example') }}</h6>
216
+ <div class="response-example">
217
+ <pre><code>{
218
+ "formats": [
219
+ {
220
+ "id": "mp3",
221
+ "name": "MP3",
222
+ "mime_type": "audio/mp3",
223
+ "description": "MP3 audio format"
224
+ },
225
+ {
226
+ "id": "opus",
227
+ "name": "Opus",
228
+ "mime_type": "audio/wav",
229
+ "description": "Returns WAV format"
230
+ },
231
+ {
232
+ "id": "aac",
233
+ "name": "AAC",
234
+ "mime_type": "audio/wav",
235
+ "description": "Returns WAV format"
236
+ },
237
+ {
238
+ "id": "flac",
239
+ "name": "FLAC",
240
+ "mime_type": "audio/wav",
241
+ "description": "Returns WAV format"
242
+ },
243
+ {
244
+ "id": "wav",
245
+ "name": "WAV",
246
+ "mime_type": "audio/wav",
247
+ "description": "WAV audio format"
248
+ },
249
+ {
250
+ "id": "pcm",
251
+ "name": "PCM",
252
+ "mime_type": "audio/wav",
253
+ "description": "Returns WAV format"
254
+ }
255
+ ],
256
+ "count": 6
257
+ }</code></pre>
258
+ </div>
259
+ </div>
260
+ </div>
261
+
262
+ <!-- Text Validation Endpoint -->
263
+ <div class="card endpoint-card">
264
+ <div class="card-body">
265
+ <h4 class="card-title">
266
+ <span class="method-badge method-post">POST</span>
267
+ /api/validate-text
268
+ </h4>
269
+ <p class="card-text">{{ _('docs.validate_text_desc') }}</p>
270
+
271
+ <h6>{{ _('docs.request_body') }}</h6>
272
+ <div class="code-block">
273
+ <pre><code>{
274
+ "text": "Your text to validate",
275
+ "max_length": 4096
276
+ }</code></pre>
277
+ </div>
278
+
279
+ <h6>{{ _('docs.response_example') }}</h6>
280
+ <div class="response-example">
281
+ <pre><code>{
282
+ "text_length": 5000,
283
+ "max_length": 4096,
284
+ "is_valid": false,
285
+ "needs_splitting": true,
286
+ "suggested_chunks": 2,
287
+ "chunk_preview": [
288
+ "First chunk preview...",
289
+ "Second chunk preview..."
290
+ ]
291
+ }</code></pre>
292
+ </div>
293
+ </div>
294
+ </div>
295
+
296
+ <!-- Generate Speech Endpoint -->
297
+ <div class="card endpoint-card" id="generate">
298
+ <div class="card-body">
299
+ <h4 class="card-title">
300
+ <span class="method-badge method-post">POST</span>
301
+ /api/generate
302
+ </h4>
303
+ <p class="card-text">{{ _('docs.generate_speech_desc') }}</p>
304
+
305
+ <h6>{{ _('docs.request_body') }}</h6>
306
+ <div class="code-block">
307
+ <pre><code>{
308
+ "text": "Hello, world!",
309
+ "voice": "alloy",
310
+ "format": "mp3",
311
+ "instructions": "Speak cheerfully",
312
+ "max_length": 4096,
313
+ "validate_length": true
314
+ }</code></pre>
315
+ </div>
316
+
317
+ <h6>{{ _('docs.parameters') }}</h6>
318
+ <ul>
319
+ <li><code>text</code> ({{ _('docs.required') }}): {{ _('docs.text_param') }}</li>
320
+ <li><code>voice</code> ({{ _('docs.optional') }}): {{ _('docs.voice_param') }}</li>
321
+ <li><code>format</code> ({{ _('docs.optional') }}): {{ _('docs.format_param') }}</li>
322
+ <li><code>instructions</code> ({{ _('docs.optional') }}): {{ _('docs.instructions_param') }}</li>
323
+ <li><code>max_length</code> ({{ _('docs.optional') }}): {{ _('docs.max_length_param') }}</li>
324
+ <li><code>validate_length</code> ({{ _('docs.optional') }}): {{ _('docs.validate_length_param') }}</li>
325
+ </ul>
326
+
327
+ <h6>{{ _('docs.response') }}</h6>
328
+ <p>{{ _('docs.response_audio') }}</p>
329
+ </div>
330
+ </div>
331
+
332
+ </section>
333
+
334
+ <!-- Python Package -->
335
+ <section id="python-package" class="mb-5">
336
+ <h3 class="fw-bold mb-4">
337
+ <i class="fab fa-python me-2 text-warning"></i>{{ _('docs.python_package_title') }}
338
+ </h3>
339
+
340
+ <div class="card">
341
+ <div class="card-body">
342
+ <h5>{{ _('docs.long_text_support') }}</h5>
343
+ <p>{{ _('docs.long_text_desc') }}</p>
344
+
345
+ <div class="code-block">
346
+ <pre><code>from ttsfm import TTSClient, Voice, AudioFormat
347
+
348
+ # Create client
349
+ client = TTSClient()
350
+
351
+ # Generate speech from long text (automatically splits into separate files)
352
+ responses = client.generate_speech_long_text(
353
+ text="Very long text that exceeds 4096 characters...",
354
+ voice=Voice.ALLOY,
355
+ response_format=AudioFormat.MP3,
356
+ max_length=2000,
357
+ preserve_words=True
358
+ )
359
+
360
+ # Save each chunk as separate files
361
+ for i, response in enumerate(responses, 1):
362
+ response.save_to_file(f"part_{i:03d}.mp3")</code></pre>
363
+ </div>
364
+
365
+ <h6 class="mt-4">{{ _('docs.developer_features') }}</h6>
366
+ <ul>
367
+ <li><strong>{{ _('docs.manual_splitting') }}</strong></li>
368
+ <li><strong>{{ _('docs.word_preservation') }}</strong></li>
369
+ <li><strong>{{ _('docs.separate_files') }}</strong></li>
370
+ <li><strong>{{ _('docs.cli_support') }}</strong></li>
371
+ </ul>
372
+
373
+ <div class="alert alert-info">
374
+ <i class="fas fa-info-circle me-2"></i>
375
+ <strong>{{ _('docs.note') }}</strong> {{ _('docs.auto_combine_note') }}
376
+ </div>
377
+ </div>
378
+ </div>
379
+
380
+ <!-- Combined Audio Endpoints -->
381
+ <div class="card endpoint-card" id="combined">
382
+ <div class="card-body">
383
+ <h4 class="card-title">
384
+ <span class="method-badge method-post">POST</span>
385
+ /api/generate-combined
386
+ </h4>
387
+ <p class="card-text">{{ _('docs.combined_audio_desc') }}</p>
388
+
389
+ <h6>{{ _('docs.request_body') }}</h6>
390
+ <div class="code-block">
391
+ <pre><code>{
392
+ "text": "Very long text that exceeds the limit...",
393
+ "voice": "alloy",
394
+ "format": "mp3",
395
+ "instructions": "Optional voice instructions",
396
+ "max_length": 4096,
397
+ "preserve_words": true
398
+ }</code></pre>
399
+ </div>
400
+
401
+ <h6>{{ _('docs.response') }}</h6>
402
+ <p>{{ _('docs.response_combined_audio') }}</p>
403
+
404
+ <h6>{{ _('docs.response_headers') }}</h6>
405
+ <ul>
406
+ <li><code>X-Chunks-Combined</code>: {{ _('docs.chunks_combined_header') }}</li>
407
+ <li><code>X-Original-Text-Length</code>: {{ _('docs.original_text_length_header') }}</li>
408
+ <li><code>X-Audio-Size</code>: {{ _('docs.audio_size_header') }}</li>
409
+ </ul>
410
+ </div>
411
+ </div>
412
+
413
+ <!-- OpenAI Compatible Endpoint with Auto-Combine -->
414
+ <div class="card endpoint-card">
415
+ <div class="card-body">
416
+ <h4 class="card-title">
417
+ <span class="method-badge method-post">POST</span>
418
+ /v1/audio/speech
419
+ </h4>
420
+ <p class="card-text">{{ _('docs.openai_compatible_desc') }}</p>
421
+
422
+ <h6>{{ _('docs.request_body') }}</h6>
423
+ <div class="code-block">
424
+ <pre><code>{
425
+ "model": "gpt-4o-mini-tts",
426
+ "input": "Text of any length...",
427
+ "voice": "alloy",
428
+ "response_format": "mp3",
429
+ "instructions": "Optional voice instructions",
430
+ "speed": 1.0,
431
+ "auto_combine": true,
432
+ "max_length": 4096
433
+ }</code></pre>
434
+ </div>
435
+
436
+ <h6>{{ _('docs.enhanced_parameters') }}</h6>
437
+ <ul>
438
+ <li><strong>auto_combine</strong> (boolean, default: true):
439
+ <ul>
440
+ <li><code>true</code>: {{ _('docs.auto_combine_param') }}</li>
441
+ <li><code>false</code>: {{ _('docs.auto_combine_false') }}</li>
442
+ </ul>
443
+ </li>
444
+ <li><strong>max_length</strong> (integer, default: 4096): {{ _('docs.max_length_chunk_param') }}</li>
445
+ </ul>
446
+
447
+ <h6>{{ _('docs.response_headers') }}</h6>
448
+ <ul>
449
+ <li><code>X-Auto-Combine</code>: {{ _('docs.auto_combine_header') }}</li>
450
+ <li><code>X-Chunks-Combined</code>: {{ _('docs.chunks_combined_response') }}</li>
451
+ <li><code>X-Original-Text-Length</code>: {{ _('docs.original_text_response') }}</li>
452
+ <li><code>X-Audio-Format</code>: {{ _('docs.audio_format_header') }}</li>
453
+ <li><code>X-Audio-Size</code>: {{ _('docs.audio_size_response') }}</li>
454
+ </ul>
455
+
456
+ <h6>{{ _('docs.examples_title') }}</h6>
457
+ <div class="code-block">
458
+ <pre><code># {{ _('docs.short_text_comment') }}
459
+ curl -X POST {{ request.url_root }}v1/audio/speech \
460
+ -H "Content-Type: application/json" \
461
+ -d '{
462
+ "model": "gpt-4o-mini-tts",
463
+ "input": "Hello world!",
464
+ "voice": "alloy"
465
+ }'
466
+
467
+ # {{ _('docs.long_text_auto_comment') }}
468
+ curl -X POST {{ request.url_root }}v1/audio/speech \
469
+ -H "Content-Type: application/json" \
470
+ -d '{
471
+ "model": "gpt-4o-mini-tts",
472
+ "input": "Very long text...",
473
+ "voice": "alloy",
474
+ "auto_combine": true
475
+ }'
476
+
477
+ # {{ _('docs.long_text_no_auto_comment') }}
478
+ curl -X POST {{ request.url_root }}v1/audio/speech \
479
+ -H "Content-Type: application/json" \
480
+ -d '{
481
+ "model": "gpt-4o-mini-tts",
482
+ "input": "Very long text...",
483
+ "voice": "alloy",
484
+ "auto_combine": false
485
+ }'</code></pre>
486
+ </div>
487
+
488
+ <div class="alert alert-info mt-3">
489
+ <i class="fas fa-info-circle me-2"></i>
490
+ <strong>{{ _('docs.audio_combination') }}</strong> {{ _('docs.audio_combination_desc') }}
491
+ </div>
492
+
493
+ <h6 class="mt-4">{{ _('docs.use_cases') }}</h6>
494
+ <ul>
495
+ <li><strong>{{ _('docs.use_case_articles') }}</strong></li>
496
+ <li><strong>{{ _('docs.use_case_audiobooks') }}</strong></li>
497
+ <li><strong>{{ _('docs.use_case_podcasts') }}</strong></li>
498
+ <li><strong>{{ _('docs.use_case_education') }}</strong></li>
499
+ </ul>
500
+
501
+ <h6 class="mt-4">{{ _('docs.example_usage') }}</h6>
502
+ <div class="code-block">
503
+ <pre><code># {{ _('docs.python_example_comment') }}
504
+ import requests
505
+
506
+ response = requests.post(
507
+ "{{ request.url_root }}api/generate-combined",
508
+ json={
509
+ "text": "Your very long text content here...",
510
+ "voice": "nova",
511
+ "format": "mp3",
512
+ "max_length": 2000
513
+ }
514
+ )
515
+
516
+ if response.status_code == 200:
517
+ with open("combined_audio.mp3", "wb") as f:
518
+ f.write(response.content)
519
+
520
+ chunks = response.headers.get('X-Chunks-Combined')
521
+ print(f"Combined {chunks} chunks into single file")</code></pre>
522
+ </div>
523
+ </div>
524
+ </div>
525
+ </section>
526
+
527
+ <!-- WebSocket Streaming -->
528
+ <section id="websocket" class="mb-5">
529
+ <h2 class="mb-4">
530
+ <i class="fas fa-bolt text-warning me-2"></i>WebSocket Streaming
531
+ </h2>
532
+ <p class="lead">
533
+ Real-time audio streaming for enhanced user experience. Get audio chunks as they're generated instead of waiting for the complete file.
534
+ </p>
535
+
536
+ <div class="alert alert-info">
537
+ <i class="fas fa-info-circle me-2"></i>
538
+ WebSocket streaming provides lower perceived latency and real-time progress tracking for TTS generation.
539
+ </div>
540
+
541
+ <h3 class="mt-4">Connection</h3>
542
+ <div class="code-block">
543
+ <pre><code>// JavaScript WebSocket client
544
+ const client = new WebSocketTTSClient({
545
+ socketUrl: '{{ request.url_root[:-1] }}',
546
+ debug: true
547
+ });
548
+
549
+ // Connection events
550
+ client.onConnect = () => console.log('Connected');
551
+ client.onDisconnect = () => console.log('Disconnected');</code></pre>
552
+ </div>
553
+
554
+ <h3 class="mt-4">Streaming TTS Generation</h3>
555
+ <div class="code-block">
556
+ <pre><code>// Generate speech with real-time streaming
557
+ const result = await client.generateSpeech('Hello, WebSocket world!', {
558
+ voice: 'alloy',
559
+ format: 'mp3',
560
+ chunkSize: 1024, // Characters per chunk
561
+
562
+ // Progress callback
563
+ onProgress: (progress) => {
564
+ console.log(`Progress: ${progress.progress}%`);
565
+ console.log(`Chunks: ${progress.chunksCompleted}/${progress.totalChunks}`);
566
+ },
567
+
568
+ // Receive audio chunks in real-time
569
+ onChunk: (chunk) => {
570
+ console.log(`Received chunk ${chunk.chunkIndex + 1}`);
571
+ // Process or play audio chunk immediately
572
+ processAudioChunk(chunk.audioData);
573
+ },
574
+
575
+ // Completion callback
576
+ onComplete: (result) => {
577
+ console.log('Streaming complete!');
578
+ // result.audioData contains the complete audio
579
+ }
580
+ });</code></pre>
581
+ </div>
582
+
583
+ <h3 class="mt-4">WebSocket Events</h3>
584
+ <div class="endpoint-card card">
585
+ <div class="card-body">
586
+ <h5>Client → Server Events</h5>
587
+ <table class="table table-sm">
588
+ <thead>
589
+ <tr>
590
+ <th>Event</th>
591
+ <th>Description</th>
592
+ <th>Payload</th>
593
+ </tr>
594
+ </thead>
595
+ <tbody>
596
+ <tr>
597
+ <td><code>generate_stream</code></td>
598
+ <td>Start TTS generation</td>
599
+ <td><code>{text, voice, format, chunk_size}</code></td>
600
+ </tr>
601
+ <tr>
602
+ <td><code>cancel_stream</code></td>
603
+ <td>Cancel active stream</td>
604
+ <td><code>{request_id}</code></td>
605
+ </tr>
606
+ </tbody>
607
+ </table>
608
+
609
+ <h5 class="mt-4">Server → Client Events</h5>
610
+ <table class="table table-sm">
611
+ <thead>
612
+ <tr>
613
+ <th>Event</th>
614
+ <th>Description</th>
615
+ <th>Payload</th>
616
+ </tr>
617
+ </thead>
618
+ <tbody>
619
+ <tr>
620
+ <td><code>stream_started</code></td>
621
+ <td>Stream initiated</td>
622
+ <td><code>{request_id, timestamp}</code></td>
623
+ </tr>
624
+ <tr>
625
+ <td><code>audio_chunk</code></td>
626
+ <td>Audio chunk ready</td>
627
+ <td><code>{request_id, chunk_index, audio_data, duration}</code></td>
628
+ </tr>
629
+ <tr>
630
+ <td><code>stream_progress</code></td>
631
+ <td>Progress update</td>
632
+ <td><code>{progress, chunks_completed, total_chunks}</code></td>
633
+ </tr>
634
+ <tr>
635
+ <td><code>stream_complete</code></td>
636
+ <td>Generation complete</td>
637
+ <td><code>{request_id, total_chunks, status}</code></td>
638
+ </tr>
639
+ <tr>
640
+ <td><code>stream_error</code></td>
641
+ <td>Error occurred</td>
642
+ <td><code>{request_id, error, timestamp}</code></td>
643
+ </tr>
644
+ </tbody>
645
+ </table>
646
+ </div>
647
+ </div>
648
+
649
+ <h3 class="mt-4">Benefits</h3>
650
+ <ul>
651
+ <li><strong>Real-time feedback:</strong> Users see progress as audio generates</li>
652
+ <li><strong>Lower latency:</strong> First audio chunk arrives quickly</li>
653
+ <li><strong>Cancellable:</strong> Stop generation mid-stream if needed</li>
654
+ <li><strong>Efficient:</strong> Process chunks as they arrive</li>
655
+ </ul>
656
+
657
+ <h3 class="mt-4">Example: Streaming Audio Player</h3>
658
+ <div class="code-block">
659
+ <pre><code>// Create a streaming audio player
660
+ const audioChunks = [];
661
+ let isPlaying = false;
662
+
663
+ const streamingPlayer = await client.generateSpeech(longText, {
664
+ voice: 'nova',
665
+ format: 'mp3',
666
+
667
+ onChunk: (chunk) => {
668
+ // Store chunk
669
+ audioChunks.push(chunk.audioData);
670
+
671
+ // Start playing after first chunk
672
+ if (!isPlaying && audioChunks.length >= 3) {
673
+ startStreamingPlayback(audioChunks);
674
+ isPlaying = true;
675
+ }
676
+ },
677
+
678
+ onComplete: (result) => {
679
+ // Ensure all chunks are played
680
+ finishPlayback(result.audioData);
681
+ }
682
+ });</code></pre>
683
+ </div>
684
+
685
+ <div class="alert alert-success mt-4">
686
+ <h6><i class="fas fa-rocket me-2"></i>Try It Out!</h6>
687
+ <p class="mb-0">
688
+ Experience WebSocket streaming in action at the
689
+ <a href="/websocket-demo" class="alert-link">WebSocket Demo</a> or enable streaming mode in the
690
+ <a href="/playground" class="alert-link">Playground</a>.
691
+ </p>
692
+ </div>
693
+ </section>
694
+ </div>
695
+ </div>
696
+ </div>
697
+ {% endblock %}
698
+
699
+ {% block extra_js %}
700
+ <script>
701
+ // Smooth scrolling for TOC links
702
+ document.querySelectorAll('.toc a').forEach(link => {
703
+ link.addEventListener('click', function(e) {
704
+ e.preventDefault();
705
+ const target = document.querySelector(this.getAttribute('href'));
706
+ if (target) {
707
+ target.scrollIntoView({ behavior: 'smooth' });
708
+
709
+ // Update active link
710
+ document.querySelectorAll('.toc a').forEach(l => l.classList.remove('active'));
711
+ this.classList.add('active');
712
+ }
713
+ });
714
+ });
715
+
716
+ // Highlight current section in TOC
717
+ window.addEventListener('scroll', function() {
718
+ const sections = document.querySelectorAll('section[id]');
719
+ const scrollPos = window.scrollY + 100;
720
+
721
+ sections.forEach(section => {
722
+ const top = section.offsetTop;
723
+ const bottom = top + section.offsetHeight;
724
+ const id = section.getAttribute('id');
725
+ const link = document.querySelector(`.toc a[href="#${id}"]`);
726
+
727
+ if (scrollPos >= top && scrollPos < bottom) {
728
+ document.querySelectorAll('.toc a').forEach(l => l.classList.remove('active'));
729
+ if (link) link.classList.add('active');
730
+ }
731
+ });
732
+ });
733
+ </script>
734
+ {% endblock %}
ttsfm-web/templates/index.html CHANGED
@@ -1,146 +1,156 @@
1
- {% extends "base.html" %}
2
-
3
- {% block title %}TTSFM - Free Text-to-Speech for Python{% endblock %}
4
-
5
- {% block content %}
6
- <!-- Hero Section -->
7
- <section class="hero-section">
8
- <div class="container">
9
- <div class="row align-items-center min-vh-75">
10
- <div class="col-lg-8 mx-auto text-center">
11
- <div class="hero-content">
12
- <div class="badge bg-primary text-white mb-3 px-3 py-2">
13
- <i class="fas fa-code me-2"></i>Python Package
14
- </div>
15
- <h1 class="display-4 fw-bold mb-4">
16
- Free Text-to-Speech for Python
17
- </h1>
18
- <p class="lead mb-4">
19
- Access free text-to-speech using openai.fm's service. No API keys required,
20
- just install and use immediately.
21
- </p>
22
- <div class="d-flex flex-wrap gap-3 justify-content-center">
23
- <a href="{{ url_for('playground') }}" class="btn btn-primary btn-lg">
24
- <i class="fas fa-play me-2"></i>Try Demo
25
- </a>
26
- <a href="{{ url_for('docs') }}" class="btn btn-outline-secondary btn-lg">
27
- <i class="fas fa-book me-2"></i>Documentation
28
- </a>
29
- <a href="https://github.com/dbccccccc/ttsfm" class="btn btn-outline-secondary btn-lg" target="_blank" rel="noopener noreferrer">
30
- <i class="fab fa-github me-2"></i>GitHub
31
- </a>
32
- </div>
33
- </div>
34
- </div>
35
- </div>
36
- </div>
37
- </section>
38
-
39
- <!-- Features Section -->
40
- <section class="py-5" style="background-color: #f8fafc;">
41
- <div class="container">
42
- <div class="row">
43
- <div class="col-12 text-center mb-5">
44
- <h2 class="fw-bold mb-4">Key Features</h2>
45
- <p class="lead text-muted">
46
- Simple, free, and powerful text-to-speech for Python developers.
47
- </p>
48
- </div>
49
- </div>
50
-
51
- <div class="row g-4">
52
- <div class="col-lg-4">
53
- <div class="text-center">
54
- <div class="feature-icon text-white rounded-circle d-inline-flex align-items-center justify-content-center mb-3" style="width: 4rem; height: 4rem; background-color: #2563eb;">
55
- <i class="fas fa-key"></i>
56
- </div>
57
- <h5 class="fw-bold">No API Keys</h5>
58
- <p class="text-muted">Completely free service with no registration or API keys required.</p>
59
- </div>
60
- </div>
61
-
62
- <div class="col-lg-4">
63
- <div class="text-center">
64
- <div class="feature-icon text-white rounded-circle d-inline-flex align-items-center justify-content-center mb-3" style="width: 4rem; height: 4rem; background-color: #10b981;">
65
- <i class="fas fa-bolt"></i>
66
- </div>
67
- <h5 class="fw-bold">Easy to Use</h5>
68
- <p class="text-muted">Simple Python API with both sync and async support for all use cases.</p>
69
- </div>
70
- </div>
71
-
72
- <div class="col-lg-4">
73
- <div class="text-center">
74
- <div class="feature-icon text-white rounded-circle d-inline-flex align-items-center justify-content-center mb-3" style="width: 4rem; height: 4rem; background-color: #64748b;">
75
- <i class="fas fa-microphone-alt"></i>
76
- </div>
77
- <h5 class="fw-bold">Multiple Voices</h5>
78
- <p class="text-muted">Access to various voice options and audio formats for your needs.</p>
79
- </div>
80
- </div>
81
- </div>
82
- </div>
83
- </section>
84
-
85
- <!-- Quick Start Section -->
86
- <section class="py-5">
87
- <div class="container">
88
- <div class="row">
89
- <div class="col-12 text-center mb-5">
90
- <h2 class="fw-bold mb-4">Getting Started</h2>
91
- <p class="lead text-muted">
92
- Install TTSFM and start generating speech with just a few lines of code.
93
- </p>
94
- </div>
95
- </div>
96
-
97
- <div class="row g-4">
98
- <div class="col-lg-6">
99
- <div class="card h-100">
100
- <div class="card-body">
101
- <h5 class="card-title">
102
- <i class="fas fa-download me-2 text-primary"></i>Installation
103
- </h5>
104
- <pre class="bg-light p-3 rounded"><code>pip install ttsfm</code></pre>
105
- <small class="text-muted">Requires Python 3.8+</small>
106
- </div>
107
- </div>
108
- </div>
109
-
110
- <div class="col-lg-6">
111
- <div class="card h-100">
112
- <div class="card-body">
113
- <h5 class="card-title">
114
- <i class="fas fa-play me-2 text-success"></i>Basic Usage
115
- </h5>
116
- <pre class="bg-light p-3 rounded"><code>from ttsfm import TTSClient
117
-
118
- client = TTSClient()
119
- response = client.generate_speech(
120
- text="Hello, world!",
121
- voice="alloy"
122
- )
123
- response.save_to_file("hello.wav")</code></pre>
124
- <small class="text-muted">No API keys required</small>
125
- </div>
126
- </div>
127
- </div>
128
- </div>
129
-
130
- <div class="row mt-4">
131
- <div class="col-12 text-center">
132
- <div class="d-flex justify-content-center gap-3 flex-wrap">
133
- <a href="{{ url_for('playground') }}" class="btn btn-primary">
134
- <i class="fas fa-play me-2"></i>Try Demo
135
- </a>
136
- <a href="{{ url_for('docs') }}" class="btn btn-outline-primary">
137
- <i class="fas fa-book me-2"></i>Documentation
138
- </a>
139
- </div>
140
- </div>
141
- </div>
142
- </div>
143
- </section>
144
-
145
-
146
- {% endblock %}
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}TTSFM - {{ _('home.title') }}{% endblock %}
4
+
5
+ {% block content %}
6
+ <!-- Hero Section -->
7
+ <section class="hero-section">
8
+ <div class="container">
9
+ <div class="row align-items-center min-vh-75">
10
+ <div class="col-lg-8 mx-auto text-center">
11
+ <div class="hero-content">
12
+ <div class="badge bg-primary text-white mb-3 px-3 py-2">
13
+ <i class="fas fa-code me-2"></i>Python Package
14
+ </div>
15
+ <h1 class="display-4 fw-bold mb-4">
16
+ {{ _('home.title') }}
17
+ </h1>
18
+ <p class="lead mb-4">
19
+ {{ _('home.subtitle') }}
20
+ </p>
21
+ <div class="d-flex flex-wrap gap-3 justify-content-center">
22
+ <a href="{{ url_for('playground') }}" class="btn btn-primary btn-lg">
23
+ <i class="fas fa-play me-2"></i>{{ _('home.try_demo') }}
24
+ </a>
25
+ <a href="{{ url_for('docs') }}" class="btn btn-outline-secondary btn-lg">
26
+ <i class="fas fa-book me-2"></i>{{ _('home.documentation') }}
27
+ </a>
28
+ <a href="https://github.com/dbccccccc/ttsfm" class="btn btn-outline-secondary btn-lg" target="_blank" rel="noopener noreferrer">
29
+ <i class="fab fa-github me-2"></i>{{ _('home.github') }}
30
+ </a>
31
+ </div>
32
+ </div>
33
+ </div>
34
+ </div>
35
+ </div>
36
+ </section>
37
+
38
+ <!-- Features Section -->
39
+ <section class="py-5" style="background-color: #f8fafc;">
40
+ <div class="container">
41
+ <div class="row">
42
+ <div class="col-12 text-center mb-5">
43
+ <h2 class="fw-bold mb-4">{{ _('home.features_title') }}</h2>
44
+ <p class="lead text-muted">
45
+ {{ _('home.features_subtitle') }}
46
+ </p>
47
+ </div>
48
+ </div>
49
+
50
+ <div class="row g-4">
51
+ <div class="col-lg-3">
52
+ <div class="text-center">
53
+ <div class="feature-icon text-white rounded-circle d-inline-flex align-items-center justify-content-center mb-3" style="width: 4rem; height: 4rem; background: linear-gradient(135deg, #4f46e5 0%, #6366f1 100%);">
54
+ <i class="fas fa-key"></i>
55
+ </div>
56
+ <h5 class="fw-bold">{{ _('home.feature_free_title') }}</h5>
57
+ <p class="text-muted">{{ _('home.feature_free_desc') }}</p>
58
+ </div>
59
+ </div>
60
+
61
+ <div class="col-lg-3">
62
+ <div class="text-center">
63
+ <div class="feature-icon text-white rounded-circle d-inline-flex align-items-center justify-content-center mb-3" style="width: 4rem; height: 4rem; background: linear-gradient(135deg, #f59e0b 0%, #fbbf24 100%);">
64
+ <i class="fas fa-magic"></i>
65
+ </div>
66
+ <h5 class="fw-bold">{{ _('home.feature_openai_title') }} <span class="badge bg-success ms-1">v3.2.3</span></h5>
67
+ <p class="text-muted">{{ _('home.feature_openai_desc') }}</p>
68
+ </div>
69
+ </div>
70
+
71
+ <div class="col-lg-3">
72
+ <div class="text-center">
73
+ <div class="feature-icon text-white rounded-circle d-inline-flex align-items-center justify-content-center mb-3" style="width: 4rem; height: 4rem; background: linear-gradient(135deg, #059669 0%, #10b981 100%);">
74
+ <i class="fas fa-bolt"></i>
75
+ </div>
76
+ <h5 class="fw-bold">{{ _('home.feature_async_title') }}</h5>
77
+ <p class="text-muted">{{ _('home.feature_async_desc') }}</p>
78
+ </div>
79
+ </div>
80
+
81
+ <div class="col-lg-3">
82
+ <div class="text-center">
83
+ <div class="feature-icon text-white rounded-circle d-inline-flex align-items-center justify-content-center mb-3" style="width: 4rem; height: 4rem; background: linear-gradient(135deg, #6b7280 0%, #9ca3af 100%);">
84
+ <i class="fas fa-microphone-alt"></i>
85
+ </div>
86
+ <h5 class="fw-bold">{{ _('home.feature_voices_title') }} & {{ _('home.feature_formats_title') }}</h5>
87
+ <p class="text-muted">{{ _('home.feature_voices_desc') }} {{ _('home.feature_formats_desc') }}</p>
88
+ </div>
89
+ </div>
90
+ </div>
91
+ </div>
92
+ </section>
93
+
94
+ <!-- Quick Start Section -->
95
+ <section class="py-5">
96
+ <div class="container">
97
+ <div class="row">
98
+ <div class="col-12 text-center mb-5">
99
+ <h2 class="fw-bold mb-4">{{ _('home.quick_start_title') }}</h2>
100
+ <p class="lead text-muted">
101
+ {{ _('home.subtitle') }}
102
+ </p>
103
+ </div>
104
+ </div>
105
+
106
+ <div class="row g-4">
107
+ <div class="col-lg-6">
108
+ <div class="card h-100">
109
+ <div class="card-body">
110
+ <h5 class="card-title">
111
+ <i class="fas fa-download me-2 text-primary"></i>{{ _('home.installation_title') }}
112
+ </h5>
113
+ <pre class="bg-light p-3 rounded"><code>{{ _('home.installation_code') }}</code></pre>
114
+ <small class="text-muted">Requires Python 3.8+</small>
115
+ </div>
116
+ </div>
117
+ </div>
118
+
119
+ <div class="col-lg-6">
120
+ <div class="card h-100">
121
+ <div class="card-body">
122
+ <h5 class="card-title">
123
+ <i class="fas fa-play me-2 text-success"></i>{{ _('home.usage_title') }}
124
+ </h5>
125
+ <pre class="bg-light p-3 rounded"><code>from ttsfm import TTSClient, Voice, AudioFormat
126
+
127
+ client = TTSClient()
128
+ response = client.generate_speech(
129
+ text="Hello, world!",
130
+ voice=Voice.ALLOY,
131
+ response_format=AudioFormat.MP3
132
+ )
133
+ response.save_to_file("hello")</code></pre>
134
+ <small class="text-muted">No API keys required</small>
135
+ </div>
136
+ </div>
137
+ </div>
138
+ </div>
139
+
140
+ <div class="row mt-4">
141
+ <div class="col-12 text-center">
142
+ <div class="d-flex justify-content-center gap-3 flex-wrap">
143
+ <a href="{{ url_for('playground') }}" class="btn btn-primary">
144
+ <i class="fas fa-play me-2"></i>{{ _('home.try_demo') }}
145
+ </a>
146
+ <a href="{{ url_for('docs') }}" class="btn btn-outline-primary">
147
+ <i class="fas fa-book me-2"></i>{{ _('home.documentation') }}
148
+ </a>
149
+ </div>
150
+ </div>
151
+ </div>
152
+ </div>
153
+ </section>
154
+
155
+
156
+ {% endblock %}
ttsfm-web/templates/playground.html CHANGED
@@ -1,295 +1,317 @@
1
- {% extends "base.html" %}
2
-
3
- {% block title %}TTSFM Playground - Try Text-to-Speech{% endblock %}
4
-
5
- {% block content %}
6
- <!-- Clean Playground Header -->
7
- <section class="py-5" style="background-color: white; border-bottom: 1px solid #e5e7eb;">
8
- <div class="container">
9
- <div class="row align-items-center">
10
- <div class="col-lg-8">
11
- <div class="fade-in">
12
- <div class="badge bg-primary text-white mb-3 px-3 py-2">
13
- <i class="fas fa-flask me-2"></i>Demo
14
- </div>
15
- <h1 class="display-4 fw-bold mb-3 text-dark">
16
- <i class="fas fa-play-circle me-3 text-primary"></i>TTS Playground
17
- </h1>
18
- <p class="lead mb-4 text-muted">
19
- Test the TTSFM text-to-speech functionality with different voices and formats.
20
- </p>
21
- </div>
22
- </div>
23
- <div class="col-lg-4 text-center">
24
- <div class="playground-visual fade-in" style="animation-delay: 0.3s;">
25
- <div class="playground-icon">
26
- <i class="fas fa-waveform-lines text-primary"></i>
27
- <div class="pulse-ring"></div>
28
- <div class="pulse-ring pulse-ring-delay"></div>
29
- </div>
30
- </div>
31
- </div>
32
- </div>
33
- </div>
34
- </section>
35
-
36
- <div class="container py-5 playground">
37
-
38
- <div class="row">
39
- <div class="col-lg-10 mx-auto">
40
- <div class="card shadow-lg-custom border-0 fade-in">
41
- <div class="card-header bg-gradient-primary text-white">
42
- <h4 class="mb-0 d-flex align-items-center">
43
- <i class="fas fa-microphone me-2"></i>
44
- Text-to-Speech Generator
45
- </h4>
46
- </div>
47
- <div class="card-body p-4">
48
- <form id="tts-form">
49
- <!-- Enhanced Text Input -->
50
- <div class="mb-4">
51
- <label for="text-input" class="form-label fw-bold d-flex align-items-center">
52
- <i class="fas fa-edit me-2 text-primary"></i>
53
- Text to Convert
54
- </label>
55
- <div class="position-relative">
56
- <textarea
57
- class="form-control shadow-sm"
58
- id="text-input"
59
- rows="4"
60
- placeholder="Enter the text you want to convert to speech..."
61
- required
62
- >Hello! This is a test of the TTSFM text-to-speech system.</textarea>
63
- <div class="position-absolute top-0 end-0 p-2">
64
- <button type="button" class="btn btn-sm btn-outline-secondary" id="clear-text-btn" title="Clear text">
65
- <i class="fas fa-times"></i>
66
- </button>
67
- </div>
68
- </div>
69
- <div class="form-text d-flex justify-content-between align-items-center">
70
- <div class="d-flex align-items-center gap-3">
71
- <span class="text-muted">
72
- <i class="fas fa-keyboard me-1"></i>
73
- <span id="char-count">0</span> characters
74
- </span>
75
- <span id="length-status" class=""></span>
76
- <span class="text-muted small">
77
- <i class="fas fa-lightbulb me-1"></i>
78
- Tip: Use Ctrl+Enter to generate
79
- </span>
80
- </div>
81
- <div class="btn-group" role="group">
82
- <button type="button" class="btn btn-sm btn-outline-primary" id="validate-text-btn">
83
- <i class="fas fa-check me-1"></i>Validate
84
- </button>
85
- <button type="button" class="btn btn-sm btn-outline-secondary" id="random-text-btn">
86
- <i class="fas fa-dice me-1"></i>Random
87
- </button>
88
- </div>
89
- </div>
90
- <div id="validation-result" class="mt-2 d-none"></div>
91
- </div>
92
-
93
- <div class="row">
94
- <!-- Enhanced Voice Selection -->
95
- <div class="col-md-6 mb-4">
96
- <label for="voice-select" class="form-label fw-bold d-flex align-items-center">
97
- <i class="fas fa-microphone me-2 text-primary"></i>
98
- Voice
99
- </label>
100
- <select class="form-select shadow-sm" id="voice-select" required>
101
- <option value="">Loading voices...</option>
102
- </select>
103
- <div class="form-text">
104
- <span>Choose from available voices</span>
105
- </div>
106
- </div>
107
-
108
- <!-- Enhanced Format Selection -->
109
- <div class="col-md-6 mb-4">
110
- <label for="format-select" class="form-label fw-bold d-flex align-items-center">
111
- <i class="fas fa-file-audio me-2 text-primary"></i>
112
- Audio Format
113
- </label>
114
- <select class="form-select shadow-sm" id="format-select" required>
115
- <option value="">Loading formats...</option>
116
- </select>
117
- <div class="form-text">
118
- <span>Select your preferred audio format</span>
119
- </div>
120
- </div>
121
- </div>
122
-
123
- <!-- Advanced Options -->
124
- <div class="row">
125
- <div class="col-md-6 mb-4">
126
- <label for="max-length-input" class="form-label fw-bold">
127
- <i class="fas fa-ruler me-2"></i>Max Length
128
- </label>
129
- <input
130
- type="number"
131
- class="form-control"
132
- id="max-length-input"
133
- value="4096"
134
- min="100"
135
- max="10000"
136
- >
137
- <div class="form-text">
138
- Maximum characters per request (default: 4096)
139
- </div>
140
- </div>
141
-
142
- <div class="col-md-6 mb-4">
143
- <label class="form-label fw-bold">
144
- <i class="fas fa-cog me-2"></i>Options
145
- </label>
146
- <div class="form-check">
147
- <input class="form-check-input" type="checkbox" id="validate-length-check" checked>
148
- <label class="form-check-label" for="validate-length-check">
149
- Enable length validation
150
- </label>
151
- </div>
152
- <div class="form-check">
153
- <input class="form-check-input" type="checkbox" id="auto-split-check">
154
- <label class="form-check-label" for="auto-split-check">
155
- Auto-split long text
156
- </label>
157
- </div>
158
- </div>
159
- </div>
160
-
161
- <!-- Instructions (Optional) -->
162
- <div class="mb-4">
163
- <label for="instructions-input" class="form-label fw-bold">
164
- <i class="fas fa-magic me-2"></i>Instructions (Optional)
165
- </label>
166
- <input
167
- type="text"
168
- class="form-control"
169
- id="instructions-input"
170
- placeholder="e.g., Speak in a cheerful and upbeat tone"
171
- >
172
- <div class="form-text">
173
- Provide optional instructions for voice modulation
174
- </div>
175
- </div>
176
-
177
- <!-- Enhanced Generate Button -->
178
- <div class="text-center mb-4">
179
- <div class="d-grid gap-2 d-md-block">
180
- <button type="submit" class="btn btn-primary btn-lg px-4 py-3" id="generate-btn">
181
- <span class="btn-text">
182
- <i class="fas fa-magic me-2"></i>Generate Speech
183
- </span>
184
- <span class="loading-spinner">
185
- <i class="fas fa-spinner fa-spin me-2"></i>Generating...
186
- </span>
187
- </button>
188
- <button type="button" class="btn btn-outline-secondary btn-lg ms-md-3" id="reset-form-btn">
189
- <i class="fas fa-redo me-2"></i>Reset
190
- </button>
191
- </div>
192
- </div>
193
- </form>
194
-
195
- <!-- Enhanced Audio Player -->
196
- <div id="audio-result" class="d-none">
197
- <div class="border-top pt-4 mt-4">
198
- <div class="d-flex align-items-center justify-content-between mb-3">
199
- <h5 class="mb-0 d-flex align-items-center">
200
- <i class="fas fa-volume-up me-2 text-success"></i>
201
- Generated Audio
202
- <span class="badge bg-success ms-2">
203
- <i class="fas fa-check me-1"></i>Ready
204
- </span>
205
- </h5>
206
- <div class="btn-group" role="group">
207
- <button type="button" class="btn btn-sm btn-outline-primary" id="replay-btn" title="Replay audio">
208
- <i class="fas fa-redo"></i>
209
- </button>
210
- <button type="button" class="btn btn-sm btn-outline-secondary" id="share-btn" title="Share audio">
211
- <i class="fas fa-share"></i>
212
- </button>
213
- </div>
214
- </div>
215
-
216
- <div class="audio-player-container bg-light rounded p-3 mb-3">
217
- <audio controls class="audio-player w-100" id="audio-player" preload="metadata">
218
- Your browser does not support the audio element.
219
- </audio>
220
- <div class="audio-controls mt-2 d-flex justify-content-between align-items-center">
221
- <div class="audio-info">
222
- <span id="audio-info" class="text-muted small"></span>
223
- </div>
224
- <div class="audio-actions">
225
- <button type="button" class="btn btn-success btn-sm" id="download-btn">
226
- <i class="fas fa-download me-1"></i>Download
227
- </button>
228
- </div>
229
- </div>
230
- </div>
231
-
232
- <div class="audio-stats row text-center">
233
- <div class="col-md-3 col-6">
234
- <div class="stat-item">
235
- <i class="fas fa-clock text-primary"></i>
236
- <div class="stat-value" id="audio-duration">--</div>
237
- <div class="stat-label">Duration</div>
238
- </div>
239
- </div>
240
- <div class="col-md-3 col-6">
241
- <div class="stat-item">
242
- <i class="fas fa-file text-info"></i>
243
- <div class="stat-value" id="audio-size">--</div>
244
- <div class="stat-label">File Size</div>
245
- </div>
246
- </div>
247
- <div class="col-md-3 col-6">
248
- <div class="stat-item">
249
- <i class="fas fa-microphone text-warning"></i>
250
- <div class="stat-value" id="audio-voice">--</div>
251
- <div class="stat-label">Voice</div>
252
- </div>
253
- </div>
254
- <div class="col-md-3 col-6">
255
- <div class="stat-item">
256
- <i class="fas fa-music text-success"></i>
257
- <div class="stat-value" id="audio-format">--</div>
258
- <div class="stat-label">Format</div>
259
- </div>
260
- </div>
261
- </div>
262
- </div>
263
- </div>
264
-
265
- <!-- Batch Results -->
266
- <div id="batch-result" class="d-none">
267
- <hr>
268
- <h5 class="mb-3">
269
- <i class="fas fa-layer-group me-2"></i>Batch Processing Results
270
- </h5>
271
- <div class="alert alert-info" id="batch-summary"></div>
272
- <div id="batch-chunks" class="row g-3"></div>
273
- <div class="mt-3">
274
- <button type="button" class="btn btn-outline-primary" id="download-all-btn">
275
- <i class="fas fa-download me-2"></i>Download All Audio Files
276
- </button>
277
- </div>
278
- </div>
279
- </div>
280
- </div>
281
- </div>
282
- </div>
283
- </div>
284
- {% endblock %}
285
-
286
- {% block extra_js %}
287
- <!-- Playground JavaScript -->
288
- <script src="{{ url_for('static', filename='js/playground.js') }}"></script>
289
- <script>
290
- // Additional playground-specific functionality
291
- console.log('TTSFM Playground loaded successfully!');
292
-
293
-
294
- </script>
295
- {% endblock %}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}TTSFM {{ _('nav.playground') }} - {{ _('playground.title') }}{% endblock %}
4
+
5
+ {% block content %}
6
+ <!-- Clean Playground Header -->
7
+ <section class="py-5" style="background-color: white; border-bottom: 1px solid #e5e7eb;">
8
+ <div class="container">
9
+ <div class="row align-items-center">
10
+ <div class="col-lg-8">
11
+ <div class="fade-in">
12
+ <div class="badge bg-primary text-white mb-3 px-3 py-2">
13
+ <i class="fas fa-flask me-2"></i>Demo
14
+ </div>
15
+ <h1 class="display-4 fw-bold mb-3 text-dark">
16
+ <i class="fas fa-play-circle me-3 text-primary"></i>{{ _('playground.title') }}
17
+ </h1>
18
+ <p class="lead mb-4 text-muted">
19
+ {{ _('playground.subtitle') }}
20
+ </p>
21
+ </div>
22
+ </div>
23
+ <div class="col-lg-4 text-center">
24
+ <div class="playground-visual fade-in" style="animation-delay: 0.3s;">
25
+ <div class="playground-icon">
26
+ <i class="fas fa-waveform-lines text-primary"></i>
27
+ <div class="pulse-ring"></div>
28
+ <div class="pulse-ring pulse-ring-delay"></div>
29
+ </div>
30
+ </div>
31
+ </div>
32
+ </div>
33
+ </div>
34
+ </section>
35
+
36
+ <div class="container py-5 playground">
37
+
38
+ <div class="row">
39
+ <div class="col-lg-10 mx-auto">
40
+ <div class="card shadow-lg-custom border-0 fade-in">
41
+ <div class="card-header bg-gradient-primary text-white">
42
+ <h4 class="mb-0 d-flex align-items-center">
43
+ <i class="fas fa-microphone me-2"></i>
44
+ {{ _('playground.title') }}
45
+ </h4>
46
+ </div>
47
+ <div class="card-body p-4">
48
+ <form id="tts-form" onsubmit="return false;">
49
+ <!-- Enhanced Text Input -->
50
+ <div class="mb-4">
51
+ <label for="text-input" class="form-label fw-bold d-flex align-items-center">
52
+ <i class="fas fa-edit me-2 text-primary"></i>
53
+ {{ _('playground.text_input_label') }}
54
+ </label>
55
+ <div class="position-relative">
56
+ <textarea
57
+ class="form-control shadow-sm"
58
+ id="text-input"
59
+ rows="4"
60
+ placeholder="{{ _('playground.text_input_placeholder') }}"
61
+ required
62
+ >Hello! This is a test of the TTSFM text-to-speech system.</textarea>
63
+ <div class="position-absolute top-0 end-0 p-2">
64
+ <button type="button" class="btn btn-sm btn-outline-secondary" id="clear-text-btn" title="Clear text">
65
+ <i class="fas fa-times"></i>
66
+ </button>
67
+ </div>
68
+ </div>
69
+ <div class="form-text d-flex justify-content-between align-items-center">
70
+ <div class="d-flex align-items-center gap-3">
71
+ <span class="text-muted">
72
+ <i class="fas fa-keyboard me-1"></i>
73
+ <span id="char-count">0</span> {{ _('playground.character_count') }}
74
+ </span>
75
+ <span id="length-status" class=""></span>
76
+ <span id="auto-combine-status" class="badge bg-success d-none">
77
+ <i class="fas fa-magic me-1"></i>{{ _('playground.max_length_warning') }}
78
+ </span>
79
+ <span class="text-muted small">
80
+ <i class="fas fa-lightbulb me-1"></i>
81
+ Tip: Use Ctrl+Enter to generate
82
+ </span>
83
+ </div>
84
+ <div class="btn-group" role="group">
85
+ <button type="button" class="btn btn-sm btn-outline-primary" id="validate-text-btn">
86
+ <i class="fas fa-check me-1"></i>{{ _('common.validate') if _('common.validate') != 'common.validate' else 'Validate' }}
87
+ </button>
88
+ <button type="button" class="btn btn-sm btn-outline-secondary" id="random-text-btn">
89
+ <i class="fas fa-dice me-1"></i>{{ _('playground.random_text') }}
90
+ </button>
91
+ </div>
92
+ </div>
93
+ <div id="validation-result" class="mt-2 d-none"></div>
94
+ </div>
95
+
96
+ <div class="row">
97
+ <!-- Enhanced Voice Selection -->
98
+ <div class="col-md-6 mb-4">
99
+ <label for="voice-select" class="form-label fw-bold d-flex align-items-center">
100
+ <i class="fas fa-microphone me-2 text-primary"></i>
101
+ {{ _('playground.voice_label') }}
102
+ </label>
103
+ <select class="form-select shadow-sm" id="voice-select" required>
104
+ <option value="">{{ _('common.loading_voices') }}</option>
105
+ </select>
106
+ <div class="form-text">
107
+ <span>{{ _('common.choose_voice') }}</span>
108
+ </div>
109
+ </div>
110
+
111
+ <!-- Enhanced Format Selection -->
112
+ <div class="col-md-6 mb-4">
113
+ <label for="format-select" class="form-label fw-bold d-flex align-items-center">
114
+ <i class="fas fa-file-audio me-2 text-primary"></i>
115
+ {{ _('playground.format_label') }}
116
+ </label>
117
+ <select class="form-select shadow-sm" id="format-select" required>
118
+ <option value="">{{ _('common.loading_formats') }}</option>
119
+ </select>
120
+ <div class="form-text">
121
+ <span>{{ _('common.select_format') }}</span>
122
+ </div>
123
+ </div>
124
+ </div>
125
+
126
+ <!-- Advanced Options -->
127
+ <div class="row">
128
+ <div class="col-md-6 mb-4">
129
+ <label for="max-length-input" class="form-label fw-bold">
130
+ <i class="fas fa-ruler me-2"></i>{{ _('common.max_length') }}
131
+ </label>
132
+ <input
133
+ type="number"
134
+ class="form-control"
135
+ id="max-length-input"
136
+ value="4096"
137
+ min="100"
138
+ max="10000"
139
+ >
140
+ <div class="form-text">
141
+ {{ _('playground.max_length_description') }}
142
+ </div>
143
+ </div>
144
+
145
+ <div class="col-md-6 mb-4">
146
+ <label class="form-label fw-bold">
147
+ <i class="fas fa-cog me-2"></i>{{ _('common.options') }}
148
+ </label>
149
+ <div class="form-check">
150
+ <input class="form-check-input" type="checkbox" id="validate-length-check" checked>
151
+ <label class="form-check-label" for="validate-length-check">
152
+ {{ _('playground.enable_length_validation') }}
153
+ </label>
154
+ </div>
155
+ <div class="form-check">
156
+ <input class="form-check-input" type="checkbox" id="auto-combine-check" checked>
157
+ <label class="form-check-label" for="auto-combine-check">
158
+ <span class="fw-bold text-primary">{{ _('playground.auto_combine_long_text') }}</span>
159
+ <i class="fas fa-info-circle ms-1" data-bs-toggle="tooltip"
160
+ title="{{ _('playground.auto_combine_tooltip') }}"></i>
161
+ </label>
162
+ <div class="form-text small">
163
+ <i class="fas fa-magic me-1"></i>
164
+ {{ _('playground.auto_combine_description') }}
165
+ </div>
166
+ </div>
167
+ </div>
168
+ </div>
169
+
170
+ <!-- Instructions (Optional) -->
171
+ <div class="mb-4">
172
+ <label for="instructions-input" class="form-label fw-bold">
173
+ <i class="fas fa-magic me-2"></i>{{ _('playground.instructions_label') }}
174
+ </label>
175
+ <input
176
+ type="text"
177
+ class="form-control"
178
+ id="instructions-input"
179
+ placeholder="{{ _('playground.instructions_placeholder') }}"
180
+ >
181
+ <div class="form-text">
182
+ {{ _('playground.instructions_description') }}
183
+ </div>
184
+ </div>
185
+
186
+ <!-- API Key (Optional) -->
187
+ <div class="mb-4" id="api-key-section">
188
+ <label for="api-key-input" class="form-label fw-bold">
189
+ <i class="fas fa-key me-2"></i>{{ _('playground.api_key_optional') }}
190
+ </label>
191
+ <div class="input-group">
192
+ <input
193
+ type="password"
194
+ class="form-control"
195
+ id="api-key-input"
196
+ placeholder="{{ _('playground.api_key_placeholder') }}"
197
+ >
198
+ <button class="btn btn-outline-secondary" type="button" id="toggle-api-key-visibility">
199
+ <i class="fas fa-eye" id="api-key-eye-icon"></i>
200
+ </button>
201
+ </div>
202
+ <div class="form-text">
203
+ <i class="fas fa-info-circle me-1"></i>
204
+ {{ _('playground.api_key_description') }}
205
+ </div>
206
+ </div>
207
+
208
+ <!-- Enhanced Generate Button -->
209
+ <div class="text-center mb-4">
210
+ <div class="d-grid gap-2 d-md-block">
211
+ <button type="submit" class="btn btn-primary btn-lg px-4 py-3" id="generate-btn">
212
+ <span class="btn-text">
213
+ <i class="fas fa-magic me-2"></i>{{ _('playground.generate_speech') }}
214
+ </span>
215
+ <span class="loading-spinner">
216
+ <i class="fas fa-spinner fa-spin me-2"></i>{{ _('playground.generating') }}
217
+ </span>
218
+ </button>
219
+ <button type="button" class="btn btn-outline-secondary btn-lg ms-md-3" id="reset-form-btn">
220
+ <i class="fas fa-redo me-2"></i>{{ _('common.reset') }}
221
+ </button>
222
+ </div>
223
+ </div>
224
+ </form>
225
+
226
+ <!-- Enhanced Audio Player -->
227
+ <div id="audio-result" class="d-none">
228
+ <div class="border-top pt-4 mt-4">
229
+ <div class="d-flex align-items-center justify-content-between mb-3">
230
+ <h5 class="mb-0 d-flex align-items-center">
231
+ <i class="fas fa-volume-up me-2 text-success"></i>
232
+ {{ _('playground.audio_player_title') }}
233
+ <span class="badge bg-success ms-2">
234
+ <i class="fas fa-check me-1"></i>Ready
235
+ </span>
236
+ </h5>
237
+ <div class="btn-group" role="group">
238
+ <button type="button" class="btn btn-sm btn-outline-primary" id="replay-btn" title="Replay audio">
239
+ <i class="fas fa-redo"></i>
240
+ </button>
241
+ <button type="button" class="btn btn-sm btn-outline-secondary" id="share-btn" title="Share audio">
242
+ <i class="fas fa-share"></i>
243
+ </button>
244
+ </div>
245
+ </div>
246
+
247
+ <div class="audio-player-container bg-light rounded p-3 mb-3">
248
+ <audio controls class="audio-player w-100" id="audio-player" preload="metadata">
249
+ Your browser does not support the audio element.
250
+ </audio>
251
+ <div class="audio-controls mt-2 d-flex justify-content-between align-items-center">
252
+ <div class="audio-info">
253
+ <span id="audio-info" class="text-muted small"></span>
254
+ </div>
255
+ <div class="audio-actions">
256
+ <button type="button" class="btn btn-success btn-sm" id="download-btn">
257
+ <i class="fas fa-download me-1"></i>{{ _('playground.download_audio') }}
258
+ </button>
259
+ </div>
260
+ </div>
261
+ </div>
262
+
263
+ <div class="audio-stats row text-center">
264
+ <div class="col-md-3 col-6">
265
+ <div class="stat-item">
266
+ <i class="fas fa-clock text-primary"></i>
267
+ <div class="stat-value" id="audio-duration">--</div>
268
+ <div class="stat-label">{{ _('playground.duration') }}</div>
269
+ </div>
270
+ </div>
271
+ <div class="col-md-3 col-6">
272
+ <div class="stat-item">
273
+ <i class="fas fa-file text-info"></i>
274
+ <div class="stat-value" id="audio-size">--</div>
275
+ <div class="stat-label">{{ _('playground.file_size') }}</div>
276
+ </div>
277
+ </div>
278
+ <div class="col-md-3 col-6">
279
+ <div class="stat-item">
280
+ <i class="fas fa-microphone text-warning"></i>
281
+ <div class="stat-value" id="audio-voice">--</div>
282
+ <div class="stat-label">{{ _('playground.voice') }}</div>
283
+ </div>
284
+ </div>
285
+ <div class="col-md-3 col-6">
286
+ <div class="stat-item">
287
+ <i class="fas fa-music text-success"></i>
288
+ <div class="stat-value" id="audio-format">--</div>
289
+ <div class="stat-label">{{ _('playground.format') }}</div>
290
+ </div>
291
+ </div>
292
+ </div>
293
+ </div>
294
+ </div>
295
+
296
+
297
+ </div>
298
+ </div>
299
+ </div>
300
+ </div>
301
+ </div>
302
+ {% endblock %}
303
+
304
+ {% block extra_js %}
305
+ <!-- Socket.IO for WebSocket support -->
306
+ <script src="https://cdn.socket.io/4.6.0/socket.io.min.js"></script>
307
+ <!-- WebSocket TTS Client -->
308
+ <script src="{{ url_for('static', filename='js/websocket-tts.js') }}"></script>
309
+ <!-- Enhanced Playground JavaScript with WebSocket Support -->
310
+ <script src="{{ url_for('static', filename='js/playground-enhanced-fixed.js') }}"></script>
311
+ <script>
312
+ // Additional playground-specific functionality
313
+ console.log('TTSFM Enhanced Playground with WebSocket support loaded successfully!');
314
+
315
+
316
+ </script>
317
+ {% endblock %}
ttsfm-web/templates/websocket_demo.html ADDED
@@ -0,0 +1,390 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}{{ _('websocket.title', 'WebSocket Streaming Demo') }} - TTSFM{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="container mt-5">
7
+ <div class="row">
8
+ <div class="col-lg-10 mx-auto">
9
+ <h1 class="text-center mb-4">
10
+ <i class="fas fa-bolt text-warning"></i>
11
+ {{ _('websocket.title', 'WebSocket Streaming Demo') }}
12
+ </h1>
13
+
14
+ <!-- Connection Status -->
15
+ <div class="alert alert-info" id="connection-status">
16
+ <i class="fas fa-plug me-2"></i>
17
+ <span id="status-text">Connecting to WebSocket server...</span>
18
+ </div>
19
+
20
+ <!-- Input Form -->
21
+ <div class="card shadow-sm mb-4">
22
+ <div class="card-body">
23
+ <h5 class="card-title">{{ _('playground.generate_speech', 'Generate Speech') }}</h5>
24
+
25
+ <form id="streaming-form">
26
+ <div class="mb-3">
27
+ <label for="text-input" class="form-label">
28
+ {{ _('playground.text_input', 'Text to Convert') }}
29
+ </label>
30
+ <textarea
31
+ class="form-control"
32
+ id="text-input"
33
+ rows="4"
34
+ maxlength="4096"
35
+ placeholder="{{ _('playground.text_placeholder', 'Enter your text here...') }}"
36
+ >Experience the future of text-to-speech with real-time WebSocket streaming! This innovative feature delivers audio chunks as they're generated, providing a more responsive and engaging user experience.</textarea>
37
+ <div class="form-text">
38
+ <i class="fas fa-info-circle me-1"></i>
39
+ Streaming will split text into chunks for real-time delivery
40
+ </div>
41
+ </div>
42
+
43
+ <div class="row">
44
+ <div class="col-md-6 mb-3">
45
+ <label for="voice-select" class="form-label">
46
+ {{ _('playground.voice', 'Voice') }}
47
+ </label>
48
+ <select class="form-select" id="voice-select">
49
+ <option value="alloy">Alloy</option>
50
+ <option value="echo">Echo</option>
51
+ <option value="fable">Fable</option>
52
+ <option value="onyx">Onyx</option>
53
+ <option value="nova">Nova</option>
54
+ <option value="shimmer">Shimmer</option>
55
+ </select>
56
+ </div>
57
+
58
+ <div class="col-md-6 mb-3">
59
+ <label for="format-select" class="form-label">
60
+ {{ _('playground.format', 'Audio Format') }}
61
+ </label>
62
+ <select class="form-select" id="format-select">
63
+ <option value="mp3">MP3</option>
64
+ <option value="wav">WAV</option>
65
+ <option value="opus">OPUS</option>
66
+ </select>
67
+ </div>
68
+ </div>
69
+
70
+ <div class="d-grid gap-2 d-md-flex justify-content-md-end">
71
+ <button type="submit" class="btn btn-primary" id="stream-btn">
72
+ <i class="fas fa-bolt me-2"></i>
73
+ Start Streaming
74
+ </button>
75
+ <button type="button" class="btn btn-danger" id="cancel-btn" style="display: none;">
76
+ <i class="fas fa-stop me-2"></i>
77
+ Cancel
78
+ </button>
79
+ </div>
80
+ </form>
81
+ </div>
82
+ </div>
83
+
84
+ <!-- Progress Section -->
85
+ <div class="card shadow-sm mb-4" id="progress-section" style="display: none;">
86
+ <div class="card-body">
87
+ <h5 class="card-title">Streaming Progress</h5>
88
+
89
+ <div class="progress mb-3" style="height: 25px;">
90
+ <div
91
+ class="progress-bar progress-bar-striped progress-bar-animated"
92
+ id="progress-bar"
93
+ role="progressbar"
94
+ style="width: 0%"
95
+ >
96
+ <span id="progress-text">0%</span>
97
+ </div>
98
+ </div>
99
+
100
+ <div class="row text-center">
101
+ <div class="col-md-4">
102
+ <h6>Chunks Received</h6>
103
+ <p class="h4"><span id="chunks-received">0</span> / <span id="total-chunks">0</span></p>
104
+ </div>
105
+ <div class="col-md-4">
106
+ <h6>Data Transferred</h6>
107
+ <p class="h4" id="data-transferred">0 KB</p>
108
+ </div>
109
+ <div class="col-md-4">
110
+ <h6>Generation Time</h6>
111
+ <p class="h4" id="generation-time">0.0s</p>
112
+ </div>
113
+ </div>
114
+ </div>
115
+ </div>
116
+
117
+ <!-- Audio Chunks Display -->
118
+ <div class="card shadow-sm mb-4" id="chunks-section" style="display: none;">
119
+ <div class="card-body">
120
+ <h5 class="card-title">Audio Chunks</h5>
121
+ <div id="chunks-container" class="row g-2">
122
+ <!-- Chunks will be added here dynamically -->
123
+ </div>
124
+ </div>
125
+ </div>
126
+
127
+ <!-- Final Audio Player -->
128
+ <div class="card shadow-sm" id="audio-section" style="display: none;">
129
+ <div class="card-body">
130
+ <h5 class="card-title">Generated Audio</h5>
131
+ <audio id="audio-player" controls class="w-100"></audio>
132
+ <div class="mt-2">
133
+ <button class="btn btn-success" id="download-btn">
134
+ <i class="fas fa-download me-2"></i>
135
+ Download Audio
136
+ </button>
137
+ </div>
138
+ </div>
139
+ </div>
140
+
141
+ <!-- Info Section -->
142
+ <div class="card shadow-sm mt-4">
143
+ <div class="card-body">
144
+ <h5 class="card-title">
145
+ <i class="fas fa-info-circle text-info me-2"></i>
146
+ About WebSocket Streaming
147
+ </h5>
148
+ <p>
149
+ This demo showcases real-time audio streaming using WebSockets. Instead of waiting
150
+ for the entire audio to be generated, you receive chunks as they're processed,
151
+ providing immediate feedback and a more responsive experience.
152
+ </p>
153
+ <ul>
154
+ <li><strong>Lower Perceived Latency:</strong> Start receiving audio before generation completes</li>
155
+ <li><strong>Progress Tracking:</strong> Real-time updates on generation progress</li>
156
+ <li><strong>Cancellable:</strong> Stop generation mid-stream if needed</li>
157
+ <li><strong>Efficient:</strong> Stream chunks as they're ready, no waiting</li>
158
+ </ul>
159
+ </div>
160
+ </div>
161
+ </div>
162
+ </div>
163
+ </div>
164
+
165
+ <!-- Include Socket.IO -->
166
+ <script src="https://cdn.socket.io/4.6.0/socket.io.min.js"></script>
167
+ <!-- Include our WebSocket client -->
168
+ <script src="{{ url_for('static', filename='js/websocket-tts.js') }}"></script>
169
+
170
+ <script>
171
+ // Initialize WebSocket client
172
+ let wsClient = null;
173
+ let currentRequestId = null;
174
+ let startTime = null;
175
+
176
+ // Initialize on page load
177
+ document.addEventListener('DOMContentLoaded', function() {
178
+ // Create WebSocket client
179
+ wsClient = new WebSocketTTSClient({
180
+ debug: true,
181
+ onConnect: () => {
182
+ updateConnectionStatus('connected');
183
+ },
184
+ onDisconnect: () => {
185
+ updateConnectionStatus('disconnected');
186
+ },
187
+ onError: (error) => {
188
+ updateConnectionStatus('error');
189
+ showError(`Connection error: ${error.message}`);
190
+ }
191
+ });
192
+
193
+ // Form submission
194
+ document.getElementById('streaming-form').addEventListener('submit', handleStreamingSubmit);
195
+
196
+ // Cancel button
197
+ document.getElementById('cancel-btn').addEventListener('click', handleCancel);
198
+ });
199
+
200
+ function updateConnectionStatus(status) {
201
+ const statusEl = document.getElementById('connection-status');
202
+ const statusText = document.getElementById('status-text');
203
+
204
+ statusEl.className = 'alert';
205
+
206
+ switch(status) {
207
+ case 'connected':
208
+ statusEl.classList.add('alert-success');
209
+ statusText.innerHTML = '<i class="fas fa-check-circle me-2"></i>Connected to WebSocket server';
210
+ break;
211
+ case 'disconnected':
212
+ statusEl.classList.add('alert-warning');
213
+ statusText.innerHTML = '<i class="fas fa-exclamation-triangle me-2"></i>Disconnected from server';
214
+ break;
215
+ case 'error':
216
+ statusEl.classList.add('alert-danger');
217
+ statusText.innerHTML = '<i class="fas fa-times-circle me-2"></i>Connection error';
218
+ break;
219
+ default:
220
+ statusEl.classList.add('alert-info');
221
+ statusText.innerHTML = '<i class="fas fa-plug me-2"></i>Connecting...';
222
+ }
223
+ }
224
+
225
+ async function handleStreamingSubmit(e) {
226
+ e.preventDefault();
227
+
228
+ if (!wsClient || !wsClient.isConnected()) {
229
+ showError('WebSocket not connected. Please refresh the page.');
230
+ return;
231
+ }
232
+
233
+ // Get form values
234
+ const text = document.getElementById('text-input').value.trim();
235
+ const voice = document.getElementById('voice-select').value;
236
+ const format = document.getElementById('format-select').value;
237
+
238
+ if (!text) {
239
+ showError('Please enter some text to convert.');
240
+ return;
241
+ }
242
+
243
+ // Reset UI
244
+ resetUI();
245
+
246
+ // Show progress section
247
+ document.getElementById('progress-section').style.display = 'block';
248
+ document.getElementById('chunks-section').style.display = 'block';
249
+ document.getElementById('stream-btn').disabled = true;
250
+ document.getElementById('cancel-btn').style.display = 'inline-block';
251
+
252
+ startTime = Date.now();
253
+
254
+ try {
255
+ const result = await wsClient.generateSpeech(text, {
256
+ voice: voice,
257
+ format: format,
258
+ chunkSize: 512, // Smaller chunks for more updates
259
+ onStart: (data) => {
260
+ currentRequestId = data.request_id;
261
+ console.log('Stream started:', data);
262
+ },
263
+ onProgress: (progress) => {
264
+ updateProgress(progress);
265
+ },
266
+ onChunk: (chunk) => {
267
+ handleAudioChunk(chunk);
268
+ },
269
+ onComplete: (result) => {
270
+ handleStreamComplete(result);
271
+ },
272
+ onError: (error) => {
273
+ showError(`Streaming error: ${error.message}`);
274
+ }
275
+ });
276
+
277
+ console.log('Streaming completed:', result);
278
+
279
+ } catch (error) {
280
+ showError(`Failed to generate speech: ${error.message}`);
281
+ resetUI();
282
+ }
283
+ }
284
+
285
+ function updateProgress(progress) {
286
+ const progressBar = document.getElementById('progress-bar');
287
+ const progressText = document.getElementById('progress-text');
288
+ const chunksReceived = document.getElementById('chunks-received');
289
+ const totalChunks = document.getElementById('total-chunks');
290
+ const generationTime = document.getElementById('generation-time');
291
+
292
+ progressBar.style.width = `${progress.progress}%`;
293
+ progressText.textContent = `${progress.progress}%`;
294
+ chunksReceived.textContent = progress.chunksCompleted;
295
+ totalChunks.textContent = progress.totalChunks;
296
+
297
+ if (startTime) {
298
+ const elapsed = (Date.now() - startTime) / 1000;
299
+ generationTime.textContent = `${elapsed.toFixed(1)}s`;
300
+ }
301
+ }
302
+
303
+ function handleAudioChunk(chunk) {
304
+ const container = document.getElementById('chunks-container');
305
+
306
+ // Create chunk visualization
307
+ const chunkEl = document.createElement('div');
308
+ chunkEl.className = 'col-auto';
309
+ chunkEl.innerHTML = `
310
+ <div class="badge bg-primary p-2" title="Chunk ${chunk.chunkIndex + 1}">
311
+ <i class="fas fa-music me-1"></i>
312
+ ${chunk.chunkIndex + 1}
313
+ <small class="d-block">${(chunk.audioData.byteLength / 1024).toFixed(1)}KB</small>
314
+ </div>
315
+ `;
316
+
317
+ container.appendChild(chunkEl);
318
+
319
+ // Update data transferred
320
+ const currentData = parseFloat(document.getElementById('data-transferred').textContent);
321
+ const newData = currentData + (chunk.audioData.byteLength / 1024);
322
+ document.getElementById('data-transferred').textContent = `${newData.toFixed(1)} KB`;
323
+ }
324
+
325
+ function handleStreamComplete(result) {
326
+ // Create blob from combined audio
327
+ const blob = new Blob([result.audioData], { type: `audio/${result.format}` });
328
+ const url = URL.createObjectURL(blob);
329
+
330
+ // Set up audio player
331
+ const audioPlayer = document.getElementById('audio-player');
332
+ audioPlayer.src = url;
333
+
334
+ // Show audio section
335
+ document.getElementById('audio-section').style.display = 'block';
336
+
337
+ // Set up download button
338
+ document.getElementById('download-btn').onclick = () => {
339
+ const a = document.createElement('a');
340
+ a.href = url;
341
+ a.download = `tts_stream_${Date.now()}.${result.format}`;
342
+ a.click();
343
+ };
344
+
345
+ // Update final stats
346
+ document.getElementById('generation-time').textContent = `${(result.generationTime / 1000).toFixed(2)}s`;
347
+
348
+ // Reset buttons
349
+ document.getElementById('stream-btn').disabled = false;
350
+ document.getElementById('cancel-btn').style.display = 'none';
351
+
352
+ // Update progress bar to success
353
+ const progressBar = document.getElementById('progress-bar');
354
+ progressBar.classList.remove('progress-bar-animated');
355
+ progressBar.classList.add('bg-success');
356
+ }
357
+
358
+ function handleCancel() {
359
+ if (currentRequestId) {
360
+ wsClient.cancelStream(currentRequestId);
361
+ showInfo('Stream cancelled');
362
+ resetUI();
363
+ }
364
+ }
365
+
366
+ function resetUI() {
367
+ document.getElementById('progress-section').style.display = 'none';
368
+ document.getElementById('chunks-section').style.display = 'none';
369
+ document.getElementById('audio-section').style.display = 'none';
370
+ document.getElementById('stream-btn').disabled = false;
371
+ document.getElementById('cancel-btn').style.display = 'none';
372
+ document.getElementById('chunks-container').innerHTML = '';
373
+ document.getElementById('progress-bar').style.width = '0%';
374
+ document.getElementById('progress-bar').className = 'progress-bar progress-bar-striped progress-bar-animated';
375
+ document.getElementById('data-transferred').textContent = '0 KB';
376
+ currentRequestId = null;
377
+ startTime = null;
378
+ }
379
+
380
+ function showError(message) {
381
+ console.error(message);
382
+ // You could add a toast notification here
383
+ }
384
+
385
+ function showInfo(message) {
386
+ console.info(message);
387
+ // You could add a toast notification here
388
+ }
389
+ </script>
390
+ {% endblock %}
ttsfm-web/translations/en.json ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nav": {
3
+ "home": "Home",
4
+ "playground": "Playground",
5
+ "documentation": "Documentation",
6
+ "github": "GitHub",
7
+ "status_checking": "Checking...",
8
+ "status_online": "Online",
9
+ "status_offline": "Offline"
10
+ },
11
+ "common": {
12
+ "loading": "Loading...",
13
+ "error": "Error",
14
+ "success": "Success",
15
+ "warning": "Warning",
16
+ "info": "Info",
17
+ "close": "Close",
18
+ "save": "Save",
19
+ "cancel": "Cancel",
20
+ "confirm": "Confirm",
21
+ "download": "Download",
22
+ "upload": "Upload",
23
+ "generate": "Generate",
24
+ "play": "Play",
25
+ "stop": "Stop",
26
+ "pause": "Pause",
27
+ "resume": "Resume",
28
+ "clear": "Clear",
29
+ "reset": "Reset",
30
+ "copy": "Copy",
31
+ "copied": "Copied!",
32
+ "language": "Language",
33
+ "english": "English",
34
+ "chinese": "中文",
35
+ "validate": "Validate",
36
+ "options": "Options",
37
+ "max_length": "Max Length",
38
+ "tip": "Tip",
39
+ "choose_voice": "Choose from available voices",
40
+ "select_format": "Select your preferred audio format",
41
+ "loading_voices": "Loading voices...",
42
+ "loading_formats": "Loading formats...",
43
+ "ctrl_enter_tip": "Use Ctrl+Enter to generate",
44
+ "auto_combine_enabled": "Auto-combine enabled"
45
+ },
46
+ "home": {
47
+ "title": "Free Text-to-Speech for Python",
48
+ "subtitle": "Generate high-quality speech from text using the free openai.fm service. No API keys, no registration - just install and start creating audio.",
49
+ "try_demo": "Try Demo",
50
+ "documentation": "Documentation",
51
+ "github": "GitHub",
52
+ "features_title": "Key Features",
53
+ "features_subtitle": "Simple, free, and powerful text-to-speech for Python developers.",
54
+ "feature_free_title": "Completely Free",
55
+ "feature_free_desc": "No API keys or registration required. Uses the free openai.fm service.",
56
+ "feature_voices_title": "11 Voices",
57
+ "feature_voices_desc": "All OpenAI-compatible voices available for different use cases.",
58
+ "feature_formats_title": "6 Audio Formats",
59
+ "feature_formats_desc": "MP3, WAV, OPUS, AAC, FLAC, and PCM support for any application.",
60
+ "feature_docker_title": "Docker Ready",
61
+ "feature_docker_desc": "One-command deployment with web interface and API endpoints.",
62
+ "feature_openai_title": "OpenAI Compatible",
63
+ "feature_openai_desc": "Drop-in replacement for OpenAI's TTS API with auto-combine for long text.",
64
+ "feature_async_title": "Async & Sync",
65
+ "feature_async_desc": "Both asyncio and synchronous clients for maximum flexibility.",
66
+ "quick_start_title": "Quick Start",
67
+ "installation_title": "Installation",
68
+ "installation_code": "pip install ttsfm",
69
+ "usage_title": "Basic Usage",
70
+ "docker_title": "Docker Deployment",
71
+ "docker_desc": "Run TTSFM with web interface:",
72
+ "api_title": "OpenAI-Compatible API",
73
+ "api_desc": "Use with OpenAI Python client:",
74
+ "footer_copyright": "© 2024 dbcccc"
75
+ },
76
+ "playground": {
77
+ "title": "Interactive TTS Playground",
78
+ "subtitle": "Test different voices and audio formats in real-time",
79
+ "text_input_label": "Text to Convert",
80
+ "text_input_placeholder": "Enter the text you want to convert to speech...",
81
+ "voice_label": "Voice",
82
+ "format_label": "Audio Format",
83
+ "instructions_label": "Voice Instructions (Optional)",
84
+ "instructions_placeholder": "Additional instructions for voice generation...",
85
+ "character_count": "characters",
86
+ "max_length_warning": "Text exceeds maximum length. It will be automatically split and combined.",
87
+ "generate_speech": "Generate Speech",
88
+ "generating": "Generating...",
89
+ "download_audio": "Download Audio",
90
+ "audio_player_title": "Generated Audio",
91
+ "file_size": "File Size",
92
+ "duration": "Duration",
93
+ "format": "Format",
94
+ "voice": "Voice",
95
+ "chunks_combined": "Chunks Combined",
96
+ "random_text": "Random Text",
97
+ "clear_text": "Clear Text",
98
+ "max_length_description": "Maximum characters per request (default: 4096)",
99
+ "enable_length_validation": "Enable length validation",
100
+ "auto_combine_long_text": "Auto-combine long text",
101
+ "auto_combine_tooltip": "Automatically split long text and combine audio chunks into a single file",
102
+ "auto_combine_description": "Automatically handles text longer than the limit",
103
+ "instructions_description": "Provide optional instructions for voice modulation",
104
+ "api_key_optional": "API Key (Optional)",
105
+ "api_key_placeholder": "Enter your API key if required",
106
+ "api_key_description": "Only required if API key protection is enabled on the server",
107
+ "sample_texts": {
108
+ "welcome": "Welcome to TTSFM! This is a free text-to-speech service that converts your text into high-quality audio using advanced AI technology.",
109
+ "story": "Once upon a time, in a digital world far away, there lived a small Python package that could transform any text into beautiful speech. This package was called TTSFM, and it brought joy to developers everywhere.",
110
+ "technical": "TTSFM is a Python client for text-to-speech APIs that provides both synchronous and asynchronous interfaces. It supports multiple voices and audio formats, making it perfect for various applications.",
111
+ "multilingual": "TTSFM supports multiple languages and voices, allowing you to create diverse audio content for global audiences. The service is completely free and requires no API keys.",
112
+ "long": "This is a longer text sample designed to test the auto-combine feature of TTSFM. When text exceeds the maximum length limit, TTSFM automatically splits it into smaller chunks, generates audio for each chunk, and then seamlessly combines them into a single audio file. This process is completely transparent to the user and ensures that you can convert text of any length without worrying about technical limitations. The resulting audio maintains consistent quality and natural flow throughout the entire content."
113
+ },
114
+ "error_messages": {
115
+ "empty_text": "Please enter some text to convert.",
116
+ "generation_failed": "Failed to generate speech. Please try again.",
117
+ "network_error": "Network error. Please check your connection and try again.",
118
+ "invalid_format": "Invalid audio format selected.",
119
+ "invalid_voice": "Invalid voice selected.",
120
+ "text_too_long": "Text is too long. Please reduce the length or enable auto-combine.",
121
+ "server_error": "Server error. Please try again later."
122
+ },
123
+ "success_messages": {
124
+ "generation_complete": "Speech generated successfully!",
125
+ "text_copied": "Text copied to clipboard!",
126
+ "download_started": "Download started!"
127
+ }
128
+ },
129
+ "docs": {
130
+ "title": "API Documentation",
131
+ "subtitle": "Complete reference for the TTSFM Text-to-Speech API. Free, simple, and powerful.",
132
+ "contents": "Contents",
133
+ "overview": "Overview",
134
+ "authentication": "Authentication",
135
+ "text_validation": "Text Validation",
136
+ "endpoints": "API Endpoints",
137
+ "voices": "Voices",
138
+ "formats": "Audio Formats",
139
+ "generate": "Generate Speech",
140
+ "combined": "Combined Audio",
141
+ "status": "Status & Health",
142
+ "errors": "Error Handling",
143
+ "examples": "Code Examples",
144
+ "python_package": "Python Package",
145
+ "overview_title": "Overview",
146
+ "overview_desc": "The TTSFM API provides a modern, OpenAI-compatible interface for text-to-speech generation. It supports multiple voices, audio formats, and includes advanced features like text length validation and intelligent auto-combine functionality.",
147
+ "base_url": "Base URL:",
148
+ "key_features": "Key Features",
149
+ "feature_voices": "11 different voice options - Choose from alloy, echo, nova, and more",
150
+ "feature_formats": "Multiple audio formats - MP3, WAV, OPUS, AAC, FLAC, PCM support",
151
+ "feature_openai": "OpenAI compatibility - Drop-in replacement for OpenAI's TTS API",
152
+ "feature_auto_combine": "Auto-combine feature - Automatically handles long text (>4096 chars) by splitting and combining audio",
153
+ "feature_validation": "Text length validation - Smart validation with configurable limits",
154
+ "feature_monitoring": "Real-time monitoring - Status endpoints and health checks",
155
+ "new_version": "New in v3.2.3:",
156
+ "new_version_desc": "Enhanced `/v1/audio/speech` endpoint with intelligent auto-combine feature. Streamlined web interface with clean, user-friendly design and automatic long-text handling!",
157
+ "authentication_title": "Authentication",
158
+ "authentication_desc": "Currently, the API supports optional API key authentication. If configured, include your API key in the request headers.",
159
+ "text_validation_title": "Text Length Validation",
160
+ "text_validation_desc": "TTSFM includes built-in text length validation to ensure compatibility with TTS models. The default maximum length is 4096 characters, but this can be customized.",
161
+ "important": "Important:",
162
+ "text_validation_warning": "Text exceeding the maximum length will be rejected unless validation is disabled or the text is split into chunks.",
163
+ "validation_options": "Validation Options",
164
+ "max_length_option": "Maximum allowed characters (default: 4096)",
165
+ "validate_length_option": "Enable/disable validation (default: true)",
166
+ "preserve_words_option": "Avoid splitting words when chunking (default: true)",
167
+ "endpoints_title": "API Endpoints",
168
+ "get_voices_desc": "Get list of available voices.",
169
+ "get_formats_desc": "Get list of supported audio formats.",
170
+ "validate_text_desc": "Validate text length and get splitting suggestions.",
171
+ "generate_speech_desc": "Generate speech from text.",
172
+ "response_example": "Response Example:",
173
+ "request_body": "Request Body:",
174
+ "parameters": "Parameters:",
175
+ "text_param": "Text to convert to speech",
176
+ "voice_param": "Voice ID (default: \"alloy\")",
177
+ "format_param": "Audio format (default: \"mp3\")",
178
+ "instructions_param": "Voice modulation instructions",
179
+ "max_length_param": "Maximum text length (default: 4096)",
180
+ "validate_length_param": "Enable validation (default: true)",
181
+ "response": "Response:",
182
+ "response_audio": "Returns audio file with appropriate Content-Type header.",
183
+ "response_combined_audio": "Returns a single audio file containing all chunks combined seamlessly.",
184
+ "required": "required",
185
+ "optional": "optional",
186
+ "python_package_title": "Python Package",
187
+ "long_text_support": "Long Text Support",
188
+ "long_text_desc": "The TTSFM Python package includes built-in long text splitting functionality for developers who need fine-grained control:",
189
+ "developer_features": "Developer Features:",
190
+ "manual_splitting": "Manual Splitting: Full control over text chunking for advanced use cases",
191
+ "word_preservation": "Word Preservation: Maintains word boundaries for natural speech",
192
+ "separate_files": "Separate Files: Each chunk saved as individual audio file",
193
+ "cli_support": "CLI Support: Use `--split-long-text` flag for command-line usage",
194
+ "note": "Note:",
195
+ "auto_combine_note": "For web users, the auto-combine feature in `/v1/audio/speech` is recommended as it automatically handles long text and returns a single seamless audio file.",
196
+ "combined_audio_desc": "Generate a single combined audio file from long text. Automatically splits text into chunks, generates speech for each chunk, and combines them into one seamless audio file.",
197
+ "response_headers": "Response Headers:",
198
+ "chunks_combined_header": "Number of chunks that were combined",
199
+ "original_text_length_header": "Original text length in characters",
200
+ "audio_size_header": "Final audio file size in bytes",
201
+ "openai_compatible_desc": "Enhanced OpenAI-compatible endpoint with auto-combine feature. Automatically handles long text by splitting and combining audio chunks when needed.",
202
+ "enhanced_parameters": "Enhanced Parameters:",
203
+ "auto_combine_param": "Automatically split long text and combine audio chunks into a single file",
204
+ "auto_combine_false": "Return error if text exceeds max_length (standard OpenAI behavior)",
205
+ "max_length_chunk_param": "Maximum characters per chunk when splitting",
206
+ "auto_combine_header": "Whether auto-combine was enabled (true/false)",
207
+ "chunks_combined_response": "Number of audio chunks combined (1 for short text)",
208
+ "original_text_response": "Original text length (for long text processing)",
209
+ "audio_format_header": "Audio format of the response",
210
+ "audio_size_response": "Audio file size in bytes",
211
+ "short_text_comment": "Short text (works normally)",
212
+ "long_text_auto_comment": "Long text with auto-combine (default)",
213
+ "long_text_no_auto_comment": "Long text without auto-combine (will error)",
214
+ "audio_combination": "Audio Combination:",
215
+ "audio_combination_desc": "Uses advanced audio processing (PyDub) when available, with intelligent fallbacks for different environments. Supports all audio formats.",
216
+ "use_cases": "Use Cases:",
217
+ "use_case_articles": "Long Articles: Convert blog posts or articles to single audio files",
218
+ "use_case_audiobooks": "Audiobooks: Generate chapters as single audio files",
219
+ "use_case_podcasts": "Podcasts: Create podcast episodes from scripts",
220
+ "use_case_education": "Educational Content: Convert learning materials to audio",
221
+ "example_usage": "Example Usage:",
222
+ "python_example_comment": "Python example"
223
+ }
224
+ }
ttsfm-web/translations/zh.json ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nav": {
3
+ "home": "首页",
4
+ "playground": "试用平台",
5
+ "documentation": "文档",
6
+ "github": "GitHub",
7
+ "status_checking": "检查中...",
8
+ "status_online": "在线",
9
+ "status_offline": "离线"
10
+ },
11
+ "common": {
12
+ "loading": "加载中...",
13
+ "error": "错误",
14
+ "success": "成功",
15
+ "warning": "警告",
16
+ "info": "信息",
17
+ "close": "关闭",
18
+ "save": "保存",
19
+ "cancel": "取消",
20
+ "confirm": "确认",
21
+ "download": "下载",
22
+ "upload": "上传",
23
+ "generate": "生成",
24
+ "play": "播放",
25
+ "stop": "停止",
26
+ "pause": "暂停",
27
+ "resume": "继续",
28
+ "clear": "清除",
29
+ "reset": "重置",
30
+ "copy": "复制",
31
+ "copied": "已复制!",
32
+ "language": "语言",
33
+ "english": "English",
34
+ "chinese": "中文",
35
+ "validate": "验证",
36
+ "options": "选项",
37
+ "max_length": "最大长度",
38
+ "tip": "提示",
39
+ "choose_voice": "从可用声音中选择",
40
+ "select_format": "选择您偏好的音频格式",
41
+ "loading_voices": "加载声音中...",
42
+ "loading_formats": "加载格式中...",
43
+ "ctrl_enter_tip": "使用 Ctrl+Enter 生成",
44
+ "auto_combine_enabled": "自动合并已启用"
45
+ },
46
+ "home": {
47
+ "title": "免费的Python文本转语音",
48
+ "subtitle": "使用免费的openai.fm服务从文本生成高质量语音。无需API密钥,无需注册 - 只需安装即可开始创建音频。",
49
+ "try_demo": "试用演示",
50
+ "documentation": "文档",
51
+ "github": "GitHub",
52
+ "features_title": "主要特性",
53
+ "features_subtitle": "简单、免费且强大的Python开发者文本转语音工具。",
54
+ "feature_free_title": "完全免费",
55
+ "feature_free_desc": "无需API密钥或注册。使用免费的openai.fm服务。",
56
+ "feature_voices_title": "11种声音",
57
+ "feature_voices_desc": "提供所有OpenAI兼容的声音,适用于不同使用场景。",
58
+ "feature_formats_title": "6种音频格式",
59
+ "feature_formats_desc": "支持MP3、WAV、OPUS、AAC、FLAC和PCM格式,适用于任何应用。",
60
+ "feature_docker_title": "Docker就绪",
61
+ "feature_docker_desc": "一键部署,包含Web界面和API端点。",
62
+ "feature_openai_title": "OpenAI兼容",
63
+ "feature_openai_desc": "OpenAI TTS API的直接替代品,支持长文本自动合并。",
64
+ "feature_async_title": "异步和同步",
65
+ "feature_async_desc": "提供asyncio和同步客户端,最大化灵活性。",
66
+ "quick_start_title": "快速开始",
67
+ "installation_title": "安装",
68
+ "installation_code": "pip install ttsfm",
69
+ "usage_title": "基本用法",
70
+ "docker_title": "Docker部署",
71
+ "docker_desc": "运行带有Web界面的TTSFM:",
72
+ "api_title": "OpenAI兼容API",
73
+ "api_desc": "与OpenAI Python客户端一起使用:",
74
+ "footer_copyright": "© 2024 dbcccc"
75
+ },
76
+ "playground": {
77
+ "title": "交互式TTS试用平台",
78
+ "subtitle": "实时测试不同的声音和音频格式",
79
+ "text_input_label": "要转换的文本",
80
+ "text_input_placeholder": "输入您想要转换为语音的文本...",
81
+ "voice_label": "声音",
82
+ "format_label": "音频格式",
83
+ "instructions_label": "声音指令(可选)",
84
+ "instructions_placeholder": "语音生成的额外指令...",
85
+ "character_count": "字符",
86
+ "max_length_warning": "文本超过最大长度。将自动分割并合并。",
87
+ "generate_speech": "生成语音",
88
+ "generating": "生成中...",
89
+ "download_audio": "下载音频",
90
+ "audio_player_title": "生成的音频",
91
+ "file_size": "文件大小",
92
+ "duration": "时长",
93
+ "format": "格式",
94
+ "voice": "声音",
95
+ "chunks_combined": "合并片段",
96
+ "random_text": "随机文本",
97
+ "clear_text": "清除文本",
98
+ "max_length_description": "每个请求的最大字符数(默认:4096)",
99
+ "enable_length_validation": "启用长度验证",
100
+ "auto_combine_long_text": "自动合并长文本",
101
+ "auto_combine_tooltip": "自动分割长文本并将音频片段合并为单个文件",
102
+ "auto_combine_description": "自动处理超过限制的文本",
103
+ "instructions_description": "为声音调制提供可选指令",
104
+ "api_key_optional": "API密钥(可选)",
105
+ "api_key_placeholder": "如果需要,请输入您的API密钥",
106
+ "api_key_description": "仅在服务器启用API密钥保护时需要",
107
+ "sample_texts": {
108
+ "welcome": "欢迎使用TTSFM!这是一个免费的文本转语音服务,使用先进的AI技术将您的文本转换为高质量音频。",
109
+ "story": "很久很久以前,在一个遥远的数字世界里,住着一个小小的Python包,它能够将任何文本转换成美妙的语音。这个包叫做TTSFM,它为世界各地的开发者带来了快乐。",
110
+ "technical": "TTSFM是一个用于文本转语音API的Python客户端,提供同步和异步接口。它支持多种声音和音频格式,非常适合各种应用。",
111
+ "multilingual": "TTSFM支持多种语言和声音,让您能够为全球受众创建多样化的音频内容。该服务完全免费,无需API密钥。",
112
+ "long": "这是一个较长的文本示例,用于测试TTSFM的自动合并功能。当文本超过最大长度限制时,TTSFM会自动将其分割成较小的片段,为每个片段生成音频,然后无缝地将它们合并成一个音频文件。这个过程对用户完全透明,确保您可以转换任何长度的文本,而无需担心技术限制。生成的音频在整个内容中保持一致的质量和自然的流畅性。"
113
+ },
114
+ "error_messages": {
115
+ "empty_text": "请输入要转换的文本。",
116
+ "generation_failed": "语音生成失败。请重试。",
117
+ "network_error": "网络错误。请检查您的连接并重试。",
118
+ "invalid_format": "选择的音频格式无效。",
119
+ "invalid_voice": "选择的声音无效。",
120
+ "text_too_long": "文本太长。请减少长度或启用自动合并。",
121
+ "server_error": "服务器错误。请稍后重试。"
122
+ },
123
+ "success_messages": {
124
+ "generation_complete": "语音生成成功!",
125
+ "text_copied": "文本已复制到剪贴板!",
126
+ "download_started": "下载已开始!"
127
+ }
128
+ },
129
+ "docs": {
130
+ "title": "API文档",
131
+ "subtitle": "TTSFM文本转语音API的完整参考。免费、简单且强大。",
132
+ "contents": "目录",
133
+ "overview": "概述",
134
+ "authentication": "身份验证",
135
+ "text_validation": "文本验证",
136
+ "endpoints": "API端点",
137
+ "voices": "声音",
138
+ "formats": "音频格式",
139
+ "generate": "生成语音",
140
+ "combined": "合并音频",
141
+ "status": "状态和健康检查",
142
+ "errors": "错误处理",
143
+ "examples": "代码示例",
144
+ "python_package": "Python包",
145
+ "overview_title": "概述",
146
+ "overview_desc": "TTSFM API提供现代的、OpenAI兼容的文本转语音生成接口。它支持多种声音、音频格式,并包含高级功能,如文本长度验证和智能自动合并功能。",
147
+ "base_url": "基础URL:",
148
+ "key_features": "主要特性",
149
+ "feature_voices": "11种不同的声音选项 - 从alloy、echo、nova等中选择",
150
+ "feature_formats": "多种音频格式 - 支持MP3、WAV、OPUS、AAC、FLAC、PCM",
151
+ "feature_openai": "OpenAI兼容性 - OpenAI TTS API的直接替代品",
152
+ "feature_auto_combine": "自动合并功能 - 自动处理长文本(>4096字符),通过分割和合并音频",
153
+ "feature_validation": "文本长度验证 - 智能验证,可配置限制",
154
+ "feature_monitoring": "实时监控 - 状态端点和健康检查",
155
+ "new_version": "v3.2.3新功能:",
156
+ "new_version_desc": "增强的`/v1/audio/speech`端点,具有智能自动合并功能。简化的Web界面,设计简洁、用户友好,自动处理长文本!",
157
+ "authentication_title": "身份验证",
158
+ "authentication_desc": "目前,API支持可选的API密钥身份验证。如果已配置,请在请求头中包含您的API密钥。",
159
+ "text_validation_title": "文本长度验证",
160
+ "text_validation_desc": "TTSFM包含内置的文本长度验证,以确保与TTS模型的兼容性。默认最大长度为4096个字符,但可以自定义。",
161
+ "important": "重要:",
162
+ "text_validation_warning": "超过最大长度的文本将被拒绝,除非禁用验证或将文本分割成块。",
163
+ "validation_options": "验证选项",
164
+ "max_length_option": "允许的最大字符数(默认:4096)",
165
+ "validate_length_option": "启用/禁用验证(默认:true)",
166
+ "preserve_words_option": "分块时避免分割单词(默认:true)",
167
+ "endpoints_title": "API端点",
168
+ "get_voices_desc": "获取可用声音列表。",
169
+ "get_formats_desc": "获取支持的音频格式列表。",
170
+ "validate_text_desc": "验证文本长度并获取分割建议。",
171
+ "generate_speech_desc": "从文本生成语音。",
172
+ "response_example": "响应示例:",
173
+ "request_body": "请求体:",
174
+ "parameters": "参数:",
175
+ "text_param": "要转换为语音的文本",
176
+ "voice_param": "声音ID(默认:\"alloy\")",
177
+ "format_param": "音频格式(默认:\"mp3\")",
178
+ "instructions_param": "声音调制指令",
179
+ "max_length_param": "最大文本长度(默认:4096)",
180
+ "validate_length_param": "启用验证(默认:true)",
181
+ "response": "响应:",
182
+ "response_audio": "返回带有适当Content-Type头的音频文件。",
183
+ "response_combined_audio": "返回包含所有块无缝合并的单个音频文件。",
184
+ "required": "必需",
185
+ "optional": "可选",
186
+ "python_package_title": "Python包",
187
+ "long_text_support": "长文本支持",
188
+ "long_text_desc": "TTSFM Python包包含内置的长文本分割功能,为需要精细控制的开发者提供支持:",
189
+ "developer_features": "开发者功能:",
190
+ "manual_splitting": "手动分割:对高级用例的文本分块进行完全控制",
191
+ "word_preservation": "单词保护:维护单词边界以获得自然语音",
192
+ "separate_files": "单独文件:每个块保存为单独的音频文件",
193
+ "cli_support": "CLI支持:使用`--split-long-text`标志进行命令行使用",
194
+ "note": "注意:",
195
+ "auto_combine_note": "对于Web用户,建议使用`/v1/audio/speech`中的自动合并功能,因为它会自动处理长文本并返回单个无缝音频文件。",
196
+ "combined_audio_desc": "从长文本生成单个合并的音频文件。自动将文本分割成块,为每个块生成语音,并将它们合并成一个无缝的音频文件。",
197
+ "response_headers": "响应头:",
198
+ "chunks_combined_header": "合并的块数",
199
+ "original_text_length_header": "原始文本长度(字符数)",
200
+ "audio_size_header": "最终音频文件大小(字节)",
201
+ "openai_compatible_desc": "增强的OpenAI兼容端点,具有自动合并功能。在需要时自动处理长文本,通过分割和合并音频块。",
202
+ "enhanced_parameters": "增强参数:",
203
+ "auto_combine_param": "自动分割长文本并将音频块合并为单个文件",
204
+ "auto_combine_false": "如果文本超过max_length则返回错误(标准OpenAI行为)",
205
+ "max_length_chunk_param": "分割时每个块的最大字符数",
206
+ "auto_combine_header": "是否启用了自动合并(true/false)",
207
+ "chunks_combined_response": "合并的音频块数(短文本为1)",
208
+ "original_text_response": "原始文本长度(用于长文本处理)",
209
+ "audio_format_header": "响应的音频格式",
210
+ "audio_size_response": "音频文件大小(字节)",
211
+ "short_text_comment": "短文本(正常工作)",
212
+ "long_text_auto_comment": "带自动合并的长文本(默认)",
213
+ "long_text_no_auto_comment": "不带自动合并的长文本(将出错)",
214
+ "audio_combination": "音频合并:",
215
+ "audio_combination_desc": "在可用时使用高级音频处理(PyDub),在不同环境中具有智能回退。支持所有音频格式。",
216
+ "use_cases": "使用场景:",
217
+ "use_case_articles": "长文章:将博客文章或文章转换为单个音频文件",
218
+ "use_case_audiobooks": "有声书:将章节生成为单个音频文件",
219
+ "use_case_podcasts": "播客:从脚本创建播客剧集",
220
+ "use_case_education": "教育内容:将学习材料转换为音频",
221
+ "example_usage": "使用示例:",
222
+ "python_example_comment": "Python示例"
223
+ }
224
+ }
ttsfm-web/websocket_handler.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ WebSocket handler for real-time TTS streaming.
3
+
4
+ Because apparently waiting 2 seconds for audio generation is too much for modern users.
5
+ At least this will make it FEEL faster.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import logging
11
+ import uuid
12
+ import time
13
+ from typing import Optional, Dict, Any
14
+ from datetime import datetime
15
+
16
+ from flask_socketio import SocketIO, emit, disconnect
17
+ from flask import request
18
+
19
+ from ttsfm import TTSClient, Voice, AudioFormat, TTSException
20
+ from ttsfm.utils import split_text_by_length, estimate_audio_duration
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ class WebSocketTTSHandler:
26
+ """
27
+ Handles WebSocket connections for streaming TTS generation.
28
+
29
+ Because your users can't wait 2 seconds for a complete response.
30
+ """
31
+
32
+ def __init__(self, socketio: SocketIO, tts_client: TTSClient):
33
+ self.socketio = socketio
34
+ self.tts_client = tts_client
35
+ self.active_sessions: Dict[str, Dict[str, Any]] = {}
36
+
37
+ # Register WebSocket events
38
+ self._register_events()
39
+
40
+ def _register_events(self):
41
+ """Register all WebSocket event handlers."""
42
+
43
+ @self.socketio.on('connect')
44
+ def handle_connect():
45
+ """Handle new WebSocket connection."""
46
+ session_id = request.sid
47
+ self.active_sessions[session_id] = {
48
+ 'connected_at': datetime.now(),
49
+ 'request_count': 0,
50
+ 'last_request': None
51
+ }
52
+ logger.info(f"WebSocket client connected: {session_id}")
53
+ emit('connected', {'session_id': session_id, 'status': 'ready'})
54
+
55
+ @self.socketio.on('disconnect')
56
+ def handle_disconnect():
57
+ """Handle WebSocket disconnection."""
58
+ session_id = request.sid
59
+ if session_id in self.active_sessions:
60
+ del self.active_sessions[session_id]
61
+ logger.info(f"WebSocket client disconnected: {session_id}")
62
+
63
+ @self.socketio.on('generate_stream')
64
+ def handle_generate_stream(data):
65
+ """
66
+ Handle streaming TTS generation request.
67
+
68
+ Expected data format:
69
+ {
70
+ 'text': str,
71
+ 'voice': str,
72
+ 'format': str,
73
+ 'chunk_size': int (optional, default 1024 chars),
74
+ 'instructions': str (optional, voice modulation instructions)
75
+ }
76
+ """
77
+ session_id = request.sid
78
+ request_id = data.get('request_id', str(uuid.uuid4()))
79
+
80
+ # Update session info
81
+ if session_id in self.active_sessions:
82
+ self.active_sessions[session_id]['request_count'] += 1
83
+ self.active_sessions[session_id]['last_request'] = datetime.now()
84
+
85
+ # Emit acknowledgment
86
+ emit('stream_started', {
87
+ 'request_id': request_id,
88
+ 'timestamp': time.time()
89
+ })
90
+
91
+ # Start async generation
92
+ self.socketio.start_background_task(
93
+ self._generate_stream,
94
+ session_id,
95
+ request_id,
96
+ data
97
+ )
98
+
99
+ @self.socketio.on('cancel_stream')
100
+ def handle_cancel_stream(data):
101
+ """Handle stream cancellation request."""
102
+ request_id = data.get('request_id')
103
+ session_id = request.sid
104
+
105
+ # In a real implementation, you'd track and cancel the actual generation
106
+ logger.info(f"Stream cancellation requested: {request_id}")
107
+ emit('stream_cancelled', {'request_id': request_id})
108
+
109
+ def _generate_stream(self, session_id: str, request_id: str, data: Dict[str, Any]):
110
+ """
111
+ Generate TTS audio in chunks and stream to client.
112
+
113
+ This is where the magic happens. And by magic, I mean
114
+ chunking text and pretending it's real-time.
115
+ """
116
+ try:
117
+ # Extract parameters
118
+ text = data.get('text', '')
119
+ voice = data.get('voice', 'alloy')
120
+ format_str = data.get('format', 'mp3')
121
+ chunk_size = data.get('chunk_size', 1024)
122
+ instructions = data.get('instructions', None) # Voice instructions support!
123
+
124
+ if not text:
125
+ self._emit_error(session_id, request_id, "No text provided")
126
+ return
127
+
128
+ # Convert string parameters to enums
129
+ try:
130
+ voice_enum = Voice(voice.lower())
131
+ format_enum = AudioFormat(format_str.lower())
132
+ except ValueError as e:
133
+ self._emit_error(session_id, request_id, f"Invalid parameter: {str(e)}")
134
+ return
135
+
136
+ # Split text into chunks for "streaming" effect
137
+ chunks = split_text_by_length(text, chunk_size, preserve_words=True)
138
+ total_chunks = len(chunks)
139
+
140
+ logger.info(f"Starting stream generation: {request_id} with {total_chunks} chunks")
141
+
142
+ # Emit initial progress
143
+ self.socketio.emit('stream_progress', {
144
+ 'request_id': request_id,
145
+ 'progress': 0,
146
+ 'total_chunks': total_chunks,
147
+ 'status': 'processing'
148
+ }, room=session_id)
149
+
150
+ # Process each chunk
151
+ for i, chunk in enumerate(chunks):
152
+ # Check if client is still connected
153
+ if session_id not in self.active_sessions:
154
+ logger.warning(f"Client disconnected during generation: {session_id}")
155
+ break
156
+
157
+ try:
158
+ # Generate audio for chunk
159
+ start_time = time.time()
160
+ response = self.tts_client.generate_speech(
161
+ text=chunk,
162
+ voice=voice_enum,
163
+ response_format=format_enum,
164
+ instructions=instructions, # Pass voice instructions!
165
+ validate_length=False # We already chunked it
166
+ )
167
+ generation_time = time.time() - start_time
168
+
169
+ # Emit chunk data
170
+ chunk_data = {
171
+ 'request_id': request_id,
172
+ 'chunk_index': i,
173
+ 'total_chunks': total_chunks,
174
+ 'audio_data': response.audio_data.hex(), # Convert bytes to hex string
175
+ 'format': format_enum.value,
176
+ 'duration': response.duration,
177
+ 'generation_time': generation_time,
178
+ 'chunk_text': chunk[:50] + '...' if len(chunk) > 50 else chunk
179
+ }
180
+
181
+ self.socketio.emit('audio_chunk', chunk_data, room=session_id)
182
+
183
+ # Emit progress update
184
+ progress = int(((i + 1) / total_chunks) * 100)
185
+ self.socketio.emit('stream_progress', {
186
+ 'request_id': request_id,
187
+ 'progress': progress,
188
+ 'total_chunks': total_chunks,
189
+ 'chunks_completed': i + 1,
190
+ 'status': 'processing'
191
+ }, room=session_id)
192
+
193
+ # Small delay to prevent overwhelming the client
194
+ # (and to make it feel more "real-time")
195
+ self.socketio.sleep(0.1)
196
+
197
+ except Exception as e:
198
+ logger.error(f"Error generating chunk {i}: {str(e)}")
199
+ self._emit_error(session_id, request_id, f"Chunk {i} generation failed: {str(e)}")
200
+ # Continue with next chunk instead of failing completely
201
+ continue
202
+
203
+ # Emit completion
204
+ self.socketio.emit('stream_complete', {
205
+ 'request_id': request_id,
206
+ 'total_chunks': total_chunks,
207
+ 'status': 'completed',
208
+ 'timestamp': time.time()
209
+ }, room=session_id)
210
+
211
+ logger.info(f"Stream generation completed: {request_id}")
212
+
213
+ except Exception as e:
214
+ logger.error(f"Stream generation failed: {str(e)}")
215
+ self._emit_error(session_id, request_id, str(e))
216
+
217
+ def _emit_error(self, session_id: str, request_id: str, error_message: str):
218
+ """Emit error to specific session."""
219
+ self.socketio.emit('stream_error', {
220
+ 'request_id': request_id,
221
+ 'error': error_message,
222
+ 'timestamp': time.time()
223
+ }, room=session_id)
224
+
225
+ def get_active_sessions_count(self) -> int:
226
+ """Get count of active WebSocket sessions."""
227
+ return len(self.active_sessions)
228
+
229
+ def get_session_info(self, session_id: str) -> Optional[Dict[str, Any]]:
230
+ """Get information about a specific session."""
231
+ return self.active_sessions.get(session_id)
ttsfm/__init__.py CHANGED
@@ -1,183 +1,193 @@
1
- """
2
- TTSFM - Text-to-Speech for Free using OpenAI.fm
3
-
4
- A Python library for generating high-quality text-to-speech audio using the free OpenAI.fm service.
5
- Supports multiple voices and audio formats with a simple, intuitive API.
6
-
7
- Features:
8
- - 🎤 6 premium AI voices (alloy, echo, fable, nova, onyx, shimmer)
9
- - 🎵 6 audio formats (MP3, WAV, OPUS, AAC, FLAC, PCM)
10
- - 🚀 Fast and reliable speech generation
11
- - 📝 Comprehensive text processing and validation
12
- - 🔄 Automatic retry with exponential backoff
13
- - 📊 Detailed response metadata and statistics
14
- - 🌐 Both synchronous and asynchronous APIs
15
- - 🎯 OpenAI-compatible API format
16
- - 🔧 Smart format optimization for best quality
17
-
18
- Audio Format Support:
19
- - MP3: Good quality, small file size - ideal for web and general use
20
- - WAV: Lossless quality, large file size - ideal for professional use
21
- - OPUS: High-quality compressed audio - ideal for streaming
22
- - AAC: Advanced audio codec - ideal for mobile devices
23
- - FLAC: Lossless compression - ideal for archival
24
- - PCM: Raw audio data - ideal for processing
25
-
26
- Example:
27
- >>> from ttsfm import TTSClient, Voice, AudioFormat
28
- >>>
29
- >>> client = TTSClient()
30
- >>>
31
- >>> # Generate MP3 audio
32
- >>> mp3_response = client.generate_speech(
33
- ... text="Hello, world!",
34
- ... voice=Voice.ALLOY,
35
- ... response_format=AudioFormat.MP3
36
- ... )
37
- >>> mp3_response.save_to_file("hello") # Saves as hello.mp3
38
- >>>
39
- >>> # Generate WAV audio
40
- >>> wav_response = client.generate_speech(
41
- ... text="High quality audio",
42
- ... voice=Voice.NOVA,
43
- ... response_format=AudioFormat.WAV
44
- ... )
45
- >>> wav_response.save_to_file("audio") # Saves as audio.wav
46
- >>>
47
- >>> # Generate OPUS audio
48
- >>> opus_response = client.generate_speech(
49
- ... text="Compressed audio",
50
- ... voice=Voice.ECHO,
51
- ... response_format=AudioFormat.OPUS
52
- ... )
53
- >>> opus_response.save_to_file("compressed") # Saves as compressed.wav
54
- """
55
-
56
- from .client import TTSClient
57
- from .async_client import AsyncTTSClient
58
- from .models import (
59
- TTSRequest,
60
- TTSResponse,
61
- Voice,
62
- AudioFormat,
63
- TTSError,
64
- APIError,
65
- NetworkError,
66
- ValidationError
67
- )
68
- from .exceptions import (
69
- TTSException,
70
- APIException,
71
- NetworkException,
72
- ValidationException,
73
- RateLimitException,
74
- AuthenticationException
75
- )
76
- from .utils import (
77
- validate_text_length,
78
- split_text_by_length
79
- )
80
-
81
- __version__ = "3.0.0"
82
- __author__ = "dbcccc"
83
- __email__ = "[email protected]"
84
- __description__ = "Text-to-Speech API Client with OpenAI compatibility"
85
- __url__ = "https://github.com/dbccccccc/ttsfm"
86
-
87
- # Default client instance for convenience
88
- default_client = None
89
-
90
- def create_client(base_url: str = None, api_key: str = None, **kwargs) -> TTSClient:
91
- """
92
- Create a new TTS client instance.
93
-
94
- Args:
95
- base_url: Base URL for the TTS service
96
- api_key: API key for authentication (if required)
97
- **kwargs: Additional client configuration
98
-
99
- Returns:
100
- TTSClient: Configured client instance
101
- """
102
- return TTSClient(base_url=base_url, api_key=api_key, **kwargs)
103
-
104
- def create_async_client(base_url: str = None, api_key: str = None, **kwargs) -> AsyncTTSClient:
105
- """
106
- Create a new async TTS client instance.
107
-
108
- Args:
109
- base_url: Base URL for the TTS service
110
- api_key: API key for authentication (if required)
111
- **kwargs: Additional client configuration
112
-
113
- Returns:
114
- AsyncTTSClient: Configured async client instance
115
- """
116
- return AsyncTTSClient(base_url=base_url, api_key=api_key, **kwargs)
117
-
118
- def set_default_client(client: TTSClient) -> None:
119
- """Set the default client instance for convenience functions."""
120
- global default_client
121
- default_client = client
122
-
123
- def generate_speech(text: str, voice: str = "alloy", **kwargs) -> bytes:
124
- """
125
- Convenience function to generate speech using the default client.
126
-
127
- Args:
128
- text: Text to convert to speech
129
- voice: Voice to use for generation
130
- **kwargs: Additional generation parameters
131
-
132
- Returns:
133
- bytes: Generated audio data
134
-
135
- Raises:
136
- TTSException: If no default client is set or generation fails
137
- """
138
- if default_client is None:
139
- raise TTSException("No default client set. Use create_client() first.")
140
-
141
- return default_client.generate_speech(text=text, voice=voice, **kwargs)
142
-
143
- # Export all public components
144
- __all__ = [
145
- # Main classes
146
- "TTSClient",
147
- "AsyncTTSClient",
148
-
149
- # Models
150
- "TTSRequest",
151
- "TTSResponse",
152
- "Voice",
153
- "AudioFormat",
154
- "TTSError",
155
- "APIError",
156
- "NetworkError",
157
- "ValidationError",
158
-
159
- # Exceptions
160
- "TTSException",
161
- "APIException",
162
- "NetworkException",
163
- "ValidationException",
164
- "RateLimitException",
165
- "AuthenticationException",
166
-
167
- # Factory functions
168
- "create_client",
169
- "create_async_client",
170
- "set_default_client",
171
- "generate_speech",
172
-
173
- # Utility functions
174
- "validate_text_length",
175
- "split_text_by_length",
176
-
177
- # Package metadata
178
- "__version__",
179
- "__author__",
180
- "__email__",
181
- "__description__",
182
- "__url__"
183
- ]
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TTSFM - Text-to-Speech for Free using OpenAI.fm
3
+
4
+ A Python library for generating high-quality text-to-speech audio using the free OpenAI.fm service.
5
+ Supports multiple voices and audio formats with a simple, intuitive API.
6
+
7
+ Example:
8
+ >>> from ttsfm import TTSClient, Voice, AudioFormat
9
+ >>>
10
+ >>> client = TTSClient()
11
+ >>>
12
+ >>> # Generate MP3 audio
13
+ >>> mp3_response = client.generate_speech(
14
+ ... text="Hello, world!",
15
+ ... voice=Voice.ALLOY,
16
+ ... response_format=AudioFormat.MP3
17
+ ... )
18
+ >>> mp3_response.save_to_file("hello") # Saves as hello.mp3
19
+ >>>
20
+ >>> # Generate WAV audio
21
+ >>> wav_response = client.generate_speech(
22
+ ... text="High quality audio",
23
+ ... voice=Voice.NOVA,
24
+ ... response_format=AudioFormat.WAV
25
+ ... )
26
+ >>> wav_response.save_to_file("audio") # Saves as audio.wav
27
+ >>>
28
+ >>> # Generate OPUS audio
29
+ >>> opus_response = client.generate_speech(
30
+ ... text="Compressed audio",
31
+ ... voice=Voice.ECHO,
32
+ ... response_format=AudioFormat.OPUS
33
+ ... )
34
+ >>> opus_response.save_to_file("compressed") # Saves as compressed.wav
35
+ """
36
+
37
+ from .client import TTSClient
38
+ from .async_client import AsyncTTSClient
39
+ from .models import (
40
+ TTSRequest,
41
+ TTSResponse,
42
+ Voice,
43
+ AudioFormat,
44
+ TTSError,
45
+ APIError,
46
+ NetworkError,
47
+ ValidationError
48
+ )
49
+ from .exceptions import (
50
+ TTSException,
51
+ APIException,
52
+ NetworkException,
53
+ ValidationException,
54
+ RateLimitException,
55
+ AuthenticationException,
56
+ ServiceUnavailableException,
57
+ QuotaExceededException,
58
+ AudioProcessingException
59
+ )
60
+ from .utils import (
61
+ validate_text_length,
62
+ split_text_by_length
63
+ )
64
+
65
+ __version__ = "3.2.3"
66
+ __author__ = "dbcccc"
67
+ __email__ = "[email protected]"
68
+ __description__ = "Text-to-Speech API Client with OpenAI compatibility"
69
+ __url__ = "https://github.com/dbccccccc/ttsfm"
70
+
71
+ # Default client instance for convenience
72
+ default_client = None
73
+
74
+ def create_client(base_url: str = None, api_key: str = None, **kwargs) -> TTSClient:
75
+ """
76
+ Create a new TTS client instance.
77
+
78
+ Args:
79
+ base_url: Base URL for the TTS service
80
+ api_key: API key for authentication (if required)
81
+ **kwargs: Additional client configuration
82
+
83
+ Returns:
84
+ TTSClient: Configured client instance
85
+ """
86
+ return TTSClient(base_url=base_url, api_key=api_key, **kwargs)
87
+
88
+ def create_async_client(base_url: str = None, api_key: str = None, **kwargs) -> AsyncTTSClient:
89
+ """
90
+ Create a new async TTS client instance.
91
+
92
+ Args:
93
+ base_url: Base URL for the TTS service
94
+ api_key: API key for authentication (if required)
95
+ **kwargs: Additional client configuration
96
+
97
+ Returns:
98
+ AsyncTTSClient: Configured async client instance
99
+ """
100
+ return AsyncTTSClient(base_url=base_url, api_key=api_key, **kwargs)
101
+
102
+ def set_default_client(client: TTSClient) -> None:
103
+ """Set the default client instance for convenience functions."""
104
+ global default_client
105
+ default_client = client
106
+
107
+ def generate_speech(text: str, voice: str = "alloy", **kwargs) -> bytes:
108
+ """
109
+ Convenience function to generate speech using the default client.
110
+
111
+ Args:
112
+ text: Text to convert to speech
113
+ voice: Voice to use for generation
114
+ **kwargs: Additional generation parameters
115
+
116
+ Returns:
117
+ bytes: Generated audio data
118
+
119
+ Raises:
120
+ TTSException: If no default client is set or generation fails
121
+ """
122
+ if default_client is None:
123
+ raise TTSException("No default client set. Use create_client() first.")
124
+
125
+ return default_client.generate_speech(text=text, voice=voice, **kwargs)
126
+
127
+ def generate_speech_long_text(text: str, voice: str = "alloy", **kwargs) -> list:
128
+ """
129
+ Convenience function to generate speech from long text using the default client.
130
+
131
+ Automatically splits long text into chunks and generates speech for each chunk.
132
+
133
+ Args:
134
+ text: Text to convert to speech (can be longer than 4096 characters)
135
+ voice: Voice to use for generation
136
+ **kwargs: Additional generation parameters (max_length, preserve_words, etc.)
137
+
138
+ Returns:
139
+ list: List of TTSResponse objects for each chunk
140
+
141
+ Raises:
142
+ TTSException: If no default client is set or generation fails
143
+ """
144
+ if default_client is None:
145
+ raise TTSException("No default client set. Use create_client() first.")
146
+
147
+ return default_client.generate_speech_long_text(text=text, voice=voice, **kwargs)
148
+
149
+ # Export all public components
150
+ __all__ = [
151
+ # Main classes
152
+ "TTSClient",
153
+ "AsyncTTSClient",
154
+
155
+ # Models
156
+ "TTSRequest",
157
+ "TTSResponse",
158
+ "Voice",
159
+ "AudioFormat",
160
+ "TTSError",
161
+ "APIError",
162
+ "NetworkError",
163
+ "ValidationError",
164
+
165
+ # Exceptions
166
+ "TTSException",
167
+ "APIException",
168
+ "NetworkException",
169
+ "ValidationException",
170
+ "RateLimitException",
171
+ "AuthenticationException",
172
+ "ServiceUnavailableException",
173
+ "QuotaExceededException",
174
+ "AudioProcessingException",
175
+
176
+ # Factory functions
177
+ "create_client",
178
+ "create_async_client",
179
+ "set_default_client",
180
+ "generate_speech",
181
+ "generate_speech_long_text",
182
+
183
+ # Utility functions
184
+ "validate_text_length",
185
+ "split_text_by_length",
186
+
187
+ # Package metadata
188
+ "__version__",
189
+ "__author__",
190
+ "__email__",
191
+ "__description__",
192
+ "__url__"
193
+ ]
ttsfm/async_client.py CHANGED
@@ -1,464 +1,504 @@
1
- """
2
- Asynchronous TTS client implementation.
3
-
4
- This module provides the AsyncTTSClient class for asynchronous
5
- text-to-speech generation with OpenAI-compatible API.
6
- """
7
-
8
- import json
9
- import uuid
10
- import asyncio
11
- import logging
12
- from typing import Optional, Dict, Any, Union, List
13
-
14
- import aiohttp
15
- from aiohttp import ClientTimeout, ClientSession
16
-
17
- from .models import (
18
- TTSRequest, TTSResponse, Voice, AudioFormat,
19
- get_content_type, get_format_from_content_type
20
- )
21
- from .exceptions import (
22
- TTSException, APIException, NetworkException, ValidationException,
23
- create_exception_from_response
24
- )
25
- from .utils import (
26
- get_realistic_headers, sanitize_text, validate_url, build_url,
27
- exponential_backoff, estimate_audio_duration, format_file_size,
28
- validate_text_length, split_text_by_length
29
- )
30
-
31
-
32
- logger = logging.getLogger(__name__)
33
-
34
-
35
- class AsyncTTSClient:
36
- """
37
- Asynchronous TTS client for text-to-speech generation.
38
-
39
- This client provides an async interface for generating speech from text
40
- using OpenAI-compatible TTS services with support for concurrent requests.
41
-
42
- Attributes:
43
- base_url: Base URL for the TTS service
44
- api_key: API key for authentication (if required)
45
- timeout: Request timeout in seconds
46
- max_retries: Maximum number of retry attempts
47
- verify_ssl: Whether to verify SSL certificates
48
- max_concurrent: Maximum concurrent requests
49
- """
50
-
51
- def __init__(
52
- self,
53
- base_url: str = "https://www.openai.fm",
54
- api_key: Optional[str] = None,
55
- timeout: float = 30.0,
56
- max_retries: int = 3,
57
- verify_ssl: bool = True,
58
- max_concurrent: int = 10,
59
- **kwargs
60
- ):
61
- """
62
- Initialize the async TTS client.
63
-
64
- Args:
65
- base_url: Base URL for the TTS service
66
- api_key: API key for authentication
67
- timeout: Request timeout in seconds
68
- max_retries: Maximum retry attempts
69
- verify_ssl: Whether to verify SSL certificates
70
- max_concurrent: Maximum concurrent requests
71
- **kwargs: Additional configuration options
72
- """
73
- self.base_url = base_url.rstrip('/')
74
- self.api_key = api_key
75
- self.timeout = timeout
76
- self.max_retries = max_retries
77
- self.verify_ssl = verify_ssl
78
- self.max_concurrent = max_concurrent
79
-
80
- # Validate base URL
81
- if not validate_url(self.base_url):
82
- raise ValidationException(f"Invalid base URL: {self.base_url}")
83
-
84
- # Session will be created when needed
85
- self._session: Optional[ClientSession] = None
86
- self._semaphore = asyncio.Semaphore(max_concurrent)
87
-
88
- logger.info(f"Initialized async TTS client with base URL: {self.base_url}")
89
-
90
- async def __aenter__(self):
91
- """Async context manager entry."""
92
- await self._ensure_session()
93
- return self
94
-
95
- async def __aexit__(self, exc_type, exc_val, exc_tb):
96
- """Async context manager exit."""
97
- await self.close()
98
-
99
- async def _ensure_session(self):
100
- """Ensure HTTP session is created."""
101
- if self._session is None or self._session.closed:
102
- # Setup headers
103
- headers = get_realistic_headers()
104
- if self.api_key:
105
- headers["Authorization"] = f"Bearer {self.api_key}"
106
-
107
- # Create timeout configuration
108
- timeout = ClientTimeout(total=self.timeout)
109
-
110
- # Create session
111
- connector = aiohttp.TCPConnector(
112
- verify_ssl=self.verify_ssl,
113
- limit=self.max_concurrent * 2
114
- )
115
-
116
- self._session = ClientSession(
117
- headers=headers,
118
- timeout=timeout,
119
- connector=connector
120
- )
121
-
122
- async def generate_speech(
123
- self,
124
- text: str,
125
- voice: Union[Voice, str] = Voice.ALLOY,
126
- response_format: Union[AudioFormat, str] = AudioFormat.MP3,
127
- instructions: Optional[str] = None,
128
- max_length: int = 4096,
129
- validate_length: bool = True,
130
- **kwargs
131
- ) -> TTSResponse:
132
- """
133
- Generate speech from text asynchronously.
134
-
135
- Args:
136
- text: Text to convert to speech
137
- voice: Voice to use for generation
138
- response_format: Audio format for output
139
- instructions: Optional instructions for voice modulation
140
- max_length: Maximum allowed text length in characters (default: 4096)
141
- validate_length: Whether to validate text length (default: True)
142
- **kwargs: Additional parameters
143
-
144
- Returns:
145
- TTSResponse: Generated audio response
146
-
147
- Raises:
148
- TTSException: If generation fails
149
- ValueError: If text exceeds max_length and validate_length is True
150
- """
151
- # Create and validate request
152
- request = TTSRequest(
153
- input=sanitize_text(text),
154
- voice=voice,
155
- response_format=response_format,
156
- instructions=instructions,
157
- max_length=max_length,
158
- validate_length=validate_length,
159
- **kwargs
160
- )
161
-
162
- return await self._make_request(request)
163
-
164
- async def generate_speech_long_text(
165
- self,
166
- text: str,
167
- voice: Union[Voice, str] = Voice.ALLOY,
168
- response_format: Union[AudioFormat, str] = AudioFormat.MP3,
169
- instructions: Optional[str] = None,
170
- max_length: int = 4096,
171
- preserve_words: bool = True,
172
- **kwargs
173
- ) -> List[TTSResponse]:
174
- """
175
- Generate speech from long text by splitting it into chunks asynchronously.
176
-
177
- This method automatically splits text that exceeds max_length into
178
- smaller chunks and generates speech for each chunk concurrently.
179
-
180
- Args:
181
- text: Text to convert to speech
182
- voice: Voice to use for generation
183
- response_format: Audio format for output
184
- instructions: Optional instructions for voice modulation
185
- max_length: Maximum length per chunk (default: 4096)
186
- preserve_words: Whether to avoid splitting words (default: True)
187
- **kwargs: Additional parameters
188
-
189
- Returns:
190
- List[TTSResponse]: List of generated audio responses
191
-
192
- Raises:
193
- TTSException: If generation fails for any chunk
194
- """
195
- # Sanitize text first
196
- clean_text = sanitize_text(text)
197
-
198
- # Split text into chunks
199
- chunks = split_text_by_length(clean_text, max_length, preserve_words)
200
-
201
- if not chunks:
202
- raise ValueError("No valid text chunks found after processing")
203
-
204
- # Create requests for all chunks
205
- requests = []
206
- for chunk in chunks:
207
- request = TTSRequest(
208
- input=chunk,
209
- voice=voice,
210
- response_format=response_format,
211
- instructions=instructions,
212
- max_length=max_length,
213
- validate_length=False, # We already split the text
214
- **kwargs
215
- )
216
- requests.append(request)
217
-
218
- # Process all chunks concurrently
219
- return await self.generate_speech_batch(requests)
220
-
221
- async def generate_speech_batch(
222
- self,
223
- requests: List[TTSRequest]
224
- ) -> List[TTSResponse]:
225
- """
226
- Generate speech for multiple requests concurrently.
227
-
228
- Args:
229
- requests: List of TTS requests
230
-
231
- Returns:
232
- List[TTSResponse]: List of generated audio responses
233
-
234
- Raises:
235
- TTSException: If any generation fails
236
- """
237
- if not requests:
238
- return []
239
-
240
- # Process requests concurrently with semaphore limiting
241
- tasks = [self._make_request(request) for request in requests]
242
- responses = await asyncio.gather(*tasks, return_exceptions=True)
243
-
244
- # Check for exceptions and convert them
245
- results = []
246
- for i, response in enumerate(responses):
247
- if isinstance(response, Exception):
248
- raise TTSException(f"Request {i} failed: {str(response)}")
249
- results.append(response)
250
-
251
- return results
252
-
253
- async def generate_speech_from_request(self, request: TTSRequest) -> TTSResponse:
254
- """
255
- Generate speech from a TTSRequest object asynchronously.
256
-
257
- Args:
258
- request: TTS request object
259
-
260
- Returns:
261
- TTSResponse: Generated audio response
262
- """
263
- return await self._make_request(request)
264
-
265
- async def _make_request(self, request: TTSRequest) -> TTSResponse:
266
- """
267
- Make the actual HTTP request to the TTS service.
268
-
269
- Args:
270
- request: TTS request object
271
-
272
- Returns:
273
- TTSResponse: Generated audio response
274
-
275
- Raises:
276
- TTSException: If request fails
277
- """
278
- await self._ensure_session()
279
-
280
- async with self._semaphore: # Limit concurrent requests
281
- url = build_url(self.base_url, "api/generate")
282
-
283
- # Prepare form data for openai.fm API
284
- form_data = {
285
- 'input': request.input,
286
- 'voice': request.voice.value,
287
- 'generation': str(uuid.uuid4()),
288
- 'response_format': request.response_format.value if hasattr(request.response_format, 'value') else str(request.response_format)
289
- }
290
-
291
- # Add prompt/instructions if provided
292
- if request.instructions:
293
- form_data['prompt'] = request.instructions
294
- else:
295
- # Default prompt for better quality
296
- form_data['prompt'] = (
297
- "Affect/personality: Natural and clear\n\n"
298
- "Tone: Friendly and professional, creating a pleasant listening experience.\n\n"
299
- "Pronunciation: Clear, articulate, and steady, ensuring each word is easily understood "
300
- "while maintaining a natural, conversational flow.\n\n"
301
- "Pause: Brief, purposeful pauses between sentences to allow time for the listener "
302
- "to process the information.\n\n"
303
- "Emotion: Warm and engaging, conveying the intended message effectively."
304
- )
305
-
306
- logger.info(f"Generating speech for text: '{request.input[:50]}...' with voice: {request.voice}")
307
-
308
- # Make request with retries
309
- for attempt in range(self.max_retries + 1):
310
- try:
311
- # Add random delay for rate limiting (except first attempt)
312
- if attempt > 0:
313
- delay = exponential_backoff(attempt - 1)
314
- logger.info(f"Retrying request after {delay:.2f}s (attempt {attempt + 1})")
315
- await asyncio.sleep(delay)
316
-
317
- # Use form data as required by openai.fm
318
- async with self._session.post(url, data=form_data) as response:
319
- # Handle different response types
320
- if response.status == 200:
321
- return await self._process_openai_fm_response(response, request)
322
- else:
323
- # Try to parse error response
324
- try:
325
- error_data = await response.json()
326
- except (json.JSONDecodeError, ValueError):
327
- text = await response.text()
328
- error_data = {"error": {"message": text or "Unknown error"}}
329
-
330
- # Create appropriate exception
331
- exception = create_exception_from_response(
332
- response.status,
333
- error_data,
334
- f"TTS request failed with status {response.status}"
335
- )
336
-
337
- # Don't retry for certain errors
338
- if response.status in [400, 401, 403, 404]:
339
- raise exception
340
-
341
- # For retryable errors, continue to next attempt
342
- if attempt == self.max_retries:
343
- raise exception
344
-
345
- logger.warning(f"Request failed with status {response.status}, retrying...")
346
- continue
347
-
348
- except asyncio.TimeoutError:
349
- if attempt == self.max_retries:
350
- raise NetworkException(
351
- f"Request timed out after {self.timeout}s",
352
- timeout=self.timeout,
353
- retry_count=attempt
354
- )
355
- logger.warning(f"Request timed out, retrying...")
356
- continue
357
-
358
- except aiohttp.ClientError as e:
359
- if attempt == self.max_retries:
360
- raise NetworkException(
361
- f"Client error: {str(e)}",
362
- retry_count=attempt
363
- )
364
- logger.warning(f"Client error, retrying...")
365
- continue
366
-
367
- # This should never be reached, but just in case
368
- raise TTSException("Maximum retries exceeded")
369
-
370
- async def _process_openai_fm_response(
371
- self,
372
- response: aiohttp.ClientResponse,
373
- request: TTSRequest
374
- ) -> TTSResponse:
375
- """
376
- Process a successful response from the openai.fm TTS service.
377
-
378
- Args:
379
- response: HTTP response object
380
- request: Original TTS request
381
-
382
- Returns:
383
- TTSResponse: Processed response object
384
- """
385
- # Get content type from response headers
386
- content_type = response.headers.get("content-type", "audio/mpeg")
387
-
388
- # Get audio data
389
- audio_data = await response.read()
390
-
391
- if not audio_data:
392
- raise APIException("Received empty audio data from openai.fm")
393
-
394
- # Determine format from content type
395
- if "audio/mpeg" in content_type or "audio/mp3" in content_type:
396
- actual_format = AudioFormat.MP3
397
- elif "audio/wav" in content_type:
398
- actual_format = AudioFormat.WAV
399
- elif "audio/opus" in content_type:
400
- actual_format = AudioFormat.OPUS
401
- elif "audio/aac" in content_type:
402
- actual_format = AudioFormat.AAC
403
- elif "audio/flac" in content_type:
404
- actual_format = AudioFormat.FLAC
405
- else:
406
- # Default to MP3 for openai.fm
407
- actual_format = AudioFormat.MP3
408
-
409
- # Estimate duration based on text length
410
- estimated_duration = estimate_audio_duration(request.input)
411
-
412
- # Check if returned format differs from requested format
413
- requested_format = request.response_format
414
- if isinstance(requested_format, str):
415
- try:
416
- requested_format = AudioFormat(requested_format.lower())
417
- except ValueError:
418
- requested_format = AudioFormat.MP3 # Default fallback
419
-
420
- # Import here to avoid circular imports
421
- from .models import maps_to_wav
422
-
423
- # Check if format differs from request
424
- if actual_format != requested_format:
425
- if maps_to_wav(requested_format.value) and actual_format.value == "wav":
426
- logger.debug(
427
- f"Format '{requested_format.value}' requested, returning WAV format."
428
- )
429
- else:
430
- logger.warning(
431
- f"Requested format '{requested_format.value}' but received '{actual_format.value}' "
432
- f"from service."
433
- )
434
-
435
- # Create response object
436
- tts_response = TTSResponse(
437
- audio_data=audio_data,
438
- content_type=content_type,
439
- format=actual_format,
440
- size=len(audio_data),
441
- duration=estimated_duration,
442
- metadata={
443
- "response_headers": dict(response.headers),
444
- "status_code": response.status,
445
- "url": str(response.url),
446
- "service": "openai.fm",
447
- "voice": request.voice.value,
448
- "original_text": request.input[:100] + "..." if len(request.input) > 100 else request.input,
449
- "requested_format": requested_format.value,
450
- "actual_format": actual_format.value
451
- }
452
- )
453
-
454
- logger.info(
455
- f"Successfully generated {format_file_size(len(audio_data))} "
456
- f"of {actual_format.value.upper()} audio from openai.fm using voice '{request.voice.value}'"
457
- )
458
-
459
- return tts_response
460
-
461
- async def close(self):
462
- """Close the HTTP session."""
463
- if self._session and not self._session.closed:
464
- await self._session.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Asynchronous TTS client implementation.
3
+
4
+ This module provides the AsyncTTSClient class for asynchronous
5
+ text-to-speech generation with OpenAI-compatible API.
6
+ """
7
+
8
+ import json
9
+ import uuid
10
+ import asyncio
11
+ import logging
12
+ from typing import Optional, Dict, Any, Union, List
13
+
14
+ import aiohttp
15
+ from aiohttp import ClientTimeout, ClientSession
16
+
17
+ from .models import (
18
+ TTSRequest, TTSResponse, Voice, AudioFormat,
19
+ get_content_type, get_format_from_content_type
20
+ )
21
+ from .exceptions import (
22
+ TTSException, APIException, NetworkException, ValidationException,
23
+ create_exception_from_response
24
+ )
25
+ from .utils import (
26
+ get_realistic_headers, sanitize_text, validate_url, build_url,
27
+ exponential_backoff, estimate_audio_duration, format_file_size,
28
+ validate_text_length, split_text_by_length
29
+ )
30
+
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
+ class AsyncTTSClient:
36
+ """
37
+ Asynchronous TTS client for text-to-speech generation.
38
+
39
+ This client provides an async interface for generating speech from text
40
+ using OpenAI-compatible TTS services with support for concurrent requests.
41
+
42
+ Attributes:
43
+ base_url: Base URL for the TTS service
44
+ api_key: API key for authentication (if required)
45
+ timeout: Request timeout in seconds
46
+ max_retries: Maximum number of retry attempts
47
+ verify_ssl: Whether to verify SSL certificates
48
+ max_concurrent: Maximum concurrent requests
49
+ """
50
+
51
+ def __init__(
52
+ self,
53
+ base_url: str = "https://www.openai.fm",
54
+ api_key: Optional[str] = None,
55
+ timeout: float = 30.0,
56
+ max_retries: int = 3,
57
+ verify_ssl: bool = True,
58
+ max_concurrent: int = 10,
59
+ **kwargs
60
+ ):
61
+ """
62
+ Initialize the async TTS client.
63
+
64
+ Args:
65
+ base_url: Base URL for the TTS service
66
+ api_key: API key for authentication
67
+ timeout: Request timeout in seconds
68
+ max_retries: Maximum retry attempts
69
+ verify_ssl: Whether to verify SSL certificates
70
+ max_concurrent: Maximum concurrent requests
71
+ **kwargs: Additional configuration options
72
+ """
73
+ self.base_url = base_url.rstrip('/')
74
+ self.api_key = api_key
75
+ self.timeout = timeout
76
+ self.max_retries = max_retries
77
+ self.verify_ssl = verify_ssl
78
+ self.max_concurrent = max_concurrent
79
+
80
+ # Validate base URL
81
+ if not validate_url(self.base_url):
82
+ raise ValidationException(f"Invalid base URL: {self.base_url}")
83
+
84
+ # Session will be created when needed
85
+ self._session: Optional[ClientSession] = None
86
+ self._semaphore = asyncio.Semaphore(max_concurrent)
87
+
88
+ logger.info(f"Initialized async TTS client with base URL: {self.base_url}")
89
+
90
+ async def __aenter__(self):
91
+ """Async context manager entry."""
92
+ await self._ensure_session()
93
+ return self
94
+
95
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
96
+ """Async context manager exit."""
97
+ await self.close()
98
+
99
+ async def _ensure_session(self):
100
+ """Ensure HTTP session is created."""
101
+ if self._session is None or self._session.closed:
102
+ # Setup headers
103
+ headers = get_realistic_headers()
104
+ if self.api_key:
105
+ headers["Authorization"] = f"Bearer {self.api_key}"
106
+
107
+ # Create timeout configuration
108
+ timeout = ClientTimeout(total=self.timeout)
109
+
110
+ # Create session
111
+ connector = aiohttp.TCPConnector(
112
+ verify_ssl=self.verify_ssl,
113
+ limit=self.max_concurrent * 2
114
+ )
115
+
116
+ self._session = ClientSession(
117
+ headers=headers,
118
+ timeout=timeout,
119
+ connector=connector
120
+ )
121
+
122
+ async def generate_speech(
123
+ self,
124
+ text: str,
125
+ voice: Union[Voice, str] = Voice.ALLOY,
126
+ response_format: Union[AudioFormat, str] = AudioFormat.MP3,
127
+ instructions: Optional[str] = None,
128
+ max_length: int = 4096,
129
+ validate_length: bool = True,
130
+ **kwargs
131
+ ) -> TTSResponse:
132
+ """
133
+ Generate speech from text asynchronously.
134
+
135
+ Args:
136
+ text: Text to convert to speech
137
+ voice: Voice to use for generation
138
+ response_format: Audio format for output
139
+ instructions: Optional instructions for voice modulation
140
+ max_length: Maximum allowed text length in characters (default: 4096)
141
+ validate_length: Whether to validate text length (default: True)
142
+ **kwargs: Additional parameters
143
+
144
+ Returns:
145
+ TTSResponse: Generated audio response
146
+
147
+ Raises:
148
+ TTSException: If generation fails
149
+ ValueError: If text exceeds max_length and validate_length is True
150
+ """
151
+ # Create and validate request
152
+ request = TTSRequest(
153
+ input=sanitize_text(text),
154
+ voice=voice,
155
+ response_format=response_format,
156
+ instructions=instructions,
157
+ max_length=max_length,
158
+ validate_length=validate_length,
159
+ **kwargs
160
+ )
161
+
162
+ return await self._make_request(request)
163
+
164
+ async def generate_speech_long_text(
165
+ self,
166
+ text: str,
167
+ voice: Union[Voice, str] = Voice.ALLOY,
168
+ response_format: Union[AudioFormat, str] = AudioFormat.MP3,
169
+ instructions: Optional[str] = None,
170
+ max_length: int = 4096,
171
+ preserve_words: bool = True,
172
+ **kwargs
173
+ ) -> List[TTSResponse]:
174
+ """
175
+ Generate speech from long text by splitting it into chunks asynchronously.
176
+
177
+ This method automatically splits text that exceeds max_length into
178
+ smaller chunks and generates speech for each chunk concurrently.
179
+
180
+ Args:
181
+ text: Text to convert to speech
182
+ voice: Voice to use for generation
183
+ response_format: Audio format for output
184
+ instructions: Optional instructions for voice modulation
185
+ max_length: Maximum length per chunk (default: 4096)
186
+ preserve_words: Whether to avoid splitting words (default: True)
187
+ **kwargs: Additional parameters
188
+
189
+ Returns:
190
+ List[TTSResponse]: List of generated audio responses
191
+
192
+ Raises:
193
+ TTSException: If generation fails for any chunk
194
+ """
195
+ # Sanitize text first
196
+ clean_text = sanitize_text(text)
197
+
198
+ # Split text into chunks
199
+ chunks = split_text_by_length(clean_text, max_length, preserve_words)
200
+
201
+ if not chunks:
202
+ raise ValueError("No valid text chunks found after processing")
203
+
204
+ # Create requests for all chunks
205
+ requests = []
206
+ for chunk in chunks:
207
+ request = TTSRequest(
208
+ input=chunk,
209
+ voice=voice,
210
+ response_format=response_format,
211
+ instructions=instructions,
212
+ max_length=max_length,
213
+ validate_length=False, # We already split the text
214
+ **kwargs
215
+ )
216
+ requests.append(request)
217
+
218
+ # Process all chunks concurrently
219
+ return await self.generate_speech_batch(requests)
220
+
221
+ async def generate_speech_from_long_text(
222
+ self,
223
+ text: str,
224
+ voice: Union[Voice, str] = Voice.ALLOY,
225
+ response_format: Union[AudioFormat, str] = AudioFormat.MP3,
226
+ instructions: Optional[str] = None,
227
+ max_length: int = 4096,
228
+ preserve_words: bool = True,
229
+ **kwargs
230
+ ) -> List[TTSResponse]:
231
+ """
232
+ Generate speech from long text by splitting it into chunks asynchronously.
233
+
234
+ This is an alias for generate_speech_long_text for consistency.
235
+
236
+ Args:
237
+ text: Text to convert to speech
238
+ voice: Voice to use for generation
239
+ response_format: Audio format for output
240
+ instructions: Optional instructions for voice modulation
241
+ max_length: Maximum length per chunk (default: 4096)
242
+ preserve_words: Whether to avoid splitting words (default: True)
243
+ **kwargs: Additional parameters
244
+
245
+ Returns:
246
+ List[TTSResponse]: List of generated audio responses
247
+
248
+ Raises:
249
+ TTSException: If generation fails for any chunk
250
+ """
251
+ return await self.generate_speech_long_text(
252
+ text=text,
253
+ voice=voice,
254
+ response_format=response_format,
255
+ instructions=instructions,
256
+ max_length=max_length,
257
+ preserve_words=preserve_words,
258
+ **kwargs
259
+ )
260
+
261
+ async def generate_speech_batch(
262
+ self,
263
+ requests: List[TTSRequest]
264
+ ) -> List[TTSResponse]:
265
+ """
266
+ Generate speech for multiple requests concurrently.
267
+
268
+ Args:
269
+ requests: List of TTS requests
270
+
271
+ Returns:
272
+ List[TTSResponse]: List of generated audio responses
273
+
274
+ Raises:
275
+ TTSException: If any generation fails
276
+ """
277
+ if not requests:
278
+ return []
279
+
280
+ # Process requests concurrently with semaphore limiting
281
+ tasks = [self._make_request(request) for request in requests]
282
+ responses = await asyncio.gather(*tasks, return_exceptions=True)
283
+
284
+ # Check for exceptions and convert them
285
+ results = []
286
+ for i, response in enumerate(responses):
287
+ if isinstance(response, Exception):
288
+ raise TTSException(f"Request {i} failed: {str(response)}")
289
+ results.append(response)
290
+
291
+ return results
292
+
293
+ async def generate_speech_from_request(self, request: TTSRequest) -> TTSResponse:
294
+ """
295
+ Generate speech from a TTSRequest object asynchronously.
296
+
297
+ Args:
298
+ request: TTS request object
299
+
300
+ Returns:
301
+ TTSResponse: Generated audio response
302
+ """
303
+ return await self._make_request(request)
304
+
305
+ async def _make_request(self, request: TTSRequest) -> TTSResponse:
306
+ """
307
+ Make the actual HTTP request to the TTS service.
308
+
309
+ Args:
310
+ request: TTS request object
311
+
312
+ Returns:
313
+ TTSResponse: Generated audio response
314
+
315
+ Raises:
316
+ TTSException: If request fails
317
+ """
318
+ await self._ensure_session()
319
+
320
+ async with self._semaphore: # Limit concurrent requests
321
+ url = build_url(self.base_url, "api/generate")
322
+
323
+ # Prepare form data for openai.fm API
324
+ form_data = {
325
+ 'input': request.input,
326
+ 'voice': request.voice.value,
327
+ 'generation': str(uuid.uuid4()),
328
+ 'response_format': request.response_format.value if hasattr(request.response_format, 'value') else str(request.response_format)
329
+ }
330
+
331
+ # Add prompt/instructions if provided
332
+ if request.instructions:
333
+ form_data['prompt'] = request.instructions
334
+ else:
335
+ # Default prompt for better quality
336
+ form_data['prompt'] = (
337
+ "Affect/personality: Natural and clear\n\n"
338
+ "Tone: Friendly and professional, creating a pleasant listening experience.\n\n"
339
+ "Pronunciation: Clear, articulate, and steady, ensuring each word is easily understood "
340
+ "while maintaining a natural, conversational flow.\n\n"
341
+ "Pause: Brief, purposeful pauses between sentences to allow time for the listener "
342
+ "to process the information.\n\n"
343
+ "Emotion: Warm and engaging, conveying the intended message effectively."
344
+ )
345
+
346
+ logger.info(f"Generating speech for text: '{request.input[:50]}...' with voice: {request.voice}")
347
+
348
+ # Make request with retries
349
+ for attempt in range(self.max_retries + 1):
350
+ try:
351
+ # Add random delay for rate limiting (except first attempt)
352
+ if attempt > 0:
353
+ delay = exponential_backoff(attempt - 1)
354
+ logger.info(f"Retrying request after {delay:.2f}s (attempt {attempt + 1})")
355
+ await asyncio.sleep(delay)
356
+
357
+ # Use form data as required by openai.fm
358
+ async with self._session.post(url, data=form_data) as response:
359
+ # Handle different response types
360
+ if response.status == 200:
361
+ return await self._process_openai_fm_response(response, request)
362
+ else:
363
+ # Try to parse error response
364
+ try:
365
+ error_data = await response.json()
366
+ except (json.JSONDecodeError, ValueError):
367
+ text = await response.text()
368
+ error_data = {"error": {"message": text or "Unknown error"}}
369
+
370
+ # Create appropriate exception
371
+ exception = create_exception_from_response(
372
+ response.status,
373
+ error_data,
374
+ f"TTS request failed with status {response.status}"
375
+ )
376
+
377
+ # Don't retry for certain errors
378
+ if response.status in [400, 401, 403, 404]:
379
+ raise exception
380
+
381
+ # For retryable errors, continue to next attempt
382
+ if attempt == self.max_retries:
383
+ raise exception
384
+
385
+ logger.warning(f"Request failed with status {response.status}, retrying...")
386
+ continue
387
+
388
+ except asyncio.TimeoutError:
389
+ if attempt == self.max_retries:
390
+ raise NetworkException(
391
+ f"Request timed out after {self.timeout}s",
392
+ timeout=self.timeout,
393
+ retry_count=attempt
394
+ )
395
+ logger.warning(f"Request timed out, retrying...")
396
+ continue
397
+
398
+ except aiohttp.ClientError as e:
399
+ if attempt == self.max_retries:
400
+ raise NetworkException(
401
+ f"Client error: {str(e)}",
402
+ retry_count=attempt
403
+ )
404
+ logger.warning(f"Client error, retrying...")
405
+ continue
406
+
407
+ # This should never be reached, but just in case
408
+ raise TTSException("Maximum retries exceeded")
409
+
410
+ async def _process_openai_fm_response(
411
+ self,
412
+ response: aiohttp.ClientResponse,
413
+ request: TTSRequest
414
+ ) -> TTSResponse:
415
+ """
416
+ Process a successful response from the openai.fm TTS service.
417
+
418
+ Args:
419
+ response: HTTP response object
420
+ request: Original TTS request
421
+
422
+ Returns:
423
+ TTSResponse: Processed response object
424
+ """
425
+ # Get content type from response headers
426
+ content_type = response.headers.get("content-type", "audio/mpeg")
427
+
428
+ # Get audio data
429
+ audio_data = await response.read()
430
+
431
+ if not audio_data:
432
+ raise APIException("Received empty audio data from openai.fm")
433
+
434
+ # Determine format from content type
435
+ if "audio/mpeg" in content_type or "audio/mp3" in content_type:
436
+ actual_format = AudioFormat.MP3
437
+ elif "audio/wav" in content_type:
438
+ actual_format = AudioFormat.WAV
439
+ elif "audio/opus" in content_type:
440
+ actual_format = AudioFormat.OPUS
441
+ elif "audio/aac" in content_type:
442
+ actual_format = AudioFormat.AAC
443
+ elif "audio/flac" in content_type:
444
+ actual_format = AudioFormat.FLAC
445
+ else:
446
+ # Default to MP3 for openai.fm
447
+ actual_format = AudioFormat.MP3
448
+
449
+ # Estimate duration based on text length
450
+ estimated_duration = estimate_audio_duration(request.input)
451
+
452
+ # Check if returned format differs from requested format
453
+ requested_format = request.response_format
454
+ if isinstance(requested_format, str):
455
+ try:
456
+ requested_format = AudioFormat(requested_format.lower())
457
+ except ValueError:
458
+ requested_format = AudioFormat.MP3 # Default fallback
459
+
460
+ # Import here to avoid circular imports
461
+ from .models import maps_to_wav
462
+
463
+ # Check if format differs from request
464
+ if actual_format != requested_format:
465
+ if maps_to_wav(requested_format.value) and actual_format.value == "wav":
466
+ logger.debug(
467
+ f"Format '{requested_format.value}' requested, returning WAV format."
468
+ )
469
+ else:
470
+ logger.warning(
471
+ f"Requested format '{requested_format.value}' but received '{actual_format.value}' "
472
+ f"from service."
473
+ )
474
+
475
+ # Create response object
476
+ tts_response = TTSResponse(
477
+ audio_data=audio_data,
478
+ content_type=content_type,
479
+ format=actual_format,
480
+ size=len(audio_data),
481
+ duration=estimated_duration,
482
+ metadata={
483
+ "response_headers": dict(response.headers),
484
+ "status_code": response.status,
485
+ "url": str(response.url),
486
+ "service": "openai.fm",
487
+ "voice": request.voice.value,
488
+ "original_text": request.input[:100] + "..." if len(request.input) > 100 else request.input,
489
+ "requested_format": requested_format.value,
490
+ "actual_format": actual_format.value
491
+ }
492
+ )
493
+
494
+ logger.info(
495
+ f"Successfully generated {format_file_size(len(audio_data))} "
496
+ f"of {actual_format.value.upper()} audio from openai.fm using voice '{request.voice.value}'"
497
+ )
498
+
499
+ return tts_response
500
+
501
+ async def close(self):
502
+ """Close the HTTP session."""
503
+ if self._session and not self._session.closed:
504
+ await self._session.close()
ttsfm/cli.py CHANGED
@@ -1,362 +1,363 @@
1
- #!/usr/bin/env python3
2
- """
3
- Command-line interface for TTSFM.
4
-
5
- This module provides a command-line interface for the TTSFM package,
6
- allowing users to generate speech from text using various options.
7
- """
8
-
9
- import argparse
10
- import sys
11
- import os
12
- from typing import Optional
13
- from pathlib import Path
14
-
15
- from .client import TTSClient
16
- from .models import Voice, AudioFormat
17
- from .exceptions import TTSException, APIException, NetworkException
18
-
19
-
20
- def create_parser() -> argparse.ArgumentParser:
21
- """Create and configure the argument parser."""
22
- parser = argparse.ArgumentParser(
23
- prog="ttsfm",
24
- description="TTSFM - Text-to-Speech API Client",
25
- formatter_class=argparse.RawDescriptionHelpFormatter,
26
- epilog="""
27
- Examples:
28
- ttsfm "Hello, world!" --output hello.mp3
29
- ttsfm "Hello, world!" --voice nova --format wav --output hello.wav
30
- ttsfm "Hello, world!" --url http://localhost:7000 --output hello.mp3
31
- ttsfm --text-file input.txt --output speech.mp3
32
- """
33
- )
34
-
35
- # Text input options (mutually exclusive)
36
- text_group = parser.add_mutually_exclusive_group(required=True)
37
- text_group.add_argument(
38
- "text",
39
- nargs="?",
40
- help="Text to convert to speech"
41
- )
42
- text_group.add_argument(
43
- "--text-file", "-f",
44
- type=str,
45
- help="Read text from file"
46
- )
47
-
48
- # Output options
49
- parser.add_argument(
50
- "--output", "-o",
51
- type=str,
52
- required=True,
53
- help="Output file path"
54
- )
55
-
56
- # TTS options
57
- parser.add_argument(
58
- "--voice", "-v",
59
- type=str,
60
- default="alloy",
61
- choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
62
- help="Voice to use for speech generation (default: alloy)"
63
- )
64
-
65
- parser.add_argument(
66
- "--format",
67
- type=str,
68
- default="mp3",
69
- choices=["mp3", "opus", "aac", "flac", "wav", "pcm"],
70
- help="Audio format (default: mp3)"
71
- )
72
-
73
- parser.add_argument(
74
- "--speed",
75
- type=float,
76
- default=1.0,
77
- help="Speech speed (0.25 to 4.0, default: 1.0)"
78
- )
79
-
80
- # Client options
81
- parser.add_argument(
82
- "--url", "-u",
83
- type=str,
84
- default="http://localhost:7000",
85
- help="TTS service URL (default: http://localhost:7000)"
86
- )
87
-
88
- parser.add_argument(
89
- "--api-key", "-k",
90
- type=str,
91
- help="API key for authentication"
92
- )
93
-
94
- parser.add_argument(
95
- "--timeout",
96
- type=float,
97
- default=30.0,
98
- help="Request timeout in seconds (default: 30.0)"
99
- )
100
-
101
- parser.add_argument(
102
- "--retries",
103
- type=int,
104
- default=3,
105
- help="Maximum number of retries (default: 3)"
106
- )
107
-
108
- # Text length validation options
109
- parser.add_argument(
110
- "--max-length",
111
- type=int,
112
- default=4096,
113
- help="Maximum text length in characters (default: 4096)"
114
- )
115
-
116
- parser.add_argument(
117
- "--no-length-validation",
118
- action="store_true",
119
- help="Disable text length validation"
120
- )
121
-
122
- parser.add_argument(
123
- "--split-long-text",
124
- action="store_true",
125
- help="Automatically split long text into chunks"
126
- )
127
-
128
- # Other options
129
- parser.add_argument(
130
- "--verbose", "-V",
131
- action="store_true",
132
- help="Enable verbose output"
133
- )
134
-
135
- parser.add_argument(
136
- "--version",
137
- action="version",
138
- version=f"%(prog)s {get_version()}"
139
- )
140
-
141
- return parser
142
-
143
-
144
- def get_version() -> str:
145
- """Get the package version."""
146
- try:
147
- from . import __version__
148
- return __version__
149
- except ImportError:
150
- return "unknown"
151
-
152
-
153
- def read_text_file(file_path: str) -> str:
154
- """Read text from a file."""
155
- try:
156
- with open(file_path, 'r', encoding='utf-8') as f:
157
- return f.read().strip()
158
- except FileNotFoundError:
159
- print(f"Error: File '{file_path}' not found.", file=sys.stderr)
160
- sys.exit(1)
161
- except Exception as e:
162
- print(f"Error reading file '{file_path}': {e}", file=sys.stderr)
163
- sys.exit(1)
164
-
165
-
166
- def validate_speed(speed: float) -> float:
167
- """Validate and return the speed parameter."""
168
- if not 0.25 <= speed <= 4.0:
169
- print("Error: Speed must be between 0.25 and 4.0", file=sys.stderr)
170
- sys.exit(1)
171
- return speed
172
-
173
-
174
- def get_voice_enum(voice_str: str) -> Voice:
175
- """Convert voice string to Voice enum."""
176
- voice_map = {
177
- "alloy": Voice.ALLOY,
178
- "echo": Voice.ECHO,
179
- "fable": Voice.FABLE,
180
- "onyx": Voice.ONYX,
181
- "nova": Voice.NOVA,
182
- "shimmer": Voice.SHIMMER,
183
- }
184
- return voice_map[voice_str.lower()]
185
-
186
-
187
- def get_format_enum(format_str: str) -> AudioFormat:
188
- """Convert format string to AudioFormat enum."""
189
- format_map = {
190
- "mp3": AudioFormat.MP3,
191
- "opus": AudioFormat.OPUS,
192
- "aac": AudioFormat.AAC,
193
- "flac": AudioFormat.FLAC,
194
- "wav": AudioFormat.WAV,
195
- "pcm": AudioFormat.PCM,
196
- }
197
- return format_map[format_str.lower()]
198
-
199
-
200
- def handle_long_text(args, text: str, voice: Voice, audio_format: AudioFormat, speed: float) -> None:
201
- """Handle long text by splitting it into chunks and generating multiple files."""
202
- from .utils import split_text_by_length
203
- import os
204
-
205
- # Split text into chunks
206
- chunks = split_text_by_length(text, args.max_length, preserve_words=True)
207
-
208
- if not chunks:
209
- print("Error: No valid text chunks found after processing.", file=sys.stderr)
210
- sys.exit(1)
211
-
212
- print(f"Split text into {len(chunks)} chunks")
213
-
214
- # Create client
215
- try:
216
- client = TTSClient(
217
- base_url=args.url,
218
- api_key=args.api_key,
219
- timeout=args.timeout,
220
- max_retries=args.retries
221
- )
222
-
223
- # Generate speech for each chunk
224
- base_name, ext = os.path.splitext(args.output)
225
-
226
- for i, chunk in enumerate(chunks, 1):
227
- if args.verbose:
228
- print(f"Processing chunk {i}/{len(chunks)} ({len(chunk)} characters)...")
229
-
230
- # Generate filename for this chunk
231
- if len(chunks) == 1:
232
- output_file = args.output
233
- else:
234
- output_file = f"{base_name}_part{i:03d}{ext}"
235
-
236
- # Generate speech for this chunk
237
- audio_data = client.generate_speech(
238
- text=chunk,
239
- voice=voice,
240
- response_format=audio_format,
241
- speed=speed,
242
- max_length=args.max_length,
243
- validate_length=False # We already split the text
244
- )
245
-
246
- # Save to file
247
- with open(output_file, 'wb') as f:
248
- f.write(audio_data)
249
-
250
- print(f"Generated: {output_file}")
251
-
252
- if len(chunks) > 1:
253
- print(f"\nGenerated {len(chunks)} audio files from long text.")
254
- print(f"Files: {base_name}_part001{ext} to {base_name}_part{len(chunks):03d}{ext}")
255
-
256
- except Exception as e:
257
- print(f"Error processing long text: {e}", file=sys.stderr)
258
- if args.verbose:
259
- import traceback
260
- traceback.print_exc()
261
- sys.exit(1)
262
-
263
-
264
- def main() -> None:
265
- """Main CLI entry point."""
266
- parser = create_parser()
267
- args = parser.parse_args()
268
-
269
- # Get text input
270
- if args.text:
271
- text = args.text
272
- else:
273
- text = read_text_file(args.text_file)
274
-
275
- if not text:
276
- print("Error: No text provided.", file=sys.stderr)
277
- sys.exit(1)
278
-
279
- # Validate parameters
280
- speed = validate_speed(args.speed)
281
- voice = get_voice_enum(args.voice)
282
- audio_format = get_format_enum(args.format)
283
-
284
- # Create output directory if needed
285
- output_path = Path(args.output)
286
- output_path.parent.mkdir(parents=True, exist_ok=True)
287
-
288
- # Check text length and handle accordingly
289
- text_length = len(text)
290
- validate_length = not args.no_length_validation
291
-
292
- if args.verbose:
293
- print(f"Text: {text[:50]}{'...' if len(text) > 50 else ''}")
294
- print(f"Text length: {text_length} characters")
295
- print(f"Max length: {args.max_length}")
296
- print(f"Length validation: {'enabled' if validate_length else 'disabled'}")
297
- print(f"Voice: {args.voice}")
298
- print(f"Format: {args.format}")
299
- print(f"Speed: {speed}")
300
- print(f"URL: {args.url}")
301
- print(f"Output: {args.output}")
302
- print()
303
-
304
- # Handle long text
305
- if text_length > args.max_length:
306
- if args.split_long_text:
307
- print(f"Text is {text_length} characters, splitting into chunks...")
308
- return handle_long_text(args, text, voice, audio_format, speed)
309
- elif validate_length:
310
- print(f"Error: Text is too long ({text_length} characters). "
311
- f"Maximum allowed is {args.max_length} characters.", file=sys.stderr)
312
- print("Use --split-long-text to automatically split the text, "
313
- "or --no-length-validation to disable this check.", file=sys.stderr)
314
- sys.exit(1)
315
-
316
- # Create client
317
- try:
318
- client = TTSClient(
319
- base_url=args.url,
320
- api_key=args.api_key,
321
- timeout=args.timeout,
322
- max_retries=args.retries
323
- )
324
-
325
- if args.verbose:
326
- print("Generating speech...")
327
-
328
- # Generate speech
329
- audio_data = client.generate_speech(
330
- text=text,
331
- voice=voice,
332
- response_format=audio_format,
333
- speed=speed,
334
- max_length=args.max_length,
335
- validate_length=validate_length
336
- )
337
-
338
- # Save to file
339
- with open(args.output, 'wb') as f:
340
- f.write(audio_data)
341
-
342
- print(f"Speech generated successfully: {args.output}")
343
-
344
- except NetworkException as e:
345
- print(f"Network error: {e}", file=sys.stderr)
346
- sys.exit(1)
347
- except APIException as e:
348
- print(f"API error: {e}", file=sys.stderr)
349
- sys.exit(1)
350
- except TTSException as e:
351
- print(f"TTS error: {e}", file=sys.stderr)
352
- sys.exit(1)
353
- except Exception as e:
354
- print(f"Unexpected error: {e}", file=sys.stderr)
355
- if args.verbose:
356
- import traceback
357
- traceback.print_exc()
358
- sys.exit(1)
359
-
360
-
361
- if __name__ == "__main__":
362
- main()
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Command-line interface for TTSFM.
4
+
5
+ This module provides a command-line interface for the TTSFM package,
6
+ allowing users to generate speech from text using various options.
7
+ """
8
+
9
+ import argparse
10
+ import sys
11
+ import os
12
+ from typing import Optional
13
+ from pathlib import Path
14
+
15
+ from .client import TTSClient
16
+ from .models import Voice, AudioFormat
17
+ from .exceptions import TTSException, APIException, NetworkException
18
+
19
+
20
+ def create_parser() -> argparse.ArgumentParser:
21
+ """Create and configure the argument parser."""
22
+ parser = argparse.ArgumentParser(
23
+ prog="ttsfm",
24
+ description="TTSFM - Text-to-Speech API Client",
25
+ formatter_class=argparse.RawDescriptionHelpFormatter,
26
+ epilog="""
27
+ Examples:
28
+ ttsfm "Hello, world!" --output hello.mp3
29
+ ttsfm "Hello, world!" --voice nova --format wav --output hello.wav
30
+ ttsfm "Hello, world!" --url http://localhost:7000 --output hello.mp3
31
+ ttsfm --text-file input.txt --output speech.mp3
32
+ """
33
+ )
34
+
35
+ # Text input options (mutually exclusive)
36
+ text_group = parser.add_mutually_exclusive_group(required=True)
37
+ text_group.add_argument(
38
+ "text",
39
+ nargs="?",
40
+ help="Text to convert to speech"
41
+ )
42
+ text_group.add_argument(
43
+ "--text-file", "-f",
44
+ type=str,
45
+ help="Read text from file"
46
+ )
47
+
48
+ # Output options
49
+ parser.add_argument(
50
+ "--output", "-o",
51
+ type=str,
52
+ required=True,
53
+ help="Output file path"
54
+ )
55
+
56
+ # TTS options
57
+ parser.add_argument(
58
+ "--voice", "-v",
59
+ type=str,
60
+ default="alloy",
61
+ choices=["alloy", "ash", "ballad", "coral", "echo", "fable", "nova", "onyx", "sage", "shimmer", "verse"],
62
+ help="Voice to use for speech generation (default: alloy)"
63
+ )
64
+
65
+ parser.add_argument(
66
+ "--format",
67
+ type=str,
68
+ default="mp3",
69
+ choices=["mp3", "opus", "aac", "flac", "wav", "pcm"],
70
+ help="Audio format (default: mp3)"
71
+ )
72
+
73
+ parser.add_argument(
74
+ "--speed",
75
+ type=float,
76
+ default=1.0,
77
+ help="Speech speed (0.25 to 4.0, default: 1.0)"
78
+ )
79
+
80
+ # Client options
81
+ parser.add_argument(
82
+ "--url", "-u",
83
+ type=str,
84
+ default="http://localhost:7000",
85
+ help="TTS service URL (default: http://localhost:7000)"
86
+ )
87
+
88
+ parser.add_argument(
89
+ "--api-key", "-k",
90
+ type=str,
91
+ help="API key for authentication"
92
+ )
93
+
94
+ parser.add_argument(
95
+ "--timeout",
96
+ type=float,
97
+ default=30.0,
98
+ help="Request timeout in seconds (default: 30.0)"
99
+ )
100
+
101
+ parser.add_argument(
102
+ "--retries",
103
+ type=int,
104
+ default=3,
105
+ help="Maximum number of retries (default: 3)"
106
+ )
107
+
108
+ # Text length validation options
109
+ parser.add_argument(
110
+ "--max-length",
111
+ type=int,
112
+ default=4096,
113
+ help="Maximum text length in characters (default: 4096)"
114
+ )
115
+
116
+ parser.add_argument(
117
+ "--no-length-validation",
118
+ action="store_true",
119
+ help="Disable text length validation"
120
+ )
121
+
122
+ parser.add_argument(
123
+ "--split-long-text",
124
+ action="store_true",
125
+ help="Automatically split long text into chunks"
126
+ )
127
+
128
+ # Other options
129
+ parser.add_argument(
130
+ "--verbose", "-V",
131
+ action="store_true",
132
+ help="Enable verbose output"
133
+ )
134
+
135
+ parser.add_argument(
136
+ "--version",
137
+ action="version",
138
+ version=f"%(prog)s {get_version()}"
139
+ )
140
+
141
+ return parser
142
+
143
+
144
+ def get_version() -> str:
145
+ """Get the package version."""
146
+ try:
147
+ from . import __version__
148
+ return __version__
149
+ except ImportError:
150
+ return "unknown"
151
+
152
+
153
+ def read_text_file(file_path: str) -> str:
154
+ """Read text from a file."""
155
+ try:
156
+ with open(file_path, 'r', encoding='utf-8') as f:
157
+ return f.read().strip()
158
+ except FileNotFoundError:
159
+ print(f"Error: File '{file_path}' not found.", file=sys.stderr)
160
+ sys.exit(1)
161
+ except Exception as e:
162
+ print(f"Error reading file '{file_path}': {e}", file=sys.stderr)
163
+ sys.exit(1)
164
+
165
+
166
+ def validate_speed(speed: float) -> float:
167
+ """Validate and return the speed parameter."""
168
+ if not 0.25 <= speed <= 4.0:
169
+ print("Error: Speed must be between 0.25 and 4.0", file=sys.stderr)
170
+ sys.exit(1)
171
+ return speed
172
+
173
+
174
+ def get_voice_enum(voice_str: str) -> Voice:
175
+ """Convert voice string to Voice enum."""
176
+ voice_map = {
177
+ "alloy": Voice.ALLOY,
178
+ "ash": Voice.ASH,
179
+ "ballad": Voice.BALLAD,
180
+ "coral": Voice.CORAL,
181
+ "echo": Voice.ECHO,
182
+ "fable": Voice.FABLE,
183
+ "nova": Voice.NOVA,
184
+ "onyx": Voice.ONYX,
185
+ "sage": Voice.SAGE,
186
+ "shimmer": Voice.SHIMMER,
187
+ "verse": Voice.VERSE,
188
+ }
189
+ return voice_map[voice_str.lower()]
190
+
191
+
192
+ def get_format_enum(format_str: str) -> AudioFormat:
193
+ """Convert format string to AudioFormat enum."""
194
+ format_map = {
195
+ "mp3": AudioFormat.MP3,
196
+ "opus": AudioFormat.OPUS,
197
+ "aac": AudioFormat.AAC,
198
+ "flac": AudioFormat.FLAC,
199
+ "wav": AudioFormat.WAV,
200
+ "pcm": AudioFormat.PCM,
201
+ }
202
+ return format_map[format_str.lower()]
203
+
204
+
205
+ def handle_long_text(args, text: str, voice: Voice, audio_format: AudioFormat, speed: float) -> None:
206
+ """Handle long text by splitting it into chunks and generating multiple files."""
207
+ import os
208
+
209
+ # Create client
210
+ try:
211
+ client = TTSClient(
212
+ base_url=args.url,
213
+ api_key=args.api_key,
214
+ timeout=args.timeout,
215
+ max_retries=args.retries
216
+ )
217
+
218
+ # Use the new long text method
219
+ responses = client.generate_speech_long_text(
220
+ text=text,
221
+ voice=voice,
222
+ response_format=audio_format,
223
+ speed=speed,
224
+ max_length=args.max_length,
225
+ preserve_words=True
226
+ )
227
+
228
+ if not responses:
229
+ print("Error: No valid text chunks found after processing.", file=sys.stderr)
230
+ sys.exit(1)
231
+
232
+ print(f"Generated {len(responses)} audio chunks")
233
+
234
+ # Save each response to a file
235
+ base_name, ext = os.path.splitext(args.output)
236
+
237
+ for i, response in enumerate(responses, 1):
238
+ if args.verbose:
239
+ print(f"Saving chunk {i}/{len(responses)}...")
240
+
241
+ # Generate filename for this chunk
242
+ if len(responses) == 1:
243
+ output_file = args.output
244
+ else:
245
+ output_file = f"{base_name}_part{i:03d}{ext}"
246
+
247
+ # Save to file
248
+ with open(output_file, 'wb') as f:
249
+ f.write(response.audio_data)
250
+
251
+ print(f"Generated: {output_file}")
252
+
253
+ if len(responses) > 1:
254
+ print(f"\nGenerated {len(responses)} audio files from long text.")
255
+ print(f"Files: {base_name}_part001{ext} to {base_name}_part{len(responses):03d}{ext}")
256
+
257
+ except Exception as e:
258
+ print(f"Error processing long text: {e}", file=sys.stderr)
259
+ if args.verbose:
260
+ import traceback
261
+ traceback.print_exc()
262
+ sys.exit(1)
263
+
264
+
265
+ def main() -> None:
266
+ """Main CLI entry point."""
267
+ parser = create_parser()
268
+ args = parser.parse_args()
269
+
270
+ # Get text input
271
+ if args.text:
272
+ text = args.text
273
+ else:
274
+ text = read_text_file(args.text_file)
275
+
276
+ if not text:
277
+ print("Error: No text provided.", file=sys.stderr)
278
+ sys.exit(1)
279
+
280
+ # Validate parameters
281
+ speed = validate_speed(args.speed)
282
+ voice = get_voice_enum(args.voice)
283
+ audio_format = get_format_enum(args.format)
284
+
285
+ # Create output directory if needed
286
+ output_path = Path(args.output)
287
+ output_path.parent.mkdir(parents=True, exist_ok=True)
288
+
289
+ # Check text length and handle accordingly
290
+ text_length = len(text)
291
+ validate_length = not args.no_length_validation
292
+
293
+ if args.verbose:
294
+ print(f"Text: {text[:50]}{'...' if len(text) > 50 else ''}")
295
+ print(f"Text length: {text_length} characters")
296
+ print(f"Max length: {args.max_length}")
297
+ print(f"Length validation: {'enabled' if validate_length else 'disabled'}")
298
+ print(f"Voice: {args.voice}")
299
+ print(f"Format: {args.format}")
300
+ print(f"Speed: {speed}")
301
+ print(f"URL: {args.url}")
302
+ print(f"Output: {args.output}")
303
+ print()
304
+
305
+ # Handle long text
306
+ if text_length > args.max_length:
307
+ if args.split_long_text:
308
+ print(f"Text is {text_length} characters, splitting into chunks...")
309
+ return handle_long_text(args, text, voice, audio_format, speed)
310
+ elif validate_length:
311
+ print(f"Error: Text is too long ({text_length} characters). "
312
+ f"Maximum allowed is {args.max_length} characters.", file=sys.stderr)
313
+ print("Use --split-long-text to automatically split the text, "
314
+ "or --no-length-validation to disable this check.", file=sys.stderr)
315
+ sys.exit(1)
316
+
317
+ # Create client
318
+ try:
319
+ client = TTSClient(
320
+ base_url=args.url,
321
+ api_key=args.api_key,
322
+ timeout=args.timeout,
323
+ max_retries=args.retries
324
+ )
325
+
326
+ if args.verbose:
327
+ print("Generating speech...")
328
+
329
+ # Generate speech
330
+ response = client.generate_speech(
331
+ text=text,
332
+ voice=voice,
333
+ response_format=audio_format,
334
+ speed=speed,
335
+ max_length=args.max_length,
336
+ validate_length=validate_length
337
+ )
338
+
339
+ # Save to file
340
+ with open(args.output, 'wb') as f:
341
+ f.write(response.audio_data)
342
+
343
+ print(f"Speech generated successfully: {args.output}")
344
+
345
+ except NetworkException as e:
346
+ print(f"Network error: {e}", file=sys.stderr)
347
+ sys.exit(1)
348
+ except APIException as e:
349
+ print(f"API error: {e}", file=sys.stderr)
350
+ sys.exit(1)
351
+ except TTSException as e:
352
+ print(f"TTS error: {e}", file=sys.stderr)
353
+ sys.exit(1)
354
+ except Exception as e:
355
+ print(f"Unexpected error: {e}", file=sys.stderr)
356
+ if args.verbose:
357
+ import traceback
358
+ traceback.print_exc()
359
+ sys.exit(1)
360
+
361
+
362
+ if __name__ == "__main__":
363
+ main()
ttsfm/client.py CHANGED
@@ -1,481 +1,530 @@
1
- """
2
- Main TTS client implementation.
3
-
4
- This module provides the primary TTSClient class for synchronous
5
- text-to-speech generation with OpenAI-compatible API.
6
- """
7
-
8
- import json
9
- import time
10
- import uuid
11
- import logging
12
- from typing import Optional, Dict, Any, Union, List
13
- from urllib.parse import urljoin
14
-
15
- import requests
16
- from requests.adapters import HTTPAdapter
17
- from urllib3.util.retry import Retry
18
-
19
- from .models import (
20
- TTSRequest, TTSResponse, Voice, AudioFormat,
21
- get_content_type, get_format_from_content_type
22
- )
23
- from .exceptions import (
24
- TTSException, APIException, NetworkException, ValidationException,
25
- create_exception_from_response
26
- )
27
- from .utils import (
28
- get_realistic_headers, sanitize_text, validate_url, build_url,
29
- exponential_backoff, estimate_audio_duration, format_file_size,
30
- validate_text_length, split_text_by_length
31
- )
32
-
33
-
34
- logger = logging.getLogger(__name__)
35
-
36
-
37
- class TTSClient:
38
- """
39
- Synchronous TTS client for text-to-speech generation.
40
-
41
- This client provides a simple interface for generating speech from text
42
- using OpenAI-compatible TTS services.
43
-
44
- Attributes:
45
- base_url: Base URL for the TTS service
46
- api_key: API key for authentication (if required)
47
- timeout: Request timeout in seconds
48
- max_retries: Maximum number of retry attempts
49
- verify_ssl: Whether to verify SSL certificates
50
- """
51
-
52
- def __init__(
53
- self,
54
- base_url: str = "https://www.openai.fm",
55
- api_key: Optional[str] = None,
56
- timeout: float = 30.0,
57
- max_retries: int = 3,
58
- verify_ssl: bool = True,
59
- preferred_format: Optional[AudioFormat] = None,
60
- **kwargs
61
- ):
62
- """
63
- Initialize the TTS client.
64
-
65
- Args:
66
- base_url: Base URL for the TTS service
67
- api_key: API key for authentication
68
- timeout: Request timeout in seconds
69
- max_retries: Maximum retry attempts
70
- verify_ssl: Whether to verify SSL certificates
71
- preferred_format: Preferred audio format (affects header selection)
72
- **kwargs: Additional configuration options
73
- """
74
- self.base_url = base_url.rstrip('/')
75
- self.api_key = api_key
76
- self.timeout = timeout
77
- self.max_retries = max_retries
78
- self.verify_ssl = verify_ssl
79
- self.preferred_format = preferred_format or AudioFormat.WAV
80
-
81
- # Validate base URL
82
- if not validate_url(self.base_url):
83
- raise ValidationException(f"Invalid base URL: {self.base_url}")
84
-
85
- # Setup HTTP session with retry strategy
86
- self.session = requests.Session()
87
-
88
- # Configure retry strategy
89
- retry_strategy = Retry(
90
- total=max_retries,
91
- status_forcelist=[429, 500, 502, 503, 504],
92
- allowed_methods=["HEAD", "GET", "POST"], # Updated parameter name
93
- backoff_factor=1
94
- )
95
-
96
- adapter = HTTPAdapter(max_retries=retry_strategy)
97
- self.session.mount("http://", adapter)
98
- self.session.mount("https://", adapter)
99
-
100
- # Set default headers
101
- self.session.headers.update(get_realistic_headers())
102
-
103
- if self.api_key:
104
- self.session.headers["Authorization"] = f"Bearer {self.api_key}"
105
-
106
- logger.info(f"Initialized TTS client with base URL: {self.base_url}")
107
-
108
- def _get_headers_for_format(self, requested_format: AudioFormat) -> Dict[str, str]:
109
- """
110
- Get appropriate headers to get the desired format from openai.fm.
111
-
112
- Based on testing, openai.fm returns:
113
- - MP3: When using simple/minimal headers
114
- - WAV: When using full Chrome security headers
115
-
116
- Args:
117
- requested_format: The desired audio format
118
-
119
- Returns:
120
- Dict[str, str]: HTTP headers optimized for the requested format
121
- """
122
- from .models import get_supported_format
123
-
124
- # Map requested format to supported format
125
- target_format = get_supported_format(requested_format)
126
-
127
- if target_format == AudioFormat.MP3:
128
- # Use minimal headers to get MP3 response
129
- return {
130
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
131
- 'Accept': 'audio/*,*/*;q=0.9'
132
- }
133
- else:
134
- # Use full realistic headers to get WAV response
135
- # This works for WAV, OPUS, AAC, FLAC, PCM formats
136
- return get_realistic_headers()
137
-
138
- def generate_speech(
139
- self,
140
- text: str,
141
- voice: Union[Voice, str] = Voice.ALLOY,
142
- response_format: Union[AudioFormat, str] = AudioFormat.MP3,
143
- instructions: Optional[str] = None,
144
- max_length: int = 4096,
145
- validate_length: bool = True,
146
- **kwargs
147
- ) -> TTSResponse:
148
- """
149
- Generate speech from text.
150
-
151
- Args:
152
- text: Text to convert to speech
153
- voice: Voice to use for generation
154
- response_format: Audio format for output
155
- instructions: Optional instructions for voice modulation
156
- max_length: Maximum allowed text length in characters (default: 4096)
157
- validate_length: Whether to validate text length (default: True)
158
- **kwargs: Additional parameters
159
-
160
- Returns:
161
- TTSResponse: Generated audio response
162
-
163
- Raises:
164
- TTSException: If generation fails
165
- ValueError: If text exceeds max_length and validate_length is True
166
- """
167
- # Create and validate request
168
- request = TTSRequest(
169
- input=sanitize_text(text),
170
- voice=voice,
171
- response_format=response_format,
172
- instructions=instructions,
173
- max_length=max_length,
174
- validate_length=validate_length,
175
- **kwargs
176
- )
177
-
178
- return self._make_request(request)
179
-
180
- def generate_speech_from_request(self, request: TTSRequest) -> TTSResponse:
181
- """
182
- Generate speech from a TTSRequest object.
183
-
184
- Args:
185
- request: TTS request object
186
-
187
- Returns:
188
- TTSResponse: Generated audio response
189
- """
190
- return self._make_request(request)
191
-
192
- def generate_speech_batch(
193
- self,
194
- text: str,
195
- voice: Union[Voice, str] = Voice.ALLOY,
196
- response_format: Union[AudioFormat, str] = AudioFormat.MP3,
197
- instructions: Optional[str] = None,
198
- max_length: int = 4096,
199
- preserve_words: bool = True,
200
- **kwargs
201
- ) -> List[TTSResponse]:
202
- """
203
- Generate speech from long text by splitting it into chunks.
204
-
205
- This method automatically splits text that exceeds max_length into
206
- smaller chunks and generates speech for each chunk separately.
207
-
208
- Args:
209
- text: Text to convert to speech
210
- voice: Voice to use for generation
211
- response_format: Audio format for output
212
- instructions: Optional instructions for voice modulation
213
- max_length: Maximum length per chunk (default: 4096)
214
- preserve_words: Whether to avoid splitting words (default: True)
215
- **kwargs: Additional parameters
216
-
217
- Returns:
218
- List[TTSResponse]: List of generated audio responses
219
-
220
- Raises:
221
- TTSException: If generation fails for any chunk
222
- """
223
-
224
- # Sanitize text first
225
- clean_text = sanitize_text(text)
226
-
227
- # Split text into chunks
228
- chunks = split_text_by_length(clean_text, max_length, preserve_words)
229
-
230
- if not chunks:
231
- raise ValueError("No valid text chunks found after processing")
232
-
233
- responses = []
234
-
235
- for i, chunk in enumerate(chunks):
236
- logger.info(f"Processing chunk {i+1}/{len(chunks)} ({len(chunk)} characters)")
237
-
238
- # Create request for this chunk (disable length validation since we already split)
239
- request = TTSRequest(
240
- input=chunk,
241
- voice=voice,
242
- response_format=response_format,
243
- instructions=instructions,
244
- max_length=max_length,
245
- validate_length=False, # We already split the text
246
- **kwargs
247
- )
248
-
249
- response = self._make_request(request)
250
- responses.append(response)
251
-
252
- return responses
253
-
254
- def _make_request(self, request: TTSRequest) -> TTSResponse:
255
- """
256
- Make the actual HTTP request to the openai.fm TTS service.
257
-
258
- Args:
259
- request: TTS request object
260
-
261
- Returns:
262
- TTSResponse: Generated audio response
263
-
264
- Raises:
265
- TTSException: If request fails
266
- """
267
- url = build_url(self.base_url, "api/generate")
268
-
269
- # Prepare form data for openai.fm API
270
- form_data = {
271
- 'input': request.input,
272
- 'voice': request.voice.value,
273
- 'generation': str(uuid.uuid4()),
274
- 'response_format': request.response_format.value if hasattr(request.response_format, 'value') else str(request.response_format)
275
- }
276
-
277
- # Add prompt/instructions if provided
278
- if request.instructions:
279
- form_data['prompt'] = request.instructions
280
- else:
281
- # Default prompt for better quality
282
- form_data['prompt'] = (
283
- "Affect/personality: Natural and clear\n\n"
284
- "Tone: Friendly and professional, creating a pleasant listening experience.\n\n"
285
- "Pronunciation: Clear, articulate, and steady, ensuring each word is easily understood "
286
- "while maintaining a natural, conversational flow.\n\n"
287
- "Pause: Brief, purposeful pauses between sentences to allow time for the listener "
288
- "to process the information.\n\n"
289
- "Emotion: Warm and engaging, conveying the intended message effectively."
290
- )
291
-
292
- # Get optimized headers for the requested format
293
- # Convert string format to AudioFormat enum if needed
294
- requested_format = request.response_format
295
- if isinstance(requested_format, str):
296
- try:
297
- requested_format = AudioFormat(requested_format.lower())
298
- except ValueError:
299
- requested_format = AudioFormat.WAV # Default to WAV for unknown formats
300
-
301
- format_headers = self._get_headers_for_format(requested_format)
302
-
303
- logger.info(f"Generating speech for text: '{request.input[:50]}...' with voice: {request.voice}")
304
- logger.debug(f"Using headers optimized for {requested_format.value} format")
305
-
306
- # Make request with retries
307
- for attempt in range(self.max_retries + 1):
308
- try:
309
- # Add random delay for rate limiting (except first attempt)
310
- if attempt > 0:
311
- delay = exponential_backoff(attempt - 1)
312
- logger.info(f"Retrying request after {delay:.2f}s (attempt {attempt + 1})")
313
- time.sleep(delay)
314
-
315
- # Use multipart form data as required by openai.fm
316
- response = self.session.post(
317
- url,
318
- data=form_data,
319
- headers=format_headers,
320
- timeout=self.timeout,
321
- verify=self.verify_ssl
322
- )
323
-
324
- # Handle different response types
325
- if response.status_code == 200:
326
- return self._process_openai_fm_response(response, request)
327
- else:
328
- # Try to parse error response
329
- try:
330
- error_data = response.json()
331
- except (json.JSONDecodeError, ValueError):
332
- error_data = {"error": {"message": response.text or "Unknown error"}}
333
-
334
- # Create appropriate exception
335
- exception = create_exception_from_response(
336
- response.status_code,
337
- error_data,
338
- f"TTS request failed with status {response.status_code}"
339
- )
340
-
341
- # Don't retry for certain errors
342
- if response.status_code in [400, 401, 403, 404]:
343
- raise exception
344
-
345
- # For retryable errors, continue to next attempt
346
- if attempt == self.max_retries:
347
- raise exception
348
-
349
- logger.warning(f"Request failed with status {response.status_code}, retrying...")
350
- continue
351
-
352
- except requests.exceptions.Timeout:
353
- if attempt == self.max_retries:
354
- raise NetworkException(
355
- f"Request timed out after {self.timeout}s",
356
- timeout=self.timeout,
357
- retry_count=attempt
358
- )
359
- logger.warning(f"Request timed out, retrying...")
360
- continue
361
-
362
- except requests.exceptions.ConnectionError as e:
363
- if attempt == self.max_retries:
364
- raise NetworkException(
365
- f"Connection error: {str(e)}",
366
- retry_count=attempt
367
- )
368
- logger.warning(f"Connection error, retrying...")
369
- continue
370
-
371
- except requests.exceptions.RequestException as e:
372
- if attempt == self.max_retries:
373
- raise NetworkException(
374
- f"Request error: {str(e)}",
375
- retry_count=attempt
376
- )
377
- logger.warning(f"Request error, retrying...")
378
- continue
379
-
380
- # This should never be reached, but just in case
381
- raise TTSException("Maximum retries exceeded")
382
-
383
- def _process_openai_fm_response(self, response: requests.Response, request: TTSRequest) -> TTSResponse:
384
- """
385
- Process a successful response from the openai.fm TTS service.
386
-
387
- Args:
388
- response: HTTP response object
389
- request: Original TTS request
390
-
391
- Returns:
392
- TTSResponse: Processed response object
393
- """
394
- # Get content type from response headers
395
- content_type = response.headers.get("content-type", "audio/mpeg")
396
-
397
- # Get audio data
398
- audio_data = response.content
399
-
400
- if not audio_data:
401
- raise APIException("Received empty audio data from openai.fm")
402
-
403
- # Determine format from content type
404
- if "audio/mpeg" in content_type or "audio/mp3" in content_type:
405
- actual_format = AudioFormat.MP3
406
- elif "audio/wav" in content_type:
407
- actual_format = AudioFormat.WAV
408
- elif "audio/opus" in content_type:
409
- actual_format = AudioFormat.OPUS
410
- elif "audio/aac" in content_type:
411
- actual_format = AudioFormat.AAC
412
- elif "audio/flac" in content_type:
413
- actual_format = AudioFormat.FLAC
414
- else:
415
- # Default to MP3 for openai.fm
416
- actual_format = AudioFormat.MP3
417
-
418
- # Estimate duration based on text length (rough approximation)
419
- estimated_duration = estimate_audio_duration(request.input)
420
-
421
- # Check if returned format differs from requested format
422
- requested_format = request.response_format
423
- if isinstance(requested_format, str):
424
- try:
425
- requested_format = AudioFormat(requested_format.lower())
426
- except ValueError:
427
- requested_format = AudioFormat.WAV # Default fallback
428
-
429
- # Import here to avoid circular imports
430
- from .models import get_supported_format, maps_to_wav
431
-
432
- # Check if format differs from request
433
- if actual_format != requested_format:
434
- if maps_to_wav(requested_format.value) and actual_format.value == "wav":
435
- logger.debug(
436
- f"Format '{requested_format.value}' requested, returning WAV format."
437
- )
438
- else:
439
- logger.warning(
440
- f"Requested format '{requested_format.value}' but received '{actual_format.value}' "
441
- f"from service."
442
- )
443
-
444
- # Create response object
445
- tts_response = TTSResponse(
446
- audio_data=audio_data,
447
- content_type=content_type,
448
- format=actual_format,
449
- size=len(audio_data),
450
- duration=estimated_duration,
451
- metadata={
452
- "response_headers": dict(response.headers),
453
- "status_code": response.status_code,
454
- "url": str(response.url),
455
- "service": "openai.fm",
456
- "voice": request.voice.value,
457
- "original_text": request.input[:100] + "..." if len(request.input) > 100 else request.input,
458
- "requested_format": requested_format.value,
459
- "actual_format": actual_format.value
460
- }
461
- )
462
-
463
- logger.info(
464
- f"Successfully generated {format_file_size(len(audio_data))} "
465
- f"of {actual_format.value.upper()} audio from openai.fm using voice '{request.voice.value}'"
466
- )
467
-
468
- return tts_response
469
-
470
- def close(self):
471
- """Close the HTTP session."""
472
- if hasattr(self, 'session'):
473
- self.session.close()
474
-
475
- def __enter__(self):
476
- """Context manager entry."""
477
- return self
478
-
479
- def __exit__(self, exc_type, exc_val, exc_tb):
480
- """Context manager exit."""
481
- self.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Main TTS client implementation.
3
+
4
+ This module provides the primary TTSClient class for synchronous
5
+ text-to-speech generation with OpenAI-compatible API.
6
+ """
7
+
8
+ import json
9
+ import time
10
+ import uuid
11
+ import logging
12
+ from typing import Optional, Dict, Any, Union, List
13
+ from urllib.parse import urljoin
14
+
15
+ import requests
16
+ from requests.adapters import HTTPAdapter
17
+ from urllib3.util.retry import Retry
18
+
19
+ from .models import (
20
+ TTSRequest, TTSResponse, Voice, AudioFormat,
21
+ get_content_type, get_format_from_content_type
22
+ )
23
+ from .exceptions import (
24
+ TTSException, APIException, NetworkException, ValidationException,
25
+ create_exception_from_response
26
+ )
27
+ from .utils import (
28
+ get_realistic_headers, sanitize_text, validate_url, build_url,
29
+ exponential_backoff, estimate_audio_duration, format_file_size,
30
+ validate_text_length, split_text_by_length
31
+ )
32
+
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
+ class TTSClient:
38
+ """
39
+ Synchronous TTS client for text-to-speech generation.
40
+
41
+ This client provides a simple interface for generating speech from text
42
+ using OpenAI-compatible TTS services.
43
+
44
+ Attributes:
45
+ base_url: Base URL for the TTS service
46
+ api_key: API key for authentication (if required)
47
+ timeout: Request timeout in seconds
48
+ max_retries: Maximum number of retry attempts
49
+ verify_ssl: Whether to verify SSL certificates
50
+ """
51
+
52
+ def __init__(
53
+ self,
54
+ base_url: str = "https://www.openai.fm",
55
+ api_key: Optional[str] = None,
56
+ timeout: float = 30.0,
57
+ max_retries: int = 3,
58
+ verify_ssl: bool = True,
59
+ preferred_format: Optional[AudioFormat] = None,
60
+ **kwargs
61
+ ):
62
+ """
63
+ Initialize the TTS client.
64
+
65
+ Args:
66
+ base_url: Base URL for the TTS service
67
+ api_key: API key for authentication
68
+ timeout: Request timeout in seconds
69
+ max_retries: Maximum retry attempts
70
+ verify_ssl: Whether to verify SSL certificates
71
+ preferred_format: Preferred audio format (affects header selection)
72
+ **kwargs: Additional configuration options
73
+ """
74
+ self.base_url = base_url.rstrip('/')
75
+ self.api_key = api_key
76
+ self.timeout = timeout
77
+ self.max_retries = max_retries
78
+ self.verify_ssl = verify_ssl
79
+ self.preferred_format = preferred_format or AudioFormat.WAV
80
+
81
+ # Validate base URL
82
+ if not validate_url(self.base_url):
83
+ raise ValidationException(f"Invalid base URL: {self.base_url}")
84
+
85
+ # Setup HTTP session with retry strategy
86
+ self.session = requests.Session()
87
+
88
+ # Configure retry strategy
89
+ retry_strategy = Retry(
90
+ total=max_retries,
91
+ status_forcelist=[429, 500, 502, 503, 504],
92
+ allowed_methods=["HEAD", "GET", "POST"], # Updated parameter name
93
+ backoff_factor=1
94
+ )
95
+
96
+ adapter = HTTPAdapter(
97
+ max_retries=retry_strategy,
98
+ pool_connections=10,
99
+ pool_maxsize=10
100
+ )
101
+ self.session.mount("http://", adapter)
102
+ self.session.mount("https://", adapter)
103
+
104
+ # Set default headers
105
+ self.session.headers.update(get_realistic_headers())
106
+
107
+ if self.api_key:
108
+ self.session.headers["Authorization"] = f"Bearer {self.api_key}"
109
+
110
+ logger.info(f"Initialized TTS client with base URL: {self.base_url}")
111
+
112
+ def _get_headers_for_format(self, requested_format: AudioFormat) -> Dict[str, str]:
113
+ """
114
+ Get appropriate headers to get the desired format from openai.fm.
115
+
116
+ Based on testing, openai.fm returns:
117
+ - MP3: When using no headers or very minimal headers
118
+ - WAV: When using more complex headers with specific Accept values
119
+
120
+ Args:
121
+ requested_format: The desired audio format
122
+
123
+ Returns:
124
+ Dict[str, str]: HTTP headers optimized for the requested format
125
+ """
126
+ from .models import get_supported_format
127
+
128
+ # Map requested format to supported format
129
+ target_format = get_supported_format(requested_format)
130
+
131
+ if target_format == AudioFormat.MP3:
132
+ # Use minimal headers to reliably get MP3 response
133
+ # Testing shows that no headers or very basic headers work best for MP3
134
+ return {
135
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
136
+ }
137
+ else:
138
+ # Use more complex headers to get WAV response
139
+ # This works for WAV, OPUS, AAC, FLAC, PCM formats
140
+ return {
141
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
142
+ 'Accept': 'audio/*,*/*;q=0.9'
143
+ }
144
+
145
+ def generate_speech(
146
+ self,
147
+ text: str,
148
+ voice: Union[Voice, str] = Voice.ALLOY,
149
+ response_format: Union[AudioFormat, str] = AudioFormat.MP3,
150
+ instructions: Optional[str] = None,
151
+ max_length: int = 4096,
152
+ validate_length: bool = True,
153
+ **kwargs
154
+ ) -> TTSResponse:
155
+ """
156
+ Generate speech from text.
157
+
158
+ Args:
159
+ text: Text to convert to speech
160
+ voice: Voice to use for generation
161
+ response_format: Audio format for output
162
+ instructions: Optional instructions for voice modulation
163
+ max_length: Maximum allowed text length in characters (default: 4096)
164
+ validate_length: Whether to validate text length (default: True)
165
+ **kwargs: Additional parameters
166
+
167
+ Returns:
168
+ TTSResponse: Generated audio response
169
+
170
+ Raises:
171
+ TTSException: If generation fails
172
+ ValueError: If text exceeds max_length and validate_length is True
173
+ """
174
+ # Create and validate request
175
+ request = TTSRequest(
176
+ input=sanitize_text(text),
177
+ voice=voice,
178
+ response_format=response_format,
179
+ instructions=instructions,
180
+ max_length=max_length,
181
+ validate_length=validate_length,
182
+ **kwargs
183
+ )
184
+
185
+ return self._make_request(request)
186
+
187
+ def generate_speech_from_request(self, request: TTSRequest) -> TTSResponse:
188
+ """
189
+ Generate speech from a TTSRequest object.
190
+
191
+ Args:
192
+ request: TTS request object
193
+
194
+ Returns:
195
+ TTSResponse: Generated audio response
196
+ """
197
+ return self._make_request(request)
198
+
199
+ def generate_speech_batch(
200
+ self,
201
+ text: str,
202
+ voice: Union[Voice, str] = Voice.ALLOY,
203
+ response_format: Union[AudioFormat, str] = AudioFormat.MP3,
204
+ instructions: Optional[str] = None,
205
+ max_length: int = 4096,
206
+ preserve_words: bool = True,
207
+ **kwargs
208
+ ) -> List[TTSResponse]:
209
+ """
210
+ Generate speech from long text by splitting it into chunks.
211
+
212
+ This method automatically splits text that exceeds max_length into
213
+ smaller chunks and generates speech for each chunk separately.
214
+
215
+ Args:
216
+ text: Text to convert to speech
217
+ voice: Voice to use for generation
218
+ response_format: Audio format for output
219
+ instructions: Optional instructions for voice modulation
220
+ max_length: Maximum length per chunk (default: 4096)
221
+ preserve_words: Whether to avoid splitting words (default: True)
222
+ **kwargs: Additional parameters
223
+
224
+ Returns:
225
+ List[TTSResponse]: List of generated audio responses
226
+
227
+ Raises:
228
+ TTSException: If generation fails for any chunk
229
+ """
230
+
231
+ # Sanitize text first
232
+ clean_text = sanitize_text(text)
233
+
234
+ # Split text into chunks
235
+ chunks = split_text_by_length(clean_text, max_length, preserve_words)
236
+
237
+ if not chunks:
238
+ raise ValueError("No valid text chunks found after processing")
239
+
240
+ responses = []
241
+
242
+ for i, chunk in enumerate(chunks):
243
+ logger.info(f"Processing chunk {i+1}/{len(chunks)} ({len(chunk)} characters)")
244
+
245
+ # Create request for this chunk (disable length validation since we already split)
246
+ request = TTSRequest(
247
+ input=chunk,
248
+ voice=voice,
249
+ response_format=response_format,
250
+ instructions=instructions,
251
+ max_length=max_length,
252
+ validate_length=False, # We already split the text
253
+ **kwargs
254
+ )
255
+
256
+ response = self._make_request(request)
257
+ responses.append(response)
258
+
259
+ return responses
260
+
261
+ def generate_speech_long_text(
262
+ self,
263
+ text: str,
264
+ voice: Union[Voice, str] = Voice.ALLOY,
265
+ response_format: Union[AudioFormat, str] = AudioFormat.MP3,
266
+ instructions: Optional[str] = None,
267
+ max_length: int = 4096,
268
+ preserve_words: bool = True,
269
+ **kwargs
270
+ ) -> List[TTSResponse]:
271
+ """
272
+ Generate speech from long text by splitting it into chunks.
273
+
274
+ This is an alias for generate_speech_batch for consistency with AsyncTTSClient.
275
+ Automatically splits text that exceeds max_length into smaller chunks
276
+ and generates speech for each chunk separately.
277
+
278
+ Args:
279
+ text: Text to convert to speech
280
+ voice: Voice to use for generation
281
+ response_format: Audio format for output
282
+ instructions: Optional instructions for voice modulation
283
+ max_length: Maximum length per chunk (default: 4096)
284
+ preserve_words: Whether to avoid splitting words (default: True)
285
+ **kwargs: Additional parameters
286
+
287
+ Returns:
288
+ List[TTSResponse]: List of generated audio responses
289
+
290
+ Raises:
291
+ TTSException: If generation fails for any chunk
292
+ """
293
+ return self.generate_speech_batch(
294
+ text=text,
295
+ voice=voice,
296
+ response_format=response_format,
297
+ instructions=instructions,
298
+ max_length=max_length,
299
+ preserve_words=preserve_words,
300
+ **kwargs
301
+ )
302
+
303
+ def _make_request(self, request: TTSRequest) -> TTSResponse:
304
+ """
305
+ Make the actual HTTP request to the openai.fm TTS service.
306
+
307
+ Args:
308
+ request: TTS request object
309
+
310
+ Returns:
311
+ TTSResponse: Generated audio response
312
+
313
+ Raises:
314
+ TTSException: If request fails
315
+ """
316
+ url = build_url(self.base_url, "api/generate")
317
+
318
+ # Prepare form data for openai.fm API
319
+ form_data = {
320
+ 'input': request.input,
321
+ 'voice': request.voice.value,
322
+ 'generation': str(uuid.uuid4()),
323
+ 'response_format': request.response_format.value if hasattr(request.response_format, 'value') else str(request.response_format)
324
+ }
325
+
326
+ # Add prompt/instructions if provided
327
+ if request.instructions:
328
+ form_data['prompt'] = request.instructions
329
+ else:
330
+ # Default prompt for better quality
331
+ form_data['prompt'] = (
332
+ "Affect/personality: Natural and clear\n\n"
333
+ "Tone: Friendly and professional, creating a pleasant listening experience.\n\n"
334
+ "Pronunciation: Clear, articulate, and steady, ensuring each word is easily understood "
335
+ "while maintaining a natural, conversational flow.\n\n"
336
+ "Pause: Brief, purposeful pauses between sentences to allow time for the listener "
337
+ "to process the information.\n\n"
338
+ "Emotion: Warm and engaging, conveying the intended message effectively."
339
+ )
340
+
341
+ # Get optimized headers for the requested format
342
+ # Convert string format to AudioFormat enum if needed
343
+ requested_format = request.response_format
344
+ if isinstance(requested_format, str):
345
+ try:
346
+ requested_format = AudioFormat(requested_format.lower())
347
+ except ValueError:
348
+ requested_format = AudioFormat.WAV # Default to WAV for unknown formats
349
+
350
+ format_headers = self._get_headers_for_format(requested_format)
351
+
352
+ logger.info(f"Generating speech for text: '{request.input[:50]}...' with voice: {request.voice}")
353
+ logger.debug(f"Using headers optimized for {requested_format.value} format")
354
+
355
+ # Make request with retries
356
+ for attempt in range(self.max_retries + 1):
357
+ try:
358
+ # Add random delay for rate limiting (except first attempt)
359
+ if attempt > 0:
360
+ delay = exponential_backoff(attempt - 1)
361
+ logger.info(f"Retrying request after {delay:.2f}s (attempt {attempt + 1})")
362
+ time.sleep(delay)
363
+
364
+ # Use multipart form data as required by openai.fm
365
+ response = self.session.post(
366
+ url,
367
+ data=form_data,
368
+ headers=format_headers,
369
+ timeout=self.timeout,
370
+ verify=self.verify_ssl
371
+ )
372
+
373
+ # Handle different response types
374
+ if response.status_code == 200:
375
+ return self._process_openai_fm_response(response, request)
376
+ else:
377
+ # Try to parse error response
378
+ try:
379
+ error_data = response.json()
380
+ except (json.JSONDecodeError, ValueError):
381
+ error_data = {"error": {"message": response.text or "Unknown error"}}
382
+
383
+ # Create appropriate exception
384
+ exception = create_exception_from_response(
385
+ response.status_code,
386
+ error_data,
387
+ f"TTS request failed with status {response.status_code}"
388
+ )
389
+
390
+ # Don't retry for certain errors
391
+ if response.status_code in [400, 401, 403, 404]:
392
+ raise exception
393
+
394
+ # For retryable errors, continue to next attempt
395
+ if attempt == self.max_retries:
396
+ raise exception
397
+
398
+ logger.warning(f"Request failed with status {response.status_code}, retrying...")
399
+ continue
400
+
401
+ except requests.exceptions.Timeout:
402
+ if attempt == self.max_retries:
403
+ raise NetworkException(
404
+ f"Request timed out after {self.timeout}s",
405
+ timeout=self.timeout,
406
+ retry_count=attempt
407
+ )
408
+ logger.warning(f"Request timed out, retrying...")
409
+ continue
410
+
411
+ except requests.exceptions.ConnectionError as e:
412
+ if attempt == self.max_retries:
413
+ raise NetworkException(
414
+ f"Connection error: {str(e)}",
415
+ retry_count=attempt
416
+ )
417
+ logger.warning(f"Connection error, retrying...")
418
+ continue
419
+
420
+ except requests.exceptions.RequestException as e:
421
+ if attempt == self.max_retries:
422
+ raise NetworkException(
423
+ f"Request error: {str(e)}",
424
+ retry_count=attempt
425
+ )
426
+ logger.warning(f"Request error, retrying...")
427
+ continue
428
+
429
+ # This should never be reached, but just in case
430
+ raise TTSException("Maximum retries exceeded")
431
+
432
+ def _process_openai_fm_response(self, response: requests.Response, request: TTSRequest) -> TTSResponse:
433
+ """
434
+ Process a successful response from the openai.fm TTS service.
435
+
436
+ Args:
437
+ response: HTTP response object
438
+ request: Original TTS request
439
+
440
+ Returns:
441
+ TTSResponse: Processed response object
442
+ """
443
+ # Get content type from response headers
444
+ content_type = response.headers.get("content-type", "audio/mpeg")
445
+
446
+ # Get audio data
447
+ audio_data = response.content
448
+
449
+ if not audio_data:
450
+ raise APIException("Received empty audio data from openai.fm")
451
+
452
+ # Determine format from content type
453
+ if "audio/mpeg" in content_type or "audio/mp3" in content_type:
454
+ actual_format = AudioFormat.MP3
455
+ elif "audio/wav" in content_type:
456
+ actual_format = AudioFormat.WAV
457
+ elif "audio/opus" in content_type:
458
+ actual_format = AudioFormat.OPUS
459
+ elif "audio/aac" in content_type:
460
+ actual_format = AudioFormat.AAC
461
+ elif "audio/flac" in content_type:
462
+ actual_format = AudioFormat.FLAC
463
+ else:
464
+ # Default to MP3 for openai.fm
465
+ actual_format = AudioFormat.MP3
466
+
467
+ # Estimate duration based on text length (rough approximation)
468
+ estimated_duration = estimate_audio_duration(request.input)
469
+
470
+ # Check if returned format differs from requested format
471
+ requested_format = request.response_format
472
+ if isinstance(requested_format, str):
473
+ try:
474
+ requested_format = AudioFormat(requested_format.lower())
475
+ except ValueError:
476
+ requested_format = AudioFormat.WAV # Default fallback
477
+
478
+ # Import here to avoid circular imports
479
+ from .models import get_supported_format, maps_to_wav
480
+
481
+ # Check if format differs from request
482
+ if actual_format != requested_format:
483
+ if maps_to_wav(requested_format.value) and actual_format.value == "wav":
484
+ logger.debug(
485
+ f"Format '{requested_format.value}' requested, returning WAV format."
486
+ )
487
+ else:
488
+ logger.warning(
489
+ f"Requested format '{requested_format.value}' but received '{actual_format.value}' "
490
+ f"from service."
491
+ )
492
+
493
+ # Create response object
494
+ tts_response = TTSResponse(
495
+ audio_data=audio_data,
496
+ content_type=content_type,
497
+ format=actual_format,
498
+ size=len(audio_data),
499
+ duration=estimated_duration,
500
+ metadata={
501
+ "response_headers": dict(response.headers),
502
+ "status_code": response.status_code,
503
+ "url": str(response.url),
504
+ "service": "openai.fm",
505
+ "voice": request.voice.value,
506
+ "original_text": request.input[:100] + "..." if len(request.input) > 100 else request.input,
507
+ "requested_format": requested_format.value,
508
+ "actual_format": actual_format.value
509
+ }
510
+ )
511
+
512
+ logger.info(
513
+ f"Successfully generated {format_file_size(len(audio_data))} "
514
+ f"of {actual_format.value.upper()} audio from openai.fm using voice '{request.voice.value}'"
515
+ )
516
+
517
+ return tts_response
518
+
519
+ def close(self):
520
+ """Close the HTTP session."""
521
+ if hasattr(self, 'session'):
522
+ self.session.close()
523
+
524
+ def __enter__(self):
525
+ """Context manager entry."""
526
+ return self
527
+
528
+ def __exit__(self, exc_type, exc_val, exc_tb):
529
+ """Context manager exit."""
530
+ self.close()
ttsfm/exceptions.py CHANGED
@@ -1,243 +1,243 @@
1
- """
2
- Exception classes for the TTSFM package.
3
-
4
- This module defines the exception hierarchy used throughout the package
5
- for consistent error handling and reporting.
6
- """
7
-
8
- from typing import Optional, Dict, Any
9
-
10
-
11
- class TTSException(Exception):
12
- """
13
- Base exception class for all TTSFM-related errors.
14
-
15
- Attributes:
16
- message: Human-readable error message
17
- code: Error code for programmatic handling
18
- details: Additional error details
19
- """
20
-
21
- def __init__(
22
- self,
23
- message: str,
24
- code: Optional[str] = None,
25
- details: Optional[Dict[str, Any]] = None
26
- ):
27
- super().__init__(message)
28
- self.message = message
29
- self.code = code or self.__class__.__name__
30
- self.details = details or {}
31
-
32
- def __str__(self) -> str:
33
- if self.code:
34
- return f"[{self.code}] {self.message}"
35
- return self.message
36
-
37
- def __repr__(self) -> str:
38
- return f"{self.__class__.__name__}(message='{self.message}', code='{self.code}')"
39
-
40
-
41
- class APIException(TTSException):
42
- """
43
- Exception raised for API-related errors.
44
-
45
- This includes HTTP errors, invalid responses, and server-side issues.
46
- """
47
-
48
- def __init__(
49
- self,
50
- message: str,
51
- status_code: Optional[int] = None,
52
- response_data: Optional[Dict[str, Any]] = None,
53
- **kwargs
54
- ):
55
- super().__init__(message, **kwargs)
56
- self.status_code = status_code
57
- self.response_data = response_data or {}
58
-
59
- def __str__(self) -> str:
60
- if self.status_code:
61
- return f"[HTTP {self.status_code}] {self.message}"
62
- return super().__str__()
63
-
64
-
65
- class NetworkException(TTSException):
66
- """
67
- Exception raised for network-related errors.
68
-
69
- This includes connection timeouts, DNS resolution failures, and other
70
- network connectivity issues.
71
- """
72
-
73
- def __init__(
74
- self,
75
- message: str,
76
- timeout: Optional[float] = None,
77
- retry_count: int = 0,
78
- **kwargs
79
- ):
80
- super().__init__(message, **kwargs)
81
- self.timeout = timeout
82
- self.retry_count = retry_count
83
-
84
-
85
- class ValidationException(TTSException):
86
- """
87
- Exception raised for input validation errors.
88
-
89
- This includes invalid parameters, missing required fields, and
90
- data format issues.
91
- """
92
-
93
- def __init__(
94
- self,
95
- message: str,
96
- field: Optional[str] = None,
97
- value: Optional[Any] = None,
98
- **kwargs
99
- ):
100
- super().__init__(message, **kwargs)
101
- self.field = field
102
- self.value = value
103
-
104
- def __str__(self) -> str:
105
- if self.field:
106
- return f"Validation error for '{self.field}': {self.message}"
107
- return f"Validation error: {self.message}"
108
-
109
-
110
- class RateLimitException(APIException):
111
- """
112
- Exception raised when API rate limits are exceeded.
113
-
114
- Attributes:
115
- retry_after: Seconds to wait before retrying (if provided by server)
116
- limit: Rate limit that was exceeded
117
- remaining: Remaining requests in current window
118
- """
119
-
120
- def __init__(
121
- self,
122
- message: str = "Rate limit exceeded",
123
- retry_after: Optional[int] = None,
124
- limit: Optional[int] = None,
125
- remaining: Optional[int] = None,
126
- **kwargs
127
- ):
128
- super().__init__(message, status_code=429, **kwargs)
129
- self.retry_after = retry_after
130
- self.limit = limit
131
- self.remaining = remaining
132
-
133
- def __str__(self) -> str:
134
- msg = super().__str__()
135
- if self.retry_after:
136
- msg += f" (retry after {self.retry_after}s)"
137
- return msg
138
-
139
-
140
- class AuthenticationException(APIException):
141
- """
142
- Exception raised for authentication and authorization errors.
143
-
144
- This includes invalid API keys, expired tokens, and insufficient
145
- permissions.
146
- """
147
-
148
- def __init__(
149
- self,
150
- message: str = "Authentication failed",
151
- **kwargs
152
- ):
153
- super().__init__(message, status_code=401, **kwargs)
154
-
155
-
156
- class ServiceUnavailableException(APIException):
157
- """
158
- Exception raised when the TTS service is temporarily unavailable.
159
-
160
- This includes server maintenance, overload conditions, and
161
- temporary service outages.
162
- """
163
-
164
- def __init__(
165
- self,
166
- message: str = "Service temporarily unavailable",
167
- retry_after: Optional[int] = None,
168
- **kwargs
169
- ):
170
- super().__init__(message, status_code=503, **kwargs)
171
- self.retry_after = retry_after
172
-
173
-
174
- class QuotaExceededException(APIException):
175
- """
176
- Exception raised when usage quotas are exceeded.
177
-
178
- This includes monthly limits, character limits, and other
179
- usage-based restrictions.
180
- """
181
-
182
- def __init__(
183
- self,
184
- message: str = "Usage quota exceeded",
185
- quota_type: Optional[str] = None,
186
- limit: Optional[int] = None,
187
- used: Optional[int] = None,
188
- **kwargs
189
- ):
190
- super().__init__(message, status_code=402, **kwargs)
191
- self.quota_type = quota_type
192
- self.limit = limit
193
- self.used = used
194
-
195
-
196
- class AudioProcessingException(TTSException):
197
- """
198
- Exception raised for audio processing errors.
199
-
200
- This includes format conversion issues, audio generation failures,
201
- and output processing problems.
202
- """
203
-
204
- def __init__(
205
- self,
206
- message: str,
207
- audio_format: Optional[str] = None,
208
- **kwargs
209
- ):
210
- super().__init__(message, **kwargs)
211
- self.audio_format = audio_format
212
-
213
-
214
- def create_exception_from_response(
215
- status_code: int,
216
- response_data: Dict[str, Any],
217
- default_message: str = "API request failed"
218
- ) -> APIException:
219
- """
220
- Create appropriate exception from API response.
221
-
222
- Args:
223
- status_code: HTTP status code
224
- response_data: Response data from API
225
- default_message: Default message if none in response
226
-
227
- Returns:
228
- APIException: Appropriate exception instance
229
- """
230
- message = response_data.get("error", {}).get("message", default_message)
231
-
232
- if status_code == 401:
233
- return AuthenticationException(message, response_data=response_data)
234
- elif status_code == 402:
235
- return QuotaExceededException(message, response_data=response_data)
236
- elif status_code == 429:
237
- retry_after = response_data.get("retry_after")
238
- return RateLimitException(message, retry_after=retry_after, response_data=response_data)
239
- elif status_code == 503:
240
- retry_after = response_data.get("retry_after")
241
- return ServiceUnavailableException(message, retry_after=retry_after, response_data=response_data)
242
- else:
243
- return APIException(message, status_code=status_code, response_data=response_data)
 
1
+ """
2
+ Exception classes for the TTSFM package.
3
+
4
+ This module defines the exception hierarchy used throughout the package
5
+ for consistent error handling and reporting.
6
+ """
7
+
8
+ from typing import Optional, Dict, Any
9
+
10
+
11
+ class TTSException(Exception):
12
+ """
13
+ Base exception class for all TTSFM-related errors.
14
+
15
+ Attributes:
16
+ message: Human-readable error message
17
+ code: Error code for programmatic handling
18
+ details: Additional error details
19
+ """
20
+
21
+ def __init__(
22
+ self,
23
+ message: str,
24
+ code: Optional[str] = None,
25
+ details: Optional[Dict[str, Any]] = None
26
+ ):
27
+ super().__init__(message)
28
+ self.message = message
29
+ self.code = code or self.__class__.__name__
30
+ self.details = details or {}
31
+
32
+ def __str__(self) -> str:
33
+ if self.code:
34
+ return f"[{self.code}] {self.message}"
35
+ return self.message
36
+
37
+ def __repr__(self) -> str:
38
+ return f"{self.__class__.__name__}(message='{self.message}', code='{self.code}')"
39
+
40
+
41
+ class APIException(TTSException):
42
+ """
43
+ Exception raised for API-related errors.
44
+
45
+ This includes HTTP errors, invalid responses, and server-side issues.
46
+ """
47
+
48
+ def __init__(
49
+ self,
50
+ message: str,
51
+ status_code: Optional[int] = None,
52
+ response_data: Optional[Dict[str, Any]] = None,
53
+ **kwargs
54
+ ):
55
+ super().__init__(message, **kwargs)
56
+ self.status_code = status_code
57
+ self.response_data = response_data or {}
58
+
59
+ def __str__(self) -> str:
60
+ if self.status_code:
61
+ return f"[HTTP {self.status_code}] {self.message}"
62
+ return super().__str__()
63
+
64
+
65
+ class NetworkException(TTSException):
66
+ """
67
+ Exception raised for network-related errors.
68
+
69
+ This includes connection timeouts, DNS resolution failures, and other
70
+ network connectivity issues.
71
+ """
72
+
73
+ def __init__(
74
+ self,
75
+ message: str,
76
+ timeout: Optional[float] = None,
77
+ retry_count: int = 0,
78
+ **kwargs
79
+ ):
80
+ super().__init__(message, **kwargs)
81
+ self.timeout = timeout
82
+ self.retry_count = retry_count
83
+
84
+
85
+ class ValidationException(TTSException):
86
+ """
87
+ Exception raised for input validation errors.
88
+
89
+ This includes invalid parameters, missing required fields, and
90
+ data format issues.
91
+ """
92
+
93
+ def __init__(
94
+ self,
95
+ message: str,
96
+ field: Optional[str] = None,
97
+ value: Optional[Any] = None,
98
+ **kwargs
99
+ ):
100
+ super().__init__(message, **kwargs)
101
+ self.field = field
102
+ self.value = value
103
+
104
+ def __str__(self) -> str:
105
+ if self.field:
106
+ return f"Validation error for '{self.field}': {self.message}"
107
+ return f"Validation error: {self.message}"
108
+
109
+
110
+ class RateLimitException(APIException):
111
+ """
112
+ Exception raised when API rate limits are exceeded.
113
+
114
+ Attributes:
115
+ retry_after: Seconds to wait before retrying (if provided by server)
116
+ limit: Rate limit that was exceeded
117
+ remaining: Remaining requests in current window
118
+ """
119
+
120
+ def __init__(
121
+ self,
122
+ message: str = "Rate limit exceeded",
123
+ retry_after: Optional[int] = None,
124
+ limit: Optional[int] = None,
125
+ remaining: Optional[int] = None,
126
+ **kwargs
127
+ ):
128
+ super().__init__(message, status_code=429, **kwargs)
129
+ self.retry_after = retry_after
130
+ self.limit = limit
131
+ self.remaining = remaining
132
+
133
+ def __str__(self) -> str:
134
+ msg = super().__str__()
135
+ if self.retry_after:
136
+ msg += f" (retry after {self.retry_after}s)"
137
+ return msg
138
+
139
+
140
+ class AuthenticationException(APIException):
141
+ """
142
+ Exception raised for authentication and authorization errors.
143
+
144
+ This includes invalid API keys, expired tokens, and insufficient
145
+ permissions.
146
+ """
147
+
148
+ def __init__(
149
+ self,
150
+ message: str = "Authentication failed",
151
+ **kwargs
152
+ ):
153
+ super().__init__(message, status_code=401, **kwargs)
154
+
155
+
156
+ class ServiceUnavailableException(APIException):
157
+ """
158
+ Exception raised when the TTS service is temporarily unavailable.
159
+
160
+ This includes server maintenance, overload conditions, and
161
+ temporary service outages.
162
+ """
163
+
164
+ def __init__(
165
+ self,
166
+ message: str = "Service temporarily unavailable",
167
+ retry_after: Optional[int] = None,
168
+ **kwargs
169
+ ):
170
+ super().__init__(message, status_code=503, **kwargs)
171
+ self.retry_after = retry_after
172
+
173
+
174
+ class QuotaExceededException(APIException):
175
+ """
176
+ Exception raised when usage quotas are exceeded.
177
+
178
+ This includes monthly limits, character limits, and other
179
+ usage-based restrictions.
180
+ """
181
+
182
+ def __init__(
183
+ self,
184
+ message: str = "Usage quota exceeded",
185
+ quota_type: Optional[str] = None,
186
+ limit: Optional[int] = None,
187
+ used: Optional[int] = None,
188
+ **kwargs
189
+ ):
190
+ super().__init__(message, status_code=402, **kwargs)
191
+ self.quota_type = quota_type
192
+ self.limit = limit
193
+ self.used = used
194
+
195
+
196
+ class AudioProcessingException(TTSException):
197
+ """
198
+ Exception raised for audio processing errors.
199
+
200
+ This includes format conversion issues, audio generation failures,
201
+ and output processing problems.
202
+ """
203
+
204
+ def __init__(
205
+ self,
206
+ message: str,
207
+ audio_format: Optional[str] = None,
208
+ **kwargs
209
+ ):
210
+ super().__init__(message, **kwargs)
211
+ self.audio_format = audio_format
212
+
213
+
214
+ def create_exception_from_response(
215
+ status_code: int,
216
+ response_data: Dict[str, Any],
217
+ default_message: str = "API request failed"
218
+ ) -> APIException:
219
+ """
220
+ Create appropriate exception from API response.
221
+
222
+ Args:
223
+ status_code: HTTP status code
224
+ response_data: Response data from API
225
+ default_message: Default message if none in response
226
+
227
+ Returns:
228
+ APIException: Appropriate exception instance
229
+ """
230
+ message = response_data.get("error", {}).get("message", default_message)
231
+
232
+ if status_code == 401:
233
+ return AuthenticationException(message, response_data=response_data)
234
+ elif status_code == 402:
235
+ return QuotaExceededException(message, response_data=response_data)
236
+ elif status_code == 429:
237
+ retry_after = response_data.get("retry_after")
238
+ return RateLimitException(message, retry_after=retry_after, response_data=response_data)
239
+ elif status_code == 503:
240
+ retry_after = response_data.get("retry_after")
241
+ return ServiceUnavailableException(message, retry_after=retry_after, response_data=response_data)
242
+ else:
243
+ return APIException(message, status_code=status_code, response_data=response_data)
ttsfm/models.py CHANGED
@@ -1,283 +1,283 @@
1
- """
2
- Data models and types for the TTSFM package.
3
-
4
- This module defines the core data structures used throughout the package,
5
- including request/response models, enums, and error types.
6
- """
7
-
8
- from enum import Enum
9
- from typing import Optional, Dict, Any, Union
10
- from dataclasses import dataclass
11
- from datetime import datetime
12
-
13
-
14
- class Voice(str, Enum):
15
- """Available voice options for TTS generation."""
16
- ALLOY = "alloy"
17
- ASH = "ash"
18
- BALLAD = "ballad"
19
- CORAL = "coral"
20
- ECHO = "echo"
21
- FABLE = "fable"
22
- NOVA = "nova"
23
- ONYX = "onyx"
24
- SAGE = "sage"
25
- SHIMMER = "shimmer"
26
- VERSE = "verse"
27
-
28
-
29
- class AudioFormat(str, Enum):
30
- """Supported audio output formats."""
31
- MP3 = "mp3"
32
- WAV = "wav"
33
- OPUS = "opus"
34
- AAC = "aac"
35
- FLAC = "flac"
36
- PCM = "pcm"
37
-
38
-
39
- @dataclass
40
- class TTSRequest:
41
- """
42
- Request model for TTS generation.
43
-
44
- Attributes:
45
- input: Text to convert to speech
46
- voice: Voice to use for generation
47
- response_format: Audio format for output
48
- instructions: Optional instructions for voice modulation
49
- model: Model to use (for OpenAI compatibility, usually ignored)
50
- speed: Speech speed (for OpenAI compatibility, usually ignored)
51
- max_length: Maximum allowed text length (default: 4096 characters)
52
- validate_length: Whether to validate text length (default: True)
53
- """
54
- input: str
55
- voice: Union[Voice, str] = Voice.ALLOY
56
- response_format: Union[AudioFormat, str] = AudioFormat.MP3
57
- instructions: Optional[str] = None
58
- model: Optional[str] = None
59
- speed: Optional[float] = None
60
- max_length: int = 4096
61
- validate_length: bool = True
62
-
63
- def __post_init__(self):
64
- """Validate and normalize fields after initialization."""
65
- # Ensure voice is a valid Voice enum
66
- if isinstance(self.voice, str):
67
- try:
68
- self.voice = Voice(self.voice.lower())
69
- except ValueError:
70
- raise ValueError(f"Invalid voice: {self.voice}. Must be one of {list(Voice)}")
71
-
72
- # Ensure response_format is a valid AudioFormat enum
73
- if isinstance(self.response_format, str):
74
- try:
75
- self.response_format = AudioFormat(self.response_format.lower())
76
- except ValueError:
77
- raise ValueError(f"Invalid format: {self.response_format}. Must be one of {list(AudioFormat)}")
78
-
79
- # Validate input text
80
- if not self.input or not self.input.strip():
81
- raise ValueError("Input text cannot be empty")
82
-
83
- # Validate text length if enabled
84
- if self.validate_length:
85
- text_length = len(self.input)
86
- if text_length > self.max_length:
87
- raise ValueError(
88
- f"Input text is too long ({text_length} characters). "
89
- f"Maximum allowed length is {self.max_length} characters. "
90
- f"Consider splitting your text into smaller chunks or disable "
91
- f"length validation with validate_length=False."
92
- )
93
-
94
- # Validate max_length parameter
95
- if self.max_length <= 0:
96
- raise ValueError("max_length must be a positive integer")
97
-
98
- # Validate speed if provided
99
- if self.speed is not None and (self.speed < 0.25 or self.speed > 4.0):
100
- raise ValueError("Speed must be between 0.25 and 4.0")
101
-
102
- def to_dict(self) -> Dict[str, Any]:
103
- """Convert request to dictionary for API calls."""
104
- data = {
105
- "input": self.input,
106
- "voice": self.voice.value if isinstance(self.voice, Voice) else self.voice,
107
- "response_format": self.response_format.value if isinstance(self.response_format, AudioFormat) else self.response_format
108
- }
109
-
110
- if self.instructions:
111
- data["instructions"] = self.instructions
112
-
113
- if self.model:
114
- data["model"] = self.model
115
-
116
- if self.speed is not None:
117
- data["speed"] = self.speed
118
-
119
- return data
120
-
121
-
122
- @dataclass
123
- class TTSResponse:
124
- """
125
- Response model for TTS generation.
126
-
127
- Attributes:
128
- audio_data: Generated audio as bytes
129
- content_type: MIME type of the audio data
130
- format: Audio format used
131
- size: Size of audio data in bytes
132
- duration: Estimated duration in seconds (if available)
133
- metadata: Additional response metadata
134
- """
135
- audio_data: bytes
136
- content_type: str
137
- format: AudioFormat
138
- size: int
139
- duration: Optional[float] = None
140
- metadata: Optional[Dict[str, Any]] = None
141
-
142
- def __post_init__(self):
143
- """Calculate derived fields after initialization."""
144
- if self.size is None:
145
- self.size = len(self.audio_data)
146
-
147
- def save_to_file(self, filename: str) -> str:
148
- """
149
- Save audio data to a file.
150
-
151
- Args:
152
- filename: Target filename (extension will be added if missing)
153
-
154
- Returns:
155
- str: Final filename used
156
- """
157
- import os
158
-
159
- # Use the actual returned format for the extension, not any requested format
160
- expected_extension = f".{self.format.value}"
161
-
162
- # Check if filename already has the correct extension
163
- if filename.endswith(expected_extension):
164
- final_filename = filename
165
- else:
166
- # Remove any existing extension and add the correct one
167
- base_name = filename
168
- # Remove common audio extensions if present
169
- for ext in ['.mp3', '.wav', '.opus', '.aac', '.flac', '.pcm']:
170
- if base_name.endswith(ext):
171
- base_name = base_name[:-len(ext)]
172
- break
173
- final_filename = f"{base_name}{expected_extension}"
174
-
175
- # Create directory if it doesn't exist
176
- os.makedirs(os.path.dirname(final_filename) if os.path.dirname(final_filename) else ".", exist_ok=True)
177
-
178
- # Write audio data
179
- with open(final_filename, "wb") as f:
180
- f.write(self.audio_data)
181
-
182
- return final_filename
183
-
184
-
185
- @dataclass
186
- class TTSError:
187
- """
188
- Error information from TTS API.
189
-
190
- Attributes:
191
- code: Error code
192
- message: Human-readable error message
193
- type: Error type/category
194
- details: Additional error details
195
- timestamp: When the error occurred
196
- """
197
- code: str
198
- message: str
199
- type: Optional[str] = None
200
- details: Optional[Dict[str, Any]] = None
201
- timestamp: Optional[datetime] = None
202
-
203
- def __post_init__(self):
204
- """Set timestamp if not provided."""
205
- if self.timestamp is None:
206
- self.timestamp = datetime.now()
207
-
208
-
209
- @dataclass
210
- class APIError(TTSError):
211
- """API-specific error information."""
212
- status_code: int = 500
213
- headers: Optional[Dict[str, str]] = None
214
-
215
-
216
- @dataclass
217
- class NetworkError(TTSError):
218
- """Network-related error information."""
219
- timeout: Optional[float] = None
220
- retry_count: int = 0
221
-
222
-
223
- @dataclass
224
- class ValidationError(TTSError):
225
- """Validation error information."""
226
- field: Optional[str] = None
227
- value: Optional[Any] = None
228
-
229
-
230
- # Content type mappings for audio formats
231
- CONTENT_TYPE_MAP = {
232
- AudioFormat.MP3: "audio/mpeg",
233
- AudioFormat.OPUS: "audio/opus",
234
- AudioFormat.AAC: "audio/aac",
235
- AudioFormat.FLAC: "audio/flac",
236
- AudioFormat.WAV: "audio/wav",
237
- AudioFormat.PCM: "audio/pcm"
238
- }
239
-
240
- # Reverse mapping for content type to format
241
- FORMAT_FROM_CONTENT_TYPE = {v: k for k, v in CONTENT_TYPE_MAP.items()}
242
-
243
-
244
- def get_content_type(format: Union[AudioFormat, str]) -> str:
245
- """Get MIME content type for audio format."""
246
- if isinstance(format, str):
247
- format = AudioFormat(format.lower())
248
- return CONTENT_TYPE_MAP.get(format, "audio/mpeg")
249
-
250
-
251
- def get_format_from_content_type(content_type: str) -> AudioFormat:
252
- """Get audio format from MIME content type."""
253
- return FORMAT_FROM_CONTENT_TYPE.get(content_type, AudioFormat.MP3)
254
-
255
-
256
- def get_supported_format(requested_format: AudioFormat) -> AudioFormat:
257
- """
258
- Map requested format to supported format.
259
-
260
- Args:
261
- requested_format: The requested audio format
262
-
263
- Returns:
264
- AudioFormat: MP3 or WAV (the supported formats)
265
- """
266
- if requested_format == AudioFormat.MP3:
267
- return AudioFormat.MP3
268
- else:
269
- # All other formats (WAV, OPUS, AAC, FLAC, PCM) return WAV
270
- return AudioFormat.WAV
271
-
272
-
273
- def maps_to_wav(format_value: str) -> bool:
274
- """
275
- Check if a format maps to WAV.
276
-
277
- Args:
278
- format_value: Format string to check
279
-
280
- Returns:
281
- bool: True if the format maps to WAV
282
- """
283
- return format_value.lower() in ['wav', 'opus', 'aac', 'flac', 'pcm']
 
1
+ """
2
+ Data models and types for the TTSFM package.
3
+
4
+ This module defines the core data structures used throughout the package,
5
+ including request/response models, enums, and error types.
6
+ """
7
+
8
+ from enum import Enum
9
+ from typing import Optional, Dict, Any, Union
10
+ from dataclasses import dataclass
11
+ from datetime import datetime
12
+
13
+
14
+ class Voice(str, Enum):
15
+ """Available voice options for TTS generation."""
16
+ ALLOY = "alloy"
17
+ ASH = "ash"
18
+ BALLAD = "ballad"
19
+ CORAL = "coral"
20
+ ECHO = "echo"
21
+ FABLE = "fable"
22
+ NOVA = "nova"
23
+ ONYX = "onyx"
24
+ SAGE = "sage"
25
+ SHIMMER = "shimmer"
26
+ VERSE = "verse"
27
+
28
+
29
+ class AudioFormat(str, Enum):
30
+ """Supported audio output formats."""
31
+ MP3 = "mp3"
32
+ WAV = "wav"
33
+ OPUS = "opus"
34
+ AAC = "aac"
35
+ FLAC = "flac"
36
+ PCM = "pcm"
37
+
38
+
39
+ @dataclass
40
+ class TTSRequest:
41
+ """
42
+ Request model for TTS generation.
43
+
44
+ Attributes:
45
+ input: Text to convert to speech
46
+ voice: Voice to use for generation
47
+ response_format: Audio format for output
48
+ instructions: Optional instructions for voice modulation
49
+ model: Model to use (for OpenAI compatibility, usually ignored)
50
+ speed: Speech speed (for OpenAI compatibility, usually ignored)
51
+ max_length: Maximum allowed text length (default: 4096 characters)
52
+ validate_length: Whether to validate text length (default: True)
53
+ """
54
+ input: str
55
+ voice: Union[Voice, str] = Voice.ALLOY
56
+ response_format: Union[AudioFormat, str] = AudioFormat.MP3
57
+ instructions: Optional[str] = None
58
+ model: Optional[str] = None
59
+ speed: Optional[float] = None
60
+ max_length: int = 4096
61
+ validate_length: bool = True
62
+
63
+ def __post_init__(self):
64
+ """Validate and normalize fields after initialization."""
65
+ # Ensure voice is a valid Voice enum
66
+ if isinstance(self.voice, str):
67
+ try:
68
+ self.voice = Voice(self.voice.lower())
69
+ except ValueError:
70
+ raise ValueError(f"Invalid voice: {self.voice}. Must be one of {list(Voice)}")
71
+
72
+ # Ensure response_format is a valid AudioFormat enum
73
+ if isinstance(self.response_format, str):
74
+ try:
75
+ self.response_format = AudioFormat(self.response_format.lower())
76
+ except ValueError:
77
+ raise ValueError(f"Invalid format: {self.response_format}. Must be one of {list(AudioFormat)}")
78
+
79
+ # Validate input text
80
+ if not self.input or not self.input.strip():
81
+ raise ValueError("Input text cannot be empty")
82
+
83
+ # Validate text length if enabled
84
+ if self.validate_length:
85
+ text_length = len(self.input)
86
+ if text_length > self.max_length:
87
+ raise ValueError(
88
+ f"Input text is too long ({text_length} characters). "
89
+ f"Maximum allowed length is {self.max_length} characters. "
90
+ f"Consider splitting your text into smaller chunks or disable "
91
+ f"length validation with validate_length=False."
92
+ )
93
+
94
+ # Validate max_length parameter
95
+ if self.max_length <= 0:
96
+ raise ValueError("max_length must be a positive integer")
97
+
98
+ # Validate speed if provided
99
+ if self.speed is not None and (self.speed < 0.25 or self.speed > 4.0):
100
+ raise ValueError("Speed must be between 0.25 and 4.0")
101
+
102
+ def to_dict(self) -> Dict[str, Any]:
103
+ """Convert request to dictionary for API calls."""
104
+ data = {
105
+ "input": self.input,
106
+ "voice": self.voice.value if isinstance(self.voice, Voice) else self.voice,
107
+ "response_format": self.response_format.value if isinstance(self.response_format, AudioFormat) else self.response_format
108
+ }
109
+
110
+ if self.instructions:
111
+ data["instructions"] = self.instructions
112
+
113
+ if self.model:
114
+ data["model"] = self.model
115
+
116
+ if self.speed is not None:
117
+ data["speed"] = self.speed
118
+
119
+ return data
120
+
121
+
122
+ @dataclass
123
+ class TTSResponse:
124
+ """
125
+ Response model for TTS generation.
126
+
127
+ Attributes:
128
+ audio_data: Generated audio as bytes
129
+ content_type: MIME type of the audio data
130
+ format: Audio format used
131
+ size: Size of audio data in bytes
132
+ duration: Estimated duration in seconds (if available)
133
+ metadata: Additional response metadata
134
+ """
135
+ audio_data: bytes
136
+ content_type: str
137
+ format: AudioFormat
138
+ size: int
139
+ duration: Optional[float] = None
140
+ metadata: Optional[Dict[str, Any]] = None
141
+
142
+ def __post_init__(self):
143
+ """Calculate derived fields after initialization."""
144
+ if self.size is None:
145
+ self.size = len(self.audio_data)
146
+
147
+ def save_to_file(self, filename: str) -> str:
148
+ """
149
+ Save audio data to a file.
150
+
151
+ Args:
152
+ filename: Target filename (extension will be added if missing)
153
+
154
+ Returns:
155
+ str: Final filename used
156
+ """
157
+ import os
158
+
159
+ # Use the actual returned format for the extension, not any requested format
160
+ expected_extension = f".{self.format.value}"
161
+
162
+ # Check if filename already has the correct extension
163
+ if filename.endswith(expected_extension):
164
+ final_filename = filename
165
+ else:
166
+ # Remove any existing extension and add the correct one
167
+ base_name = filename
168
+ # Remove common audio extensions if present
169
+ for ext in ['.mp3', '.wav', '.opus', '.aac', '.flac', '.pcm']:
170
+ if base_name.endswith(ext):
171
+ base_name = base_name[:-len(ext)]
172
+ break
173
+ final_filename = f"{base_name}{expected_extension}"
174
+
175
+ # Create directory if it doesn't exist
176
+ os.makedirs(os.path.dirname(final_filename) if os.path.dirname(final_filename) else ".", exist_ok=True)
177
+
178
+ # Write audio data
179
+ with open(final_filename, "wb") as f:
180
+ f.write(self.audio_data)
181
+
182
+ return final_filename
183
+
184
+
185
+ @dataclass
186
+ class TTSError:
187
+ """
188
+ Error information from TTS API.
189
+
190
+ Attributes:
191
+ code: Error code
192
+ message: Human-readable error message
193
+ type: Error type/category
194
+ details: Additional error details
195
+ timestamp: When the error occurred
196
+ """
197
+ code: str
198
+ message: str
199
+ type: Optional[str] = None
200
+ details: Optional[Dict[str, Any]] = None
201
+ timestamp: Optional[datetime] = None
202
+
203
+ def __post_init__(self):
204
+ """Set timestamp if not provided."""
205
+ if self.timestamp is None:
206
+ self.timestamp = datetime.now()
207
+
208
+
209
+ @dataclass
210
+ class APIError(TTSError):
211
+ """API-specific error information."""
212
+ status_code: int = 500
213
+ headers: Optional[Dict[str, str]] = None
214
+
215
+
216
+ @dataclass
217
+ class NetworkError(TTSError):
218
+ """Network-related error information."""
219
+ timeout: Optional[float] = None
220
+ retry_count: int = 0
221
+
222
+
223
+ @dataclass
224
+ class ValidationError(TTSError):
225
+ """Validation error information."""
226
+ field: Optional[str] = None
227
+ value: Optional[Any] = None
228
+
229
+
230
+ # Content type mappings for audio formats
231
+ CONTENT_TYPE_MAP = {
232
+ AudioFormat.MP3: "audio/mpeg",
233
+ AudioFormat.OPUS: "audio/opus",
234
+ AudioFormat.AAC: "audio/aac",
235
+ AudioFormat.FLAC: "audio/flac",
236
+ AudioFormat.WAV: "audio/wav",
237
+ AudioFormat.PCM: "audio/pcm"
238
+ }
239
+
240
+ # Reverse mapping for content type to format
241
+ FORMAT_FROM_CONTENT_TYPE = {v: k for k, v in CONTENT_TYPE_MAP.items()}
242
+
243
+
244
+ def get_content_type(format: Union[AudioFormat, str]) -> str:
245
+ """Get MIME content type for audio format."""
246
+ if isinstance(format, str):
247
+ format = AudioFormat(format.lower())
248
+ return CONTENT_TYPE_MAP.get(format, "audio/mpeg")
249
+
250
+
251
+ def get_format_from_content_type(content_type: str) -> AudioFormat:
252
+ """Get audio format from MIME content type."""
253
+ return FORMAT_FROM_CONTENT_TYPE.get(content_type, AudioFormat.MP3)
254
+
255
+
256
+ def get_supported_format(requested_format: AudioFormat) -> AudioFormat:
257
+ """
258
+ Map requested format to supported format.
259
+
260
+ Args:
261
+ requested_format: The requested audio format
262
+
263
+ Returns:
264
+ AudioFormat: MP3 or WAV (the supported formats)
265
+ """
266
+ if requested_format == AudioFormat.MP3:
267
+ return AudioFormat.MP3
268
+ else:
269
+ # All other formats (WAV, OPUS, AAC, FLAC, PCM) return WAV
270
+ return AudioFormat.WAV
271
+
272
+
273
+ def maps_to_wav(format_value: str) -> bool:
274
+ """
275
+ Check if a format maps to WAV.
276
+
277
+ Args:
278
+ format_value: Format string to check
279
+
280
+ Returns:
281
+ bool: True if the format maps to WAV
282
+ """
283
+ return format_value.lower() in ['wav', 'opus', 'aac', 'flac', 'pcm']
ttsfm/utils.py CHANGED
@@ -1,421 +1,466 @@
1
- """
2
- Utility functions for the TTSFM package.
3
-
4
- This module provides common utility functions used throughout the package,
5
- including HTTP helpers, validation utilities, and configuration management.
6
- """
7
-
8
- import os
9
- import re
10
- import time
11
- import random
12
- import logging
13
- from typing import Dict, Any, Optional, Union, List
14
- from urllib.parse import urljoin, urlparse
15
-
16
-
17
- # Configure logging
18
- logger = logging.getLogger(__name__)
19
-
20
-
21
- def get_user_agent() -> str:
22
- """
23
- Generate a realistic User-Agent string.
24
-
25
- Returns:
26
- str: User-Agent string for HTTP requests
27
- """
28
- try:
29
- from fake_useragent import UserAgent
30
- ua = UserAgent()
31
- return ua.random
32
- except ImportError:
33
- # Fallback if fake_useragent is not available
34
- return "TTSFM-Client/3.0.0 (Python)"
35
-
36
-
37
- def get_realistic_headers() -> Dict[str, str]:
38
- """
39
- Generate realistic HTTP headers for requests.
40
-
41
- Returns:
42
- Dict[str, str]: HTTP headers dictionary
43
- """
44
- user_agent = get_user_agent()
45
-
46
- headers = {
47
- "Accept": "application/json, audio/*",
48
- "Accept-Encoding": "gzip, deflate, br",
49
- "Accept-Language": random.choice(["en-US,en;q=0.9", "en-GB,en;q=0.8", "en-CA,en;q=0.7"]),
50
- "Cache-Control": "no-cache",
51
- "DNT": "1",
52
- "Pragma": "no-cache",
53
- "User-Agent": user_agent,
54
- "X-Requested-With": "XMLHttpRequest",
55
- }
56
-
57
- # Add browser-specific headers for Chromium-based browsers
58
- if any(browser in user_agent.lower() for browser in ['chrome', 'edge', 'chromium']):
59
- version_match = re.search(r'(?:Chrome|Edge|Chromium)/(\d+)', user_agent)
60
- major_version = version_match.group(1) if version_match else "121"
61
-
62
- brands = []
63
- if 'google chrome' in user_agent.lower():
64
- brands.extend([
65
- f'"Google Chrome";v="{major_version}"',
66
- f'"Chromium";v="{major_version}"',
67
- '"Not A(Brand";v="99"'
68
- ])
69
- elif 'microsoft edge' in user_agent.lower():
70
- brands.extend([
71
- f'"Microsoft Edge";v="{major_version}"',
72
- f'"Chromium";v="{major_version}"',
73
- '"Not A(Brand";v="99"'
74
- ])
75
- else:
76
- brands.extend([
77
- f'"Chromium";v="{major_version}"',
78
- '"Not A(Brand";v="8"'
79
- ])
80
-
81
- headers.update({
82
- "Sec-Ch-Ua": ", ".join(brands),
83
- "Sec-Ch-Ua-Mobile": "?0",
84
- "Sec-Ch-Ua-Platform": random.choice(['"Windows"', '"macOS"', '"Linux"']),
85
- "Sec-Fetch-Dest": "empty",
86
- "Sec-Fetch-Mode": "cors",
87
- "Sec-Fetch-Site": "same-origin"
88
- })
89
-
90
- # Randomly add some optional headers
91
- if random.random() < 0.5:
92
- headers["Upgrade-Insecure-Requests"] = "1"
93
-
94
- return headers
95
-
96
-
97
- def validate_text_length(text: str, max_length: int = 4096, raise_error: bool = True) -> bool:
98
- """
99
- Validate text length against maximum allowed characters.
100
-
101
- Args:
102
- text: Text to validate
103
- max_length: Maximum allowed length in characters
104
- raise_error: Whether to raise an exception if validation fails
105
-
106
- Returns:
107
- bool: True if text is within limits, False otherwise
108
-
109
- Raises:
110
- ValueError: If text exceeds max_length and raise_error is True
111
- """
112
- if not text:
113
- return True
114
-
115
- text_length = len(text)
116
-
117
- if text_length > max_length:
118
- if raise_error:
119
- raise ValueError(
120
- f"Text is too long ({text_length} characters). "
121
- f"Maximum allowed length is {max_length} characters. "
122
- f"TTS models typically support up to 4096 characters per request."
123
- )
124
- return False
125
-
126
- return True
127
-
128
-
129
- def split_text_by_length(text: str, max_length: int = 4096, preserve_words: bool = True) -> List[str]:
130
- """
131
- Split text into chunks that don't exceed the maximum length.
132
-
133
- Args:
134
- text: Text to split
135
- max_length: Maximum length per chunk
136
- preserve_words: Whether to avoid splitting words
137
-
138
- Returns:
139
- List[str]: List of text chunks
140
- """
141
- if not text:
142
- return []
143
-
144
- if len(text) <= max_length:
145
- return [text]
146
-
147
- chunks = []
148
-
149
- if preserve_words:
150
- # Split by sentences first, then by words if needed
151
- sentences = re.split(r'[.!?]+', text)
152
- current_chunk = ""
153
-
154
- for sentence in sentences:
155
- sentence = sentence.strip()
156
- if not sentence:
157
- continue
158
-
159
- # Add sentence ending punctuation back
160
- if not sentence.endswith(('.', '!', '?')):
161
- sentence += '.'
162
-
163
- # Check if adding this sentence would exceed the limit
164
- test_chunk = current_chunk + (" " if current_chunk else "") + sentence
165
-
166
- if len(test_chunk) <= max_length:
167
- current_chunk = test_chunk
168
- else:
169
- # Save current chunk if it has content
170
- if current_chunk:
171
- chunks.append(current_chunk.strip())
172
-
173
- # If single sentence is too long, split by words
174
- if len(sentence) > max_length:
175
- word_chunks = _split_by_words(sentence, max_length)
176
- chunks.extend(word_chunks)
177
- current_chunk = ""
178
- else:
179
- current_chunk = sentence
180
-
181
- # Add remaining chunk
182
- if current_chunk:
183
- chunks.append(current_chunk.strip())
184
- else:
185
- # Simple character-based splitting
186
- for i in range(0, len(text), max_length):
187
- chunks.append(text[i:i + max_length])
188
-
189
- return [chunk for chunk in chunks if chunk.strip()]
190
-
191
-
192
- def _split_by_words(text: str, max_length: int) -> List[str]:
193
- """
194
- Split text by words when sentences are too long.
195
-
196
- Args:
197
- text: Text to split
198
- max_length: Maximum length per chunk
199
-
200
- Returns:
201
- List[str]: List of word-based chunks
202
- """
203
- words = text.split()
204
- chunks = []
205
- current_chunk = ""
206
-
207
- for word in words:
208
- test_chunk = current_chunk + (" " if current_chunk else "") + word
209
-
210
- if len(test_chunk) <= max_length:
211
- current_chunk = test_chunk
212
- else:
213
- if current_chunk:
214
- chunks.append(current_chunk)
215
-
216
- # If single word is too long, split it
217
- if len(word) > max_length:
218
- for i in range(0, len(word), max_length):
219
- chunks.append(word[i:i + max_length])
220
- current_chunk = ""
221
- else:
222
- current_chunk = word
223
-
224
- if current_chunk:
225
- chunks.append(current_chunk)
226
-
227
- return chunks
228
-
229
-
230
- def sanitize_text(text: str) -> str:
231
- """
232
- Sanitize input text for TTS processing.
233
-
234
- Args:
235
- text: Input text to sanitize
236
-
237
- Returns:
238
- str: Sanitized text
239
- """
240
- if not text:
241
- return ""
242
-
243
- # Remove HTML tags
244
- text = re.sub(r'<[^>]+>', '', text)
245
-
246
- # Remove script tags and content
247
- text = re.sub(r'<script.*?</script>', '', text, flags=re.DOTALL | re.IGNORECASE)
248
-
249
- # Remove potentially dangerous characters
250
- text = re.sub(r'[<>"\']', '', text)
251
-
252
- # Normalize whitespace
253
- text = re.sub(r'\s+', ' ', text)
254
-
255
- return text.strip()
256
-
257
-
258
- def validate_url(url: str) -> bool:
259
- """
260
- Validate if a URL is properly formatted.
261
-
262
- Args:
263
- url: URL to validate
264
-
265
- Returns:
266
- bool: True if URL is valid, False otherwise
267
- """
268
- try:
269
- result = urlparse(url)
270
- return all([result.scheme, result.netloc])
271
- except Exception:
272
- return False
273
-
274
-
275
- def build_url(base_url: str, path: str) -> str:
276
- """
277
- Build a complete URL from base URL and path.
278
-
279
- Args:
280
- base_url: Base URL
281
- path: Path to append
282
-
283
- Returns:
284
- str: Complete URL
285
- """
286
- # Ensure base_url ends with /
287
- if not base_url.endswith('/'):
288
- base_url += '/'
289
-
290
- # Ensure path doesn't start with /
291
- if path.startswith('/'):
292
- path = path[1:]
293
-
294
- return urljoin(base_url, path)
295
-
296
-
297
- def get_random_delay(min_delay: float = 1.0, max_delay: float = 5.0) -> float:
298
- """
299
- Get a random delay with jitter for rate limiting.
300
-
301
- Args:
302
- min_delay: Minimum delay in seconds
303
- max_delay: Maximum delay in seconds
304
-
305
- Returns:
306
- float: Random delay in seconds
307
- """
308
- base_delay = random.uniform(min_delay, max_delay)
309
- jitter = random.uniform(0.1, 0.5)
310
- return base_delay + jitter
311
-
312
-
313
- def exponential_backoff(attempt: int, base_delay: float = 1.0, max_delay: float = 60.0) -> float:
314
- """
315
- Calculate exponential backoff delay.
316
-
317
- Args:
318
- attempt: Attempt number (0-based)
319
- base_delay: Base delay in seconds
320
- max_delay: Maximum delay in seconds
321
-
322
- Returns:
323
- float: Delay in seconds
324
- """
325
- delay = base_delay * (2 ** attempt)
326
- jitter = random.uniform(0.1, 0.3) * delay
327
- return min(delay + jitter, max_delay)
328
-
329
-
330
- def load_config_from_env(prefix: str = "TTSFM_") -> Dict[str, Any]:
331
- """
332
- Load configuration from environment variables.
333
-
334
- Args:
335
- prefix: Prefix for environment variables
336
-
337
- Returns:
338
- Dict[str, Any]: Configuration dictionary
339
- """
340
- config = {}
341
-
342
- for key, value in os.environ.items():
343
- if key.startswith(prefix):
344
- config_key = key[len(prefix):].lower()
345
-
346
- # Try to convert to appropriate type
347
- if value.lower() in ('true', 'false'):
348
- config[config_key] = value.lower() == 'true'
349
- elif value.isdigit():
350
- config[config_key] = int(value)
351
- elif '.' in value and value.replace('.', '').isdigit():
352
- config[config_key] = float(value)
353
- else:
354
- config[config_key] = value
355
-
356
- return config
357
-
358
-
359
- def setup_logging(level: Union[str, int] = logging.INFO, format_string: Optional[str] = None) -> None:
360
- """
361
- Setup logging configuration for the package.
362
-
363
- Args:
364
- level: Logging level
365
- format_string: Custom format string
366
- """
367
- if format_string is None:
368
- format_string = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
369
-
370
- logging.basicConfig(
371
- level=level,
372
- format=format_string,
373
- handlers=[logging.StreamHandler()]
374
- )
375
-
376
-
377
- def estimate_audio_duration(text: str, words_per_minute: float = 150.0) -> float:
378
- """
379
- Estimate audio duration based on text length.
380
-
381
- Args:
382
- text: Input text
383
- words_per_minute: Average speaking rate
384
-
385
- Returns:
386
- float: Estimated duration in seconds
387
- """
388
- if not text:
389
- return 0.0
390
-
391
- # Count words (simple whitespace split)
392
- word_count = len(text.split())
393
-
394
- # Calculate duration in seconds
395
- duration = (word_count / words_per_minute) * 60.0
396
-
397
- # Add some buffer for pauses and processing
398
- return duration * 1.1
399
-
400
-
401
- def format_file_size(size_bytes: int) -> str:
402
- """
403
- Format file size in human-readable format.
404
-
405
- Args:
406
- size_bytes: Size in bytes
407
-
408
- Returns:
409
- str: Formatted size string
410
- """
411
- if size_bytes == 0:
412
- return "0 B"
413
-
414
- size_names = ["B", "KB", "MB", "GB"]
415
- i = 0
416
-
417
- while size_bytes >= 1024 and i < len(size_names) - 1:
418
- size_bytes /= 1024.0
419
- i += 1
420
-
421
- return f"{size_bytes:.1f} {size_names[i]}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utility functions for the TTSFM package.
3
+
4
+ This module provides common utility functions used throughout the package,
5
+ including HTTP helpers, validation utilities, and configuration management.
6
+ """
7
+
8
+ import os
9
+ import re
10
+ import time
11
+ import random
12
+ import logging
13
+ from typing import Dict, Any, Optional, Union, List
14
+ from urllib.parse import urljoin, urlparse
15
+
16
+
17
+ # Configure logging
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ def get_user_agent() -> str:
22
+ """
23
+ Generate a realistic User-Agent string.
24
+
25
+ Returns:
26
+ str: User-Agent string for HTTP requests
27
+ """
28
+ try:
29
+ from fake_useragent import UserAgent
30
+ ua = UserAgent()
31
+ return ua.random
32
+ except ImportError:
33
+ # Fallback if fake_useragent is not available
34
+ return "TTSFM-Client/3.0.0 (Python)"
35
+
36
+
37
+ def get_realistic_headers() -> Dict[str, str]:
38
+ """
39
+ Generate realistic HTTP headers for requests.
40
+
41
+ Returns:
42
+ Dict[str, str]: HTTP headers dictionary
43
+ """
44
+ user_agent = get_user_agent()
45
+
46
+ headers = {
47
+ "Accept": "application/json, audio/*",
48
+ "Accept-Encoding": "gzip, deflate, br",
49
+ "Accept-Language": random.choice(["en-US,en;q=0.9", "en-GB,en;q=0.8", "en-CA,en;q=0.7"]),
50
+ "Cache-Control": "no-cache",
51
+ "DNT": "1",
52
+ "Pragma": "no-cache",
53
+ "User-Agent": user_agent,
54
+ "X-Requested-With": "XMLHttpRequest",
55
+ }
56
+
57
+ # Add browser-specific headers for Chromium-based browsers
58
+ if any(browser in user_agent.lower() for browser in ['chrome', 'edge', 'chromium']):
59
+ version_match = re.search(r'(?:Chrome|Edge|Chromium)/(\d+)', user_agent)
60
+ major_version = version_match.group(1) if version_match else "121"
61
+
62
+ brands = []
63
+ if 'google chrome' in user_agent.lower():
64
+ brands.extend([
65
+ f'"Google Chrome";v="{major_version}"',
66
+ f'"Chromium";v="{major_version}"',
67
+ '"Not A(Brand";v="99"'
68
+ ])
69
+ elif 'microsoft edge' in user_agent.lower():
70
+ brands.extend([
71
+ f'"Microsoft Edge";v="{major_version}"',
72
+ f'"Chromium";v="{major_version}"',
73
+ '"Not A(Brand";v="99"'
74
+ ])
75
+ else:
76
+ brands.extend([
77
+ f'"Chromium";v="{major_version}"',
78
+ '"Not A(Brand";v="8"'
79
+ ])
80
+
81
+ headers.update({
82
+ "Sec-Ch-Ua": ", ".join(brands),
83
+ "Sec-Ch-Ua-Mobile": "?0",
84
+ "Sec-Ch-Ua-Platform": random.choice(['"Windows"', '"macOS"', '"Linux"']),
85
+ "Sec-Fetch-Dest": "empty",
86
+ "Sec-Fetch-Mode": "cors",
87
+ "Sec-Fetch-Site": "same-origin"
88
+ })
89
+
90
+ # Randomly add some optional headers
91
+ if random.random() < 0.5:
92
+ headers["Upgrade-Insecure-Requests"] = "1"
93
+
94
+ return headers
95
+
96
+
97
+ def validate_text_length(text: str, max_length: int = 4096, raise_error: bool = True) -> bool:
98
+ """
99
+ Validate text length against maximum allowed characters.
100
+
101
+ Args:
102
+ text: Text to validate
103
+ max_length: Maximum allowed length in characters
104
+ raise_error: Whether to raise an exception if validation fails
105
+
106
+ Returns:
107
+ bool: True if text is within limits, False otherwise
108
+
109
+ Raises:
110
+ ValueError: If text exceeds max_length and raise_error is True
111
+ """
112
+ if not text:
113
+ return True
114
+
115
+ text_length = len(text)
116
+
117
+ if text_length > max_length:
118
+ if raise_error:
119
+ raise ValueError(
120
+ f"Text is too long ({text_length} characters). "
121
+ f"Maximum allowed length is {max_length} characters. "
122
+ f"TTS models typically support up to 4096 characters per request."
123
+ )
124
+ return False
125
+
126
+ return True
127
+
128
+
129
+ def split_text_by_length(text: str, max_length: int = 4096, preserve_words: bool = True) -> List[str]:
130
+ """
131
+ Split text into chunks that don't exceed the maximum length.
132
+
133
+ Args:
134
+ text: Text to split
135
+ max_length: Maximum length per chunk
136
+ preserve_words: Whether to avoid splitting words
137
+
138
+ Returns:
139
+ List[str]: List of text chunks
140
+ """
141
+ if not text:
142
+ return []
143
+
144
+ if len(text) <= max_length:
145
+ return [text]
146
+
147
+ chunks = []
148
+
149
+ if preserve_words:
150
+ # Split by sentences first, then by words if needed
151
+ sentences = re.split(r'[.!?]+', text)
152
+ current_chunk = ""
153
+
154
+ for sentence in sentences:
155
+ sentence = sentence.strip()
156
+ if not sentence:
157
+ continue
158
+
159
+ # Add sentence ending punctuation back
160
+ if not sentence.endswith(('.', '!', '?')):
161
+ sentence += '.'
162
+
163
+ # Check if adding this sentence would exceed the limit
164
+ test_chunk = current_chunk + (" " if current_chunk else "") + sentence
165
+
166
+ if len(test_chunk) <= max_length:
167
+ current_chunk = test_chunk
168
+ else:
169
+ # Save current chunk if it has content
170
+ if current_chunk:
171
+ chunks.append(current_chunk.strip())
172
+
173
+ # If single sentence is too long, split by words
174
+ if len(sentence) > max_length:
175
+ word_chunks = _split_by_words(sentence, max_length)
176
+ chunks.extend(word_chunks)
177
+ current_chunk = ""
178
+ else:
179
+ current_chunk = sentence
180
+
181
+ # Add remaining chunk
182
+ if current_chunk:
183
+ chunks.append(current_chunk.strip())
184
+ else:
185
+ # Simple character-based splitting
186
+ for i in range(0, len(text), max_length):
187
+ chunks.append(text[i:i + max_length])
188
+
189
+ return [chunk for chunk in chunks if chunk.strip()]
190
+
191
+
192
+ def _split_by_words(text: str, max_length: int) -> List[str]:
193
+ """
194
+ Split text by words when sentences are too long.
195
+
196
+ Args:
197
+ text: Text to split
198
+ max_length: Maximum length per chunk
199
+
200
+ Returns:
201
+ List[str]: List of word-based chunks
202
+ """
203
+ words = text.split()
204
+ chunks = []
205
+ current_chunk = ""
206
+
207
+ for word in words:
208
+ test_chunk = current_chunk + (" " if current_chunk else "") + word
209
+
210
+ if len(test_chunk) <= max_length:
211
+ current_chunk = test_chunk
212
+ else:
213
+ if current_chunk:
214
+ chunks.append(current_chunk)
215
+
216
+ # If single word is too long, split it
217
+ if len(word) > max_length:
218
+ for i in range(0, len(word), max_length):
219
+ chunks.append(word[i:i + max_length])
220
+ current_chunk = ""
221
+ else:
222
+ current_chunk = word
223
+
224
+ if current_chunk:
225
+ chunks.append(current_chunk)
226
+
227
+ return chunks
228
+
229
+
230
+ def sanitize_text(text: str) -> str:
231
+ """
232
+ Sanitize input text for TTS processing.
233
+
234
+ Removes HTML markup and potentially problematic characters to ensure
235
+ clean text input for text-to-speech generation. Uses safe regex patterns
236
+ to prevent ReDoS attacks.
237
+
238
+ Args:
239
+ text: Input text to sanitize
240
+
241
+ Returns:
242
+ str: Sanitized text safe for TTS processing
243
+
244
+ Raises:
245
+ ValueError: If input text is too long (>50000 characters)
246
+ """
247
+ if not text:
248
+ return ""
249
+
250
+ # Prevent ReDoS attacks by limiting input length
251
+ if len(text) > 50000:
252
+ raise ValueError("Input text too long for sanitization (max 50000 characters)")
253
+
254
+ # Use a simple character-by-character approach to remove HTML-like content
255
+ # This avoids complex regex patterns that can cause ReDoS
256
+ result = []
257
+ i = 0
258
+ while i < len(text):
259
+ if text[i] == '<':
260
+ # Find the end of the tag
261
+ j = i + 1
262
+ while j < len(text) and text[j] != '>':
263
+ j += 1
264
+ if j < len(text):
265
+ # Skip the entire tag
266
+ i = j + 1
267
+ else:
268
+ # No closing >, treat as regular character
269
+ result.append(text[i])
270
+ i += 1
271
+ elif text[i] == '&':
272
+ # Handle HTML entities
273
+ j = i + 1
274
+ while j < len(text) and j < i + 10 and text[j] not in ' \t\n\r<>&':
275
+ j += 1
276
+ if j < len(text) and text[j] == ';':
277
+ # Skip the entity
278
+ i = j + 1
279
+ else:
280
+ # Not a valid entity, keep the &
281
+ result.append(' ') # Replace with space for TTS
282
+ i += 1
283
+ else:
284
+ # Regular character
285
+ char = text[i]
286
+ # Normalize quotes for TTS
287
+ if char in '""''`':
288
+ result.append('"')
289
+ elif char in '<>':
290
+ # Skip these characters
291
+ pass
292
+ else:
293
+ result.append(char)
294
+ i += 1
295
+
296
+ # Join and normalize whitespace using a safe regex
297
+ sanitized = ''.join(result)
298
+ sanitized = re.sub(r'[ \t\n\r\f\v]+', ' ', sanitized)
299
+
300
+ return sanitized.strip()
301
+
302
+
303
+ def validate_url(url: str) -> bool:
304
+ """
305
+ Validate if a URL is properly formatted.
306
+
307
+ Args:
308
+ url: URL to validate
309
+
310
+ Returns:
311
+ bool: True if URL is valid, False otherwise
312
+ """
313
+ try:
314
+ result = urlparse(url)
315
+ return all([result.scheme, result.netloc])
316
+ except Exception:
317
+ return False
318
+
319
+
320
+ def build_url(base_url: str, path: str) -> str:
321
+ """
322
+ Build a complete URL from base URL and path.
323
+
324
+ Args:
325
+ base_url: Base URL
326
+ path: Path to append
327
+
328
+ Returns:
329
+ str: Complete URL
330
+ """
331
+ # Ensure base_url ends with /
332
+ if not base_url.endswith('/'):
333
+ base_url += '/'
334
+
335
+ # Ensure path doesn't start with /
336
+ if path.startswith('/'):
337
+ path = path[1:]
338
+
339
+ return urljoin(base_url, path)
340
+
341
+
342
+ def get_random_delay(min_delay: float = 1.0, max_delay: float = 5.0) -> float:
343
+ """
344
+ Get a random delay with jitter for rate limiting.
345
+
346
+ Args:
347
+ min_delay: Minimum delay in seconds
348
+ max_delay: Maximum delay in seconds
349
+
350
+ Returns:
351
+ float: Random delay in seconds
352
+ """
353
+ base_delay = random.uniform(min_delay, max_delay)
354
+ jitter = random.uniform(0.1, 0.5)
355
+ return base_delay + jitter
356
+
357
+
358
+ def exponential_backoff(attempt: int, base_delay: float = 1.0, max_delay: float = 60.0) -> float:
359
+ """
360
+ Calculate exponential backoff delay.
361
+
362
+ Args:
363
+ attempt: Attempt number (0-based)
364
+ base_delay: Base delay in seconds
365
+ max_delay: Maximum delay in seconds
366
+
367
+ Returns:
368
+ float: Delay in seconds
369
+ """
370
+ delay = base_delay * (2 ** attempt)
371
+ jitter = random.uniform(0.1, 0.3) * delay
372
+ return min(delay + jitter, max_delay)
373
+
374
+
375
+ def load_config_from_env(prefix: str = "TTSFM_") -> Dict[str, Any]:
376
+ """
377
+ Load configuration from environment variables.
378
+
379
+ Args:
380
+ prefix: Prefix for environment variables
381
+
382
+ Returns:
383
+ Dict[str, Any]: Configuration dictionary
384
+ """
385
+ config = {}
386
+
387
+ for key, value in os.environ.items():
388
+ if key.startswith(prefix):
389
+ config_key = key[len(prefix):].lower()
390
+
391
+ # Try to convert to appropriate type
392
+ if value.lower() in ('true', 'false'):
393
+ config[config_key] = value.lower() == 'true'
394
+ elif value.isdigit():
395
+ config[config_key] = int(value)
396
+ elif '.' in value and value.replace('.', '').isdigit():
397
+ config[config_key] = float(value)
398
+ else:
399
+ config[config_key] = value
400
+
401
+ return config
402
+
403
+
404
+ def setup_logging(level: Union[str, int] = logging.INFO, format_string: Optional[str] = None) -> None:
405
+ """
406
+ Setup logging configuration for the package.
407
+
408
+ Args:
409
+ level: Logging level
410
+ format_string: Custom format string
411
+ """
412
+ if format_string is None:
413
+ format_string = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
414
+
415
+ logging.basicConfig(
416
+ level=level,
417
+ format=format_string,
418
+ handlers=[logging.StreamHandler()]
419
+ )
420
+
421
+
422
+ def estimate_audio_duration(text: str, words_per_minute: float = 150.0) -> float:
423
+ """
424
+ Estimate audio duration based on text length.
425
+
426
+ Args:
427
+ text: Input text
428
+ words_per_minute: Average speaking rate
429
+
430
+ Returns:
431
+ float: Estimated duration in seconds
432
+ """
433
+ if not text:
434
+ return 0.0
435
+
436
+ # Count words (simple whitespace split)
437
+ word_count = len(text.split())
438
+
439
+ # Calculate duration in seconds
440
+ duration = (word_count / words_per_minute) * 60.0
441
+
442
+ # Add some buffer for pauses and processing
443
+ return duration * 1.1
444
+
445
+
446
+ def format_file_size(size_bytes: int) -> str:
447
+ """
448
+ Format file size in human-readable format.
449
+
450
+ Args:
451
+ size_bytes: Size in bytes
452
+
453
+ Returns:
454
+ str: Formatted size string
455
+ """
456
+ if size_bytes == 0:
457
+ return "0 B"
458
+
459
+ size_names = ["B", "KB", "MB", "GB"]
460
+ i = 0
461
+
462
+ while size_bytes >= 1024 and i < len(size_names) - 1:
463
+ size_bytes /= 1024.0
464
+ i += 1
465
+
466
+ return f"{size_bytes:.1f} {size_names[i]}"
uv.lock ADDED
The diff for this file is too large to render. See raw diff