Upload 8 files
- .dockerignore +18 -0
- Dockerfile +23 -0
- README.md +78 -10
- docker-compose.yml +17 -0
- requirements.txt +8 -0
- src/__pycache__/main.cpython-311.pyc +0 -0
- src/main.py +690 -0
- src/scraper.py +949 -0
.dockerignore
ADDED
@@ -0,0 +1,18 @@
# .dockerignore
__pycache__/
*.pyc
*.pyo
*.pyd
.git/
.gitignore
README.md
.env
.venv/
venv/
.pytest_cache/
.coverage
htmlcov/
.tox/
dist/
build/
*.egg-info/
Dockerfile
ADDED
@@ -0,0 +1,23 @@
# Dockerfile
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Create directories
RUN mkdir -p /app/mappings /app/playlists

# Expose port
EXPOSE 6680

# Command to run the application
CMD ["python", "main.py"]
README.md
CHANGED
@@ -1,10 +1,78 @@
# README.md
# Complete FSTV Proxy Server

Docker-based IPTV proxy server with automated scraping and playlist generation.

## 🚀 Features

- **Encoded URLs**: Clean proxy URLs like `/match/a7k9mq3x.m3u8`
- **Auto-scraping**: Daily at 12:05 AM UTC
- **Download endpoints**: Direct playlist/EPG downloads (the EPG only covers live events as of the last scrape; known issue, not fixed yet)
- **Sports + TV**: Combined matches and channels
- **Real-time streams**: Live HLS extraction from FSTV

## 📁 Setup

1. **Create structure:**
```bash
mkdir fstv-proxy && cd fstv-proxy
mkdir src mappings playlists
```

2. **Copy files:**
   - Copy the Docker files to the project root
   - Copy the Python files to `src/`
   - Copy the FSTV data files to the project root

3. **Build and run:**
```bash
docker-compose build
docker-compose up -d
```

## 🌐 Server Endpoints

**Base URL**: `http://your-server:6680`

### Download Endpoints:
- `GET /playlist/matches.m3u8` - Sports matches playlist
- `GET /playlist/channels.m3u8` - TV channels playlist
- `GET /playlist/combined.m3u8` - Combined playlist
- `GET /epg/matches.xml` - Sports EPG

### Streaming Endpoints:
- `GET /match/{id}.m3u8` - Match stream
- `GET /channel/{id}.m3u8` - TV channel stream

### Control Endpoints:
- `POST /scrape-now` - Manual scrape
- `GET /scrape-status` - Scrape info
- `GET /health` - Health check
- `GET /stats` - Server stats

## ⏰ Auto-Scraping

- **Schedule**: 12:05 AM UTC daily (additional scrape times can be added in `main.py`; see the sketch below this file)
- **Covers**: Full day of events (00:10 - 23:45)
- **Updates**: Mappings and playlists automatically

## 🔧 Development

- Edit `src/main.py` or `src/scraper.py`
- Run `docker-compose restart`
- No rebuild needed (`./src` is bind-mounted into the container)

## 📊 Usage Example

```bash
# Get playlists
curl http://your-server:6680/playlist/combined.m3u8
# /playlist/matches.m3u8 - sports matches playlist
# /playlist/channels.m3u8 - TV channels playlist

# Manual scrape
curl -X POST http://your-server:6680/scrape-now

# Check status
curl http://your-server:6680/health
```
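The README's Auto-Scraping section mentions adding extra scrape times in `main.py`. A minimal sketch of what that looks like, based on the `SCRAPE_TIMES` list defined in `src/main.py` (cron-style `"minute hour day month day_of_week"` strings); the midday and evening entries here are hypothetical examples, not part of this upload:

```python
# src/main.py -- hypothetical extension of the existing SCRAPE_TIMES list.
# Each entry is a cron-style string: "minute hour day month day_of_week".
SCRAPE_TIMES = [
    "05 00 * * *",  # 12:05 AM UTC - shipped default, right after the midnight refresh
    "00 12 * * *",  # 12:00 PM UTC - hypothetical midday re-scrape
    "00 18 * * *",  # 06:00 PM UTC - hypothetical evening re-scrape
]
```

Note that the scheduler setup in `lifespan()` splits each string but passes only the hour and minute fields to APScheduler, so the day/month/weekday parts are effectively ignored.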
docker-compose.yml
ADDED
@@ -0,0 +1,17 @@
# docker-compose.yml
version: '3.8'

services:
  fstv-proxy:
    build: .
    network_mode: "host"
    volumes:
      - ./src:/app
      - ./mappings:/app/mappings
      - ./playlists:/app/playlists
    environment:
      - PYTHONUNBUFFERED=1
      - LOG_LEVEL=INFO
      - TZ=UTC
    restart: unless-stopped
    container_name: fstv_proxy_server
requirements.txt
ADDED
@@ -0,0 +1,8 @@
# requirements.txt
fastapi==0.104.1
uvicorn[standard]==0.24.0
httpx==0.25.2
aiofiles==23.2.0
python-multipart==0.0.6
apscheduler==3.10.4
pytz==2023.3
src/__pycache__/main.cpython-311.pyc
ADDED
Binary file (30.7 kB)
src/main.py
ADDED
@@ -0,0 +1,690 @@
#!/usr/bin/env python3
"""
FSTV Proxy Server with Integrated Scraper
- Handles encoded URLs for matches and TV channels
- Auto-scrapes FSTV at 12:05 AM daily
- Provides download endpoints for playlists and EPG
- Implements the v3 -> v4 flow transformation
- Routes all streams through the fast-fstv.duckdns.org:4123 proxy
- Server: http://fast-fstv.duckdns.org:6680
"""

import json
import os
import re
import asyncio
import subprocess
import base64
from datetime import datetime, timezone
from urllib.parse import urljoin, urlparse, quote
from typing import Optional, Dict, Any
from contextlib import asynccontextmanager

import httpx
import uvicorn
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import Response, PlainTextResponse, FileResponse
from apscheduler.schedulers.asyncio import AsyncIOScheduler
import pytz

# Global variables
url_mappings: Dict[str, Dict[str, Any]] = {}
http_client: Optional[httpx.AsyncClient] = None
scheduler: Optional[AsyncIOScheduler] = None
last_scrape_info = {"status": "not_run", "timestamp": None, "mappings_count": 0}

# Configuration
FSTV_BASE_URL = "https://fstv.space"
USER_AGENT = "Mozilla/5.0 (X11; U; Linux x86_64; pl-PL; rv:2.0) Gecko/20110307 Firefox/4.0"
REQUEST_TIMEOUT = 15
SERVER_BASE_URL = "http://your-server:6680"
PROXY_SERVER = "http://m3u-playlist-server:4123"

# Scraping schedule - 12:05 AM daily
SCRAPE_TIMES = [
    "05 00 * * *",  # 12:05 AM - right after the midnight schedule refresh; add more if needed
]

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Handle startup and shutdown events"""
    global http_client, url_mappings, scheduler

    # STARTUP
    print("Starting FSTV Proxy Server...")

    # Initialize HTTP client
    http_client = httpx.AsyncClient(
        headers={
            "User-Agent": USER_AGENT,
            "Referer": "https://fstv.space/",
            "Origin": "https://fstv.space",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Cache-Control": "no-cache"
        },
        timeout=REQUEST_TIMEOUT,
        follow_redirects=True
    )

    # Load URL mappings
    await load_url_mappings()

    # Run initial scrape if no mappings found
    if len(url_mappings) == 0:
        print("No existing mappings found - running initial scrape...")
        await run_automatic_scraper()
        print(f"Initial scrape complete - loaded {len(url_mappings)} mappings")

    # Initialize scheduler
    scheduler = AsyncIOScheduler(timezone=pytz.UTC)

    # Add scraping jobs
    for scrape_time in SCRAPE_TIMES:
        minute, hour, day, month, day_of_week = scrape_time.split()
        scheduler.add_job(
            run_automatic_scraper,
            'cron',
            hour=int(hour),
            minute=int(minute),
            id=f"scraper_{hour}_{minute}"
        )
        print(f"Scheduled scraper for {hour}:{minute:0>2} UTC daily")

    scheduler.start()

    print(f"Server initialized")
    print(f"  {len(url_mappings)} URL mappings loaded")
    print(f"  Auto-scraper scheduled for 12:05 AM UTC daily")
    print(f"  Server URL: {SERVER_BASE_URL}")

    yield

    # SHUTDOWN
    if scheduler:
        scheduler.shutdown()

    if http_client:
        await http_client.aclose()

    print("FSTV Proxy Server shut down")

app = FastAPI(
    title="FSTV Proxy Server",
    description="Complete IPTV proxy server for FSTV sports and TV channels",
    version="2.0.0",
    lifespan=lifespan
)

async def load_url_mappings():
    """Load URL mappings from JSON files"""
    global url_mappings

    url_mappings = {}
    mapping_files = [
        "/app/mappings/url_mappings_matches.json",
        "/app/mappings/url_mappings_channels.json"
    ]

    for file_path in mapping_files:
        if os.path.exists(file_path):
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    file_mappings = json.load(f)
                    url_mappings.update(file_mappings)
                    print(f"Loaded {len(file_mappings)} mappings from {os.path.basename(file_path)}")
            except Exception as e:
                print(f"Failed to load {file_path}: {e}")
        else:
            print(f"Mapping file not found: {file_path}")

async def run_automatic_scraper():
    """Run the scraper automatically"""
    global last_scrape_info

    print("Running automatic scraper...")

    try:
        # Run the scraper
        result = subprocess.run(
            ["python", "/app/scraper.py"],
            capture_output=True,
            text=True,
            cwd="/app"
        )

        if result.returncode == 0:
            print("Automatic scraper completed successfully")
            await load_url_mappings()

            last_scrape_info = {
                "status": "success",
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "mappings_count": len(url_mappings),
                "output": result.stdout[-500:] if result.stdout else ""  # Last 500 chars
            }
        else:
            print(f"Scraper failed with return code {result.returncode}")
            print(f"Error: {result.stderr}")

            last_scrape_info = {
                "status": "error",
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "error": result.stderr,
                "return_code": result.returncode
            }

    except Exception as e:
        print(f"Error running automatic scraper: {e}")
        last_scrape_info = {
            "status": "exception",
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "error": str(e)
        }

# ============================================================================
# STREAMING HELPER FUNCTIONS
# ============================================================================

def encode_headers_for_proxy(headers: dict) -> str:
    """Encode headers as base64 for the proxy server"""
    try:
        # Convert headers to pipe-separated format
        header_parts = []
        for key, value in headers.items():
            header_parts.append(f"{key}={value}")

        header_string = "|".join(header_parts)
        # Base64 encode
        encoded = base64.b64encode(header_string.encode('utf-8')).decode('utf-8')
        return encoded
    except Exception as e:
        print(f"Error encoding headers: {e}")
        return ""

async def fetch_v3_to_v4_url(v3_url: str) -> Optional[str]:
    """Fetch the v3 URL and extract the v4 variant URL"""
    try:
        print(f"  Fetching v3 master: {v3_url}")

        response = await http_client.get(v3_url)
        if response.status_code != 200:
            print(f"  HTTP {response.status_code} - Failed to fetch v3 master")
            return None

        content = response.text

        # Extract v4 path from master playlist
        v4_path = extract_v4_path_from_playlist(content)
        if not v4_path:
            print(f"  No v4 path found in master playlist")
            return None

        # Build complete v4 URL
        parsed_v3 = urlparse(v3_url)
        base_url = f"{parsed_v3.scheme}://{parsed_v3.netloc}"
        v4_url = base_url + v4_path

        print(f"  Extracted v4 URL: {v4_url}")
        return v4_url

    except Exception as e:
        print(f"  Error fetching v3 URL: {e}")
        return None

def extract_v4_path_from_playlist(playlist_content: str) -> Optional[str]:
    """Extract the v4 path from a master playlist"""
    try:
        lines = playlist_content.split('\n')
        best_bandwidth = 0
        best_path = None

        for i, line in enumerate(lines):
            line = line.strip()

            if line.startswith('#EXT-X-STREAM-INF'):
                # Extract bandwidth
                bandwidth_match = re.search(r'BANDWIDTH=(\d+)', line)
                if bandwidth_match:
                    bandwidth = int(bandwidth_match.group(1))

                    # Get the next line which should be the path
                    if i + 1 < len(lines):
                        path_line = lines[i + 1].strip()
                        if path_line and not path_line.startswith('#'):
                            # Look for v4 paths specifically
                            if '/v4/' in path_line:
                                if bandwidth > best_bandwidth:
                                    best_bandwidth = bandwidth
                                    best_path = path_line

        if best_path:
            print(f"  Best v4 path: {best_bandwidth} bps -> {best_path}")
            return best_path
        else:
            print(f"  No v4 paths found in playlist")
            return None

    except Exception as e:
        print(f"  Error extracting v4 path: {e}")
        return None

async def proxy_stream_through_4123(stream_url: str, headers: dict) -> Optional[str]:
    """Fetch the stream URL through the 4123 proxy, passing headers as a base64 'data' parameter"""
    try:
        encoded_url = quote(stream_url, safe='')
        encoded_headers = encode_headers_for_proxy(headers)
        proxy_url = f"{PROXY_SERVER}?url={encoded_url}&data={encoded_headers}"

        # Browser-like headers for the 4123 server
        browser_headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate",
            "DNT": "1",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1"
        }

        response = await http_client.get(proxy_url, headers=browser_headers)

        if response.status_code == 200:
            print(f"  Proxy returned HLS content")
            return response.text
        else:
            print(f"  Proxy failed: HTTP {response.status_code}")
            print(f"  Response: {response.text[:200]}")
            return None

    except Exception as e:
        print(f"  Error proxying stream: {e}")
        return None

async def extract_streaming_urls_from_html(html_content: str) -> list:
    """Extract streaming URLs from match page HTML"""
    streaming_urls = []

    # Pattern 1: Look for direct .m3u8 URLs
    m3u8_pattern = r'https://[^"\'\s<>]+\.m3u8[^"\'\s<>]*'
    m3u8_matches = re.findall(m3u8_pattern, html_content)

    for url in m3u8_matches:
        url = url.strip()
        if is_valid_streaming_url(url):
            streaming_urls.append(url)

    # Pattern 2: Look for JavaScript variables
    js_url_pattern = r'["\']https://[^"\'<>]+\.m3u8[^"\'<>]*["\']'
    js_matches = re.findall(js_url_pattern, html_content)

    for match in js_matches:
        url = match.strip('"\'')
        if is_valid_streaming_url(url) and url not in streaming_urls:
            streaming_urls.append(url)

    return streaming_urls

def is_valid_streaming_url(url: str) -> bool:
    """Check if a URL looks like a valid streaming URL"""
    try:
        parsed = urlparse(url)
        if not parsed.scheme or not parsed.netloc:
            return False

        if not url.endswith('.m3u8'):
            return False

        bad_patterns = ['javascript:', 'data:', 'blob:', 'about:']
        for pattern in bad_patterns:
            if url.lower().startswith(pattern):
                return False

        return True
    except:
        return False

# ============================================================================
# API ENDPOINTS
# ============================================================================

@app.get("/")
async def root():
    """Root endpoint with server info"""
    return {
        "service": "FSTV Proxy Server",
        "version": "2.0.0",
        "status": "running",
        "server_url": SERVER_BASE_URL,
        "mappings_loaded": len(url_mappings),
        "last_scrape": last_scrape_info,
        "endpoints": {
            "streaming": [
                "/match/{encoded_id}.m3u8",
                "/channel/{encoded_id}.m3u8"
            ],
            "downloads": [
                "/playlist/matches.m3u8",
                "/playlist/channels.m3u8",
                "/playlist/combined.m3u8",
                "/epg/matches.xml"
            ],
            "control": [
                "/scrape-now",
                "/scrape-status",
                "/health",
                "/stats"
            ]
        }
    }

@app.get("/health")
async def health_check():
    """Health check endpoint"""
    return {
        "status": "healthy",
        "timestamp": datetime.utcnow().isoformat(),
        "mappings_count": len(url_mappings),
        "last_scrape_status": last_scrape_info.get("status", "unknown")
    }

@app.get("/stats")
async def get_stats():
    """Get detailed server statistics"""
    match_count = sum(1 for mapping in url_mappings.values() if mapping.get('type') == 'match')
    channel_count = sum(1 for mapping in url_mappings.values() if mapping.get('type') == 'channel')

    return {
        "total_mappings": len(url_mappings),
        "matches": match_count,
        "channels": channel_count,
        "server_time": datetime.utcnow().isoformat(),
        "last_scrape": last_scrape_info,
        "scheduled_scrapes": [
            {"time": "00:05 UTC", "description": "Daily automatic scrape"}
        ]
    }

# ============================================================================
# DOWNLOAD ENDPOINTS
# ============================================================================

@app.get("/playlist/matches.m3u8")
async def download_matches_playlist():
    """Download matches M3U playlist"""
    file_path = "/app/playlists/fstv_matches_encoded.m3u"

    if not os.path.exists(file_path):
        raise HTTPException(status_code=404, detail="Matches playlist not found. Run scraper first.")

    return FileResponse(
        path=file_path,
        media_type="application/vnd.apple.mpegurl",
        filename="fstv_matches.m3u"
    )

@app.get("/playlist/channels.m3u8")
async def download_channels_playlist():
    """Download TV channels M3U playlist"""
    file_path = "/app/playlists/fstv_tv_channels_encoded.m3u"

    if not os.path.exists(file_path):
        raise HTTPException(status_code=404, detail="Channels playlist not found. Run scraper first.")

    return FileResponse(
        path=file_path,
        media_type="application/vnd.apple.mpegurl",
        filename="fstv_channels.m3u"
    )

@app.get("/playlist/combined.m3u8")
async def download_combined_playlist():
    """Download combined matches + channels playlist"""
    try:
        combined_content = "#EXTM3U url-tvg=\"http://fast-fstv.duckdns.org:6680/epg/matches.xml\"\n"

        # Add matches
        matches_file = "/app/playlists/fstv_matches_encoded.m3u"
        if os.path.exists(matches_file):
            with open(matches_file, 'r', encoding='utf-8') as f:
                content = f.read()
                # Skip the #EXTM3U line and add the rest
                lines = content.split('\n')[1:]
                combined_content += '\n'.join(lines) + '\n'

        # Add channels
        channels_file = "/app/playlists/fstv_tv_channels_encoded.m3u"
        if os.path.exists(channels_file):
            with open(channels_file, 'r', encoding='utf-8') as f:
                content = f.read()
                # Skip the #EXTM3U line and add the rest
                lines = content.split('\n')[1:]
                combined_content += '\n'.join(lines) + '\n'

        return PlainTextResponse(
            content=combined_content,
            media_type="application/vnd.apple.mpegurl",
            headers={"Content-Disposition": "attachment; filename=fstv_combined.m3u"}
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to generate combined playlist: {str(e)}")

@app.get("/epg/matches.xml")
async def download_matches_epg():
    """Download matches EPG/XMLTV file"""
    file_path = "/app/playlists/fstv_matches_encoded.xml"

    if not os.path.exists(file_path):
        raise HTTPException(status_code=404, detail="EPG file not found. Run scraper first.")

    return FileResponse(
        path=file_path,
        media_type="application/xml",
        filename="fstv_epg.xml"
    )

# ============================================================================
# SCRAPER CONTROL ENDPOINTS
# ============================================================================

@app.post("/scrape-now")
async def manual_scrape():
    """Manually trigger the scraper"""
    print("Manual scrape triggered...")

    try:
        result = subprocess.run(
            ["python", "/app/scraper.py"],
            capture_output=True,
            text=True,
            cwd="/app"
        )

        if result.returncode == 0:
            await load_url_mappings()

            return {
                "status": "success",
                "message": f"Scraper completed. Loaded {len(url_mappings)} mappings.",
                "timestamp": datetime.utcnow().isoformat(),
                "output": result.stdout[-1000:] if result.stdout else ""
            }
        else:
            return {
                "status": "error",
                "message": "Scraper failed",
                "error": result.stderr,
                "return_code": result.returncode,
                "timestamp": datetime.utcnow().isoformat()
            }

    except Exception as e:
        return {
            "status": "exception",
            "message": str(e),
            "timestamp": datetime.utcnow().isoformat()
        }

@app.get("/scrape-status")
async def get_scrape_status():
    """Get last scrape status and info"""
    return last_scrape_info

# ============================================================================
# STREAMING ENDPOINTS
# ============================================================================

@app.get("/match/{encoded_id}.m3u8")
async def get_match_stream(encoded_id: str, request: Request):
    """Handle match stream requests with the v3 -> v4 -> 4123 flow"""
    print(f"Match request: {encoded_id} from {request.client.host}")

    # Look up the encoded ID
    if encoded_id not in url_mappings:
        print(f"Match ID not found: {encoded_id}")
        raise HTTPException(status_code=404, detail="Match not found")

    mapping = url_mappings[encoded_id]

    if mapping.get('type') != 'match':
        print(f"Invalid type for match request: {mapping.get('type')}")
        raise HTTPException(status_code=400, detail="Invalid match ID")

    fstv_path = mapping.get('fstv_path')
    if not fstv_path:
        print(f"No FSTV path found for: {encoded_id}")
        raise HTTPException(status_code=500, detail="Invalid mapping data")

    try:
        # Step 1: Fetch FSTV match page to extract the v3 URL
        match_url = FSTV_BASE_URL + fstv_path
        print(f"  Fetching match page: {match_url}")

        response = await http_client.get(match_url)
        if response.status_code == 404:
            print(f"  Match page not found (404) - may not be live yet")
            raise HTTPException(status_code=503, detail="Match not available yet")

        if response.status_code != 200:
            print(f"  HTTP {response.status_code} - Failed to fetch match page")
            raise HTTPException(status_code=503, detail="Match page unavailable")

        # Step 2: Extract v3 streaming URLs from the page
        streaming_urls = await extract_streaming_urls_from_html(response.text)
        if not streaming_urls:
            print(f"  No streaming URLs found in match page")
            raise HTTPException(status_code=503, detail="No stream available")

        v3_url = streaming_urls[0]  # Use first found URL
        print(f"  Found v3 URL: {v3_url}")

        # Step 3: Transform v3 -> v4
        v4_url = await fetch_v3_to_v4_url(v3_url)
        if not v4_url:
            print(f"  Failed to get v4 URL")
            raise HTTPException(status_code=503, detail="Stream transformation failed")

        # Step 4: Proxy through 4123 with headers
        headers = {
            "Referer": "https://fstv.space",
            "Origin": "https://fstv.space",
            "User-Agent": USER_AGENT
        }

        hls_content = await proxy_stream_through_4123(v4_url, headers)
        if not hls_content:
            print(f"  Failed to proxy stream through 4123")
            raise HTTPException(status_code=503, detail="Stream proxy failed")

        print(f"Match stream delivered: {encoded_id}")
        return PlainTextResponse(
            content=hls_content,
            media_type="application/vnd.apple.mpegurl",
            headers={
                "Cache-Control": "no-cache, no-store, must-revalidate",
                "Pragma": "no-cache",
                "Expires": "0"
            }
        )

    except HTTPException:
        raise
    except Exception as e:
        print(f"Error processing match {encoded_id}: {e}")
        raise HTTPException(status_code=500, detail="Stream processing failed")

@app.get("/channel/{encoded_id}.m3u8")
async def get_channel_stream(encoded_id: str, request: Request):
    """Handle TV channel stream requests with the v3 -> v4 -> 4123 flow"""
    print(f"Channel request: {encoded_id} from {request.client.host}")

    # Look up the encoded ID
    if encoded_id not in url_mappings:
        print(f"Channel ID not found: {encoded_id}")
        raise HTTPException(status_code=404, detail="Channel not found")

    mapping = url_mappings[encoded_id]

    if mapping.get('type') != 'channel':
        print(f"Invalid type for channel request: {mapping.get('type')}")
        raise HTTPException(status_code=400, detail="Invalid channel ID")

    original_stream_url = mapping.get('original_stream_url')
    if not original_stream_url:
        print(f"No stream URL found for: {encoded_id}")
        raise HTTPException(status_code=500, detail="Invalid mapping data")

    try:
        # Step 1: We already have the v3 URL from the channel mapping
        v3_url = original_stream_url
        print(f"  Channel v3 URL: {v3_url}")

        # Step 2: Transform v3 -> v4
        v4_url = await fetch_v3_to_v4_url(v3_url)
        if not v4_url:
            print(f"  Failed to get v4 URL for channel")
            raise HTTPException(status_code=503, detail="Channel transformation failed")

        # Step 3: Proxy through 4123 with headers
        headers = {
            "Referer": "https://fstv.space",
            "Origin": "https://fstv.space",
            "User-Agent": USER_AGENT
        }

        hls_content = await proxy_stream_through_4123(v4_url, headers)
        if not hls_content:
            print(f"  Failed to proxy channel through 4123")
            raise HTTPException(status_code=503, detail="Channel proxy failed")

        print(f"Channel stream delivered: {encoded_id}")
        return PlainTextResponse(
            content=hls_content,
            media_type="application/vnd.apple.mpegurl",
            headers={
                "Cache-Control": "no-cache, no-store, must-revalidate",
                "Pragma": "no-cache",
                "Expires": "0"
            }
        )

    except HTTPException:
        raise
    except Exception as e:
        print(f"Error processing channel {encoded_id}: {e}")
        raise HTTPException(status_code=500, detail="Channel processing failed")

if __name__ == "__main__":
    print("Starting FSTV Proxy Server on port 6680...")
    print("A CanBert ENT / Creation")
    print("Playlist downloads available at /playlist/ endpoints")
    print("Auto-scraper scheduled for 12:05 AM UTC daily; add more times as needed.")
    print("All streams proxy through the m3u playlist proxy on port 4123")

    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=6680,
        reload=True,
        log_level="info"
    )
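To make the `proxy_stream_through_4123()` flow above concrete, here is a small standalone sketch of how the outgoing proxy request is assembled: the upstream headers are joined as `key=value` pairs with `|`, base64-encoded, and attached to the 4123 proxy URL alongside the percent-encoded stream URL. The stream URL and header values below are placeholders, not real endpoints.

```python
import base64
from urllib.parse import quote

PROXY_SERVER = "http://m3u-playlist-server:4123"

def build_proxy_url(stream_url: str, headers: dict) -> str:
    """Mirror of the encoding main.py performs before calling the 4123 proxy."""
    header_string = "|".join(f"{k}={v}" for k, v in headers.items())
    encoded_headers = base64.b64encode(header_string.encode("utf-8")).decode("utf-8")
    return f"{PROXY_SERVER}?url={quote(stream_url, safe='')}&data={encoded_headers}"

# Placeholder v4 URL with the same FSTV headers main.py sends
example = build_proxy_url(
    "https://example-cdn.invalid/v4/stream.m3u8",
    {"Referer": "https://fstv.space", "Origin": "https://fstv.space"},
)
print(example)
```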
src/scraper.py
ADDED
|
@@ -0,0 +1,949 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Combined FSTV Live Scraper
|
| 4 |
+
- Fetches LIVE data from FSTV endpoints
|
| 5 |
+
- Scrapes both sports matches and TV channels
|
| 6 |
+
- Finds ALL matches (live, upcoming, finished) from all sections
|
| 7 |
+
- Generates encoded URLs with database mapping
|
| 8 |
+
- Outputs M3U playlists and EPG files
|
| 9 |
+
- Designed for automated daily execution at 12:05 AM
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import re
|
| 13 |
+
import os
|
| 14 |
+
import json
|
| 15 |
+
import random
|
| 16 |
+
import string
|
| 17 |
+
import asyncio
|
| 18 |
+
import base64
|
| 19 |
+
from datetime import datetime, timezone, timedelta
|
| 20 |
+
import xml.etree.ElementTree as ET
|
| 21 |
+
from html import unescape
|
| 22 |
+
import pytz
|
| 23 |
+
import httpx
|
| 24 |
+
|
| 25 |
+
class CombinedFSTVScraper:
|
| 26 |
+
"""
|
| 27 |
+
Combined live scraper for FSTV matches and TV channels
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
def __init__(self, debug=True, proxy_server="http://fast-fstv.duckdns.org:6680"):
|
| 31 |
+
self.debug = debug
|
| 32 |
+
self.mappings_dir = "/app/mappings"
|
| 33 |
+
self.playlists_dir = "/app/playlists"
|
| 34 |
+
self.proxy_server = proxy_server.rstrip('/')
|
| 35 |
+
|
| 36 |
+
# Timezone handling
|
| 37 |
+
self.api_timezone = pytz.timezone('US/Eastern')
|
| 38 |
+
self.utc = pytz.UTC
|
| 39 |
+
|
| 40 |
+
# HTTP client for fetching live data
|
| 41 |
+
self.http_client = None
|
| 42 |
+
|
| 43 |
+
# URL mappings
|
| 44 |
+
self.match_mappings = {}
|
| 45 |
+
self.channel_mappings = {}
|
| 46 |
+
|
| 47 |
+
# Stats
|
| 48 |
+
self.stats = {
|
| 49 |
+
"matches_found": 0,
|
| 50 |
+
"channels_found": 0,
|
| 51 |
+
"encoded_urls_generated": 0,
|
| 52 |
+
"files_generated": 0,
|
| 53 |
+
"http_requests": 0
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
# Base URL
|
| 57 |
+
self.base_url = "https://fstv.space"
|
| 58 |
+
|
| 59 |
+
async def init_http_client(self):
|
| 60 |
+
"""Initialize HTTP client for FSTV requests"""
|
| 61 |
+
self.http_client = httpx.AsyncClient(
|
| 62 |
+
headers={
|
| 63 |
+
"User-Agent": "Mozilla/5.0 (X11; U; Linux x86_64; pl-PL; rv:2.0) Gecko/20110307 Firefox/4.0",
|
| 64 |
+
"Referer": "https://fstv.space/",
|
| 65 |
+
"Origin": "https://fstv.space",
|
| 66 |
+
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
| 67 |
+
"Accept-Language": "en-US,en;q=0.5",
|
| 68 |
+
"Cache-Control": "no-cache"
|
| 69 |
+
},
|
| 70 |
+
timeout=30.0,
|
| 71 |
+
follow_redirects=True
|
| 72 |
+
)
|
| 73 |
+
|
| 74 |
+
async def close_http_client(self):
|
| 75 |
+
"""Close HTTP client"""
|
| 76 |
+
if self.http_client:
|
| 77 |
+
await self.http_client.aclose()
|
| 78 |
+
|
| 79 |
+
def generate_encoded_id(self, length=8):
|
| 80 |
+
"""Generate random encoded ID"""
|
| 81 |
+
characters = string.ascii_lowercase + string.digits
|
| 82 |
+
return ''.join(random.choice(characters) for _ in range(length))
|
| 83 |
+
|
| 84 |
+
def ensure_unique_id(self, existing_mappings):
|
| 85 |
+
"""Generate unique ID across all mappings"""
|
| 86 |
+
all_mappings = {**self.match_mappings, **self.channel_mappings, **existing_mappings}
|
| 87 |
+
while True:
|
| 88 |
+
encoded_id = self.generate_encoded_id()
|
| 89 |
+
if encoded_id not in all_mappings:
|
| 90 |
+
return encoded_id
|
| 91 |
+
|
| 92 |
+
async def fetch_live_data(self):
|
| 93 |
+
"""Fetch LIVE data from FSTV endpoints"""
|
| 94 |
+
print("?? Fetching LIVE data from FSTV...")
|
| 95 |
+
|
| 96 |
+
try:
|
| 97 |
+
# Fetch main page (sports matches)
|
| 98 |
+
print(" ?? Fetching sports matches from https://fstv.space")
|
| 99 |
+
matches_response = await self.http_client.get("https://fstv.space")
|
| 100 |
+
self.stats["http_requests"] += 1
|
| 101 |
+
|
| 102 |
+
if matches_response.status_code == 200:
|
| 103 |
+
self.matches_html = matches_response.text
|
| 104 |
+
print(f" ? Sports data: {len(self.matches_html):,} characters")
|
| 105 |
+
else:
|
| 106 |
+
print(f" ? Failed to fetch sports data: HTTP {matches_response.status_code}")
|
| 107 |
+
self.matches_html = None
|
| 108 |
+
|
| 109 |
+
# Fetch TV channels page
|
| 110 |
+
print(" ?? Fetching TV channels from https://fstv.space/live-tv.html")
|
| 111 |
+
channels_response = await self.http_client.get("https://fstv.space/live-tv.html")
|
| 112 |
+
self.stats["http_requests"] += 1
|
| 113 |
+
|
| 114 |
+
if channels_response.status_code == 200:
|
| 115 |
+
self.channels_html = channels_response.text
|
| 116 |
+
print(f" ? Channels data: {len(self.channels_html):,} characters")
|
| 117 |
+
else:
|
| 118 |
+
print(f" ? Failed to fetch channels data: HTTP {channels_response.status_code}")
|
| 119 |
+
self.channels_html = None
|
| 120 |
+
|
| 121 |
+
return self.matches_html is not None or self.channels_html is not None
|
| 122 |
+
|
| 123 |
+
except Exception as e:
|
| 124 |
+
print(f"? Error fetching live data: {e}")
|
| 125 |
+
return False
|
| 126 |
+
|
| 127 |
+
def parse_timestamp(self, timestamp_str):
|
| 128 |
+
"""Convert Unix timestamp to UTC datetime"""
|
| 129 |
+
try:
|
| 130 |
+
timestamp = int(timestamp_str)
|
| 131 |
+
return datetime.fromtimestamp(timestamp, tz=self.utc)
|
| 132 |
+
except (ValueError, TypeError):
|
| 133 |
+
return None
|
| 134 |
+
|
| 135 |
+
def extract_matches_data(self):
|
| 136 |
+
"""Extract matches data from ALL sections of FSTV page"""
|
| 137 |
+
if not self.matches_html:
|
| 138 |
+
print("?? No matches data to process")
|
| 139 |
+
return []
|
| 140 |
+
|
| 141 |
+
print("?? Extracting matches data from all sections...")
|
| 142 |
+
matches = []
|
| 143 |
+
|
| 144 |
+
# Pattern 1: Featured slider matches (slide-item)
|
| 145 |
+
print(" ?? Searching slider matches...")
|
| 146 |
+
slide_pattern = r'<div class="slide-item">(.*?)</div>\s*(?=<div class="slide-item"|$)'
|
| 147 |
+
slide_blocks = re.findall(slide_pattern, self.matches_html, re.DOTALL)
|
| 148 |
+
print(f" Found {len(slide_blocks)} slider matches")
|
| 149 |
+
|
| 150 |
+
for block_html in slide_blocks:
|
| 151 |
+
match_data = self.parse_slide_block(block_html)
|
| 152 |
+
if match_data:
|
| 153 |
+
self.add_match_to_results(match_data, matches)
|
| 154 |
+
|
| 155 |
+
# Pattern 2: Common table rows (FIXED PATTERN)
|
| 156 |
+
print(" ?? Searching table row matches...")
|
| 157 |
+
table_pattern = r'<div[^>]*class="[^"]*common-table-row[^"]*table-row[^"]*"[^>]*onclick="window\.location\.href=\'([^\']+)\';?"[^>]*>(.*?)</div>'
|
| 158 |
+
table_matches = re.findall(table_pattern, self.matches_html, re.DOTALL)
|
| 159 |
+
print(f" Found {len(table_matches)} table row matches")
|
| 160 |
+
|
| 161 |
+
for match_path, block_html in table_matches:
|
| 162 |
+
match_data = self.parse_table_row_block(block_html, match_path)
|
| 163 |
+
if match_data:
|
| 164 |
+
self.add_match_to_results(match_data, matches)
|
| 165 |
+
|
| 166 |
+
# Pattern 3: Direct match links (comprehensive fallback)
|
| 167 |
+
print(" ?? Searching direct match links...")
|
| 168 |
+
link_pattern = r'<a[^>]*href="(/match/[^"]+)"[^>]*>.*?</a>'
|
| 169 |
+
link_matches = re.findall(link_pattern, self.matches_html, re.DOTALL)
|
| 170 |
+
unique_links = list(set(link_matches)) # Remove duplicates
|
| 171 |
+
print(f" Found {len(unique_links)} unique match links")
|
| 172 |
+
|
| 173 |
+
# For direct links, create basic match data
|
| 174 |
+
for match_path in unique_links:
|
| 175 |
+
# Skip if we already have this match
|
| 176 |
+
if any(m.get('match_path') == match_path for m in matches):
|
| 177 |
+
continue
|
| 178 |
+
|
| 179 |
+
# Extract info from URL
|
| 180 |
+
match_data = self.parse_match_path(match_path)
|
| 181 |
+
if match_data:
|
| 182 |
+
self.add_match_to_results(match_data, matches)
|
| 183 |
+
|
| 184 |
+
print(f"? Total processed {len(matches)} matches from all sections")
|
| 185 |
+
return matches
|
| 186 |
+
|
| 187 |
+
def add_match_to_results(self, match_data, matches):
|
| 188 |
+
"""Add match to results with encoded ID and mapping"""
|
| 189 |
+
# Generate encoded ID
|
| 190 |
+
encoded_id = self.ensure_unique_id({})
|
| 191 |
+
match_data['encoded_id'] = encoded_id
|
| 192 |
+
|
| 193 |
+
# Add to mappings
|
| 194 |
+
self.match_mappings[encoded_id] = {
|
| 195 |
+
'fstv_path': match_data['match_path'],
|
| 196 |
+
'type': 'match',
|
| 197 |
+
'name': self.generate_display_name(match_data),
|
| 198 |
+
'league': match_data.get('league', 'Unknown'),
|
| 199 |
+
'status': match_data.get('status', 'Unknown'),
|
| 200 |
+
'timestamp': match_data.get('timestamp').isoformat() if match_data.get('timestamp') else None,
|
| 201 |
+
'created_at': datetime.now(self.utc).isoformat()
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
matches.append(match_data)
|
| 205 |
+
self.stats["matches_found"] += 1
|
| 206 |
+
self.stats["encoded_urls_generated"] += 1
|
| 207 |
+
|
| 208 |
+
def parse_slide_block(self, block_html):
|
| 209 |
+
"""Parse individual slide-item block"""
|
| 210 |
+
try:
|
| 211 |
+
match_data = {}
|
| 212 |
+
|
| 213 |
+
# Extract timestamp
|
| 214 |
+
timestamp_match = re.search(r'data-timestamp="(\d+)"', block_html)
|
| 215 |
+
if timestamp_match:
|
| 216 |
+
match_data['timestamp'] = self.parse_timestamp(timestamp_match.group(1))
|
| 217 |
+
|
| 218 |
+
# Extract league from match-name
|
| 219 |
+
league_match = re.search(r'<span class="match-name">([^<]+)</span>', block_html)
|
| 220 |
+
if league_match:
|
| 221 |
+
match_data['league'] = league_match.group(1).strip()
|
| 222 |
+
|
| 223 |
+
# Extract match URL from btn-club link
|
| 224 |
+
url_match = re.search(r'<a class="btn-club[^"]*" href=([^>]+)>', block_html)
|
| 225 |
+
if url_match:
|
| 226 |
+
match_path = url_match.group(1).strip()
|
| 227 |
+
match_path = match_path.strip('\'"')
|
| 228 |
+
match_data['match_path'] = match_path
|
| 229 |
+
|
| 230 |
+
# Extract teams and scores
|
| 231 |
+
            teams_data = self.extract_slide_teams(block_html)
            if teams_data:
                match_data.update(teams_data)

            # Determine status based on scores and timestamp
            self.determine_match_status(match_data)

            if not match_data.get('league') or not match_data.get('teams'):
                return None

            return match_data

        except Exception as e:
            if self.debug:
                print(f"?? Error parsing slide block: {e}")
            return None

    def parse_table_row_block(self, block_html, match_path):
        """Parse table row block"""
        try:
            match_data = {'match_path': match_path}

            # Extract timestamp if present
            timestamp_match = re.search(r'data-timestamp="(\d+)"', block_html)
            if timestamp_match:
                match_data['timestamp'] = self.parse_timestamp(timestamp_match.group(1))

            # Extract league info (table format)
            league_match = re.search(r'<a[^>]*class="league-name"[^>]*alt="([^"]*)"[^>]*>([^<]+)</a>', block_html)
            if league_match:
                match_data['league'] = league_match.group(1) or league_match.group(2)

            # Extract status from title attribute (table format)
            status_match = re.search(r'<span[^>]*title="([^"]*)"[^>]*class="text-overflow">([^<]*)</span>', block_html)
            if status_match:
                title_status = status_match.group(1).strip()
                if "Not Started" in title_status:
                    match_data['status'] = 'Upcoming'
                elif "Live" in title_status:
                    match_data['status'] = 'Live'
                elif "Finished" in title_status or "Final" in title_status:
                    match_data['status'] = 'FT'
                else:
                    match_data['status'] = 'Unknown'

            # Extract teams from table row structure
            teams_data = self.extract_table_teams(block_html)
            if teams_data:
                match_data.update(teams_data)
            else:
                # Fallback to URL parsing
                url_data = self.parse_match_path(match_path)
                if url_data:
                    match_data.update(url_data)

            return match_data if match_data.get('teams') else None

        except Exception as e:
            if self.debug:
                print(f"?? Error parsing table row: {e}")
            return None

    def parse_match_path(self, match_path):
        """Extract info from match URL path"""
        try:
            path_parts = match_path.strip('/').split('/')
            if len(path_parts) < 2:
                return None

            match_part = path_parts[1]
            parts = match_part.rsplit('-', 1)
            if len(parts) < 2:
                return None

            name_sport_part = parts[0]

            # Find sport - safe pattern
            pattern = r'-([a-zA-Z]+)$'
            sport_match = re.search(pattern, name_sport_part)
            sport = sport_match.group(1) if sport_match else 'Sports'

            # Remove sport to get team names
            remove_pattern = r'-[a-zA-Z]+$'
            team_part = re.sub(remove_pattern, '', name_sport_part)

            if '-vs-' in team_part:
                team_names = team_part.split('-vs-')
                if len(team_names) == 2:
                    home_team = team_names[0].replace('-', ' ').title()
                    away_team = team_names[1].replace('-', ' ').title()

                    return {
                        'match_path': match_path,
                        'league': sport.title(),
                        'teams': {
                            'home': {'name': home_team},
                            'away': {'name': away_team}
                        },
                        'match_type': 'vs',
                        'status': 'Upcoming'
                    }

            return None

        except Exception as e:
            if self.debug:
                print(f"Error parsing match path: {e}")
            return None

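    # parse_match_path above only understands paths shaped like
    # /<section>/<home-team>-vs-<away-team>-<sport>-<id> (lowercase, hyphen-separated).
    # An illustrative path such as "/live/arsenal-vs-chelsea-football-12345" would come
    # back as league "Football" with teams "Arsenal" and "Chelsea"; paths without a
    # "-vs-" team separator fall through and return None.
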
    def extract_slide_teams(self, block_html):
        """Extract teams and scores from slide block"""
        teams_data = {}

        # Pattern to find club containers
        club_pattern = r'<div class="club">(.*?)</div>'
        clubs = re.findall(club_pattern, block_html, re.DOTALL)

        if len(clubs) >= 2:
            home_team = self.extract_slide_team_info(clubs[0])
            away_team = self.extract_slide_team_info(clubs[1])

            if home_team and away_team:
                teams_data['teams'] = {'home': home_team, 'away': away_team}
                teams_data['match_type'] = 'vs'
                return teams_data

        return teams_data

    def extract_table_teams(self, block_html):
        """Extract teams from table row structure"""
        teams_data = {}

        # Look for club-item pattern (different from slider)
        club_pattern = r'<div class="club-item[^"]*">(.*?)</div>'
        clubs = re.findall(club_pattern, block_html, re.DOTALL)

        if len(clubs) >= 2:
            home_team = self.extract_table_team_info(clubs[0])
            away_team = self.extract_table_team_info(clubs[1])

            if home_team and away_team:
                teams_data['teams'] = {'home': home_team, 'away': away_team}
                teams_data['match_type'] = 'vs'
                return teams_data

        return teams_data

    def extract_slide_team_info(self, club_html):
        """Extract team info from slide club block"""
        team_info = {}

        # Team name from club-name span
        name_match = re.search(r'<div class="club-name text-overflow">\s*([^<]+)\s*</div>', club_html)
        if name_match:
            team_info['name'] = name_match.group(1).strip()

        # Score from score span
        score_match = re.search(r'<span class="score">(\d+)</span>', club_html)
        if score_match:
            team_info['score'] = score_match.group(1)

        # Fallback: extract team name from img alt attribute
        if not team_info.get('name'):
            alt_match = re.search(r'<img[^>]*alt="([^"]+)"[^>]*>', club_html)
            if alt_match:
                full_name = alt_match.group(1).strip()
                team_info['name'] = full_name[:3].upper()

        return team_info if team_info.get('name') else None

    def extract_table_team_info(self, club_html):
        """Extract team info from table club block"""
        team_info = {}

        # Team name from club-name div
        name_match = re.search(r'<div class="club-name[^"]*"[^>]*>\s*([^<]+)\s*</div>', club_html)
        if name_match:
            team_info['name'] = name_match.group(1).strip()

        # Score from b-text-dark span
        score_match = re.search(r'<span class="b-text-dark">(\d+)</span>', club_html)
        if score_match:
            team_info['score'] = score_match.group(1)

        return team_info if team_info.get('name') else None

    def determine_match_status(self, match_data):
        """Determine match status based on available data"""
        if match_data.get('teams'):
            home_score = match_data['teams']['home'].get('score')
            away_score = match_data['teams']['away'].get('score')

            if home_score and away_score:
                if int(home_score) > 0 or int(away_score) > 0:
                    match_data['status'] = 'Live' # Has non-zero scores
                else:
                    match_data['status'] = 'Upcoming' # 0-0 likely upcoming
            else:
                match_data['status'] = 'Upcoming' # No scores = upcoming
        else:
            match_data['status'] = 'Upcoming' # Default

    def generate_display_name(self, match_data):
        """Generate display name"""
        if match_data.get('match_type') == 'vs' and match_data.get('teams'):
            home = match_data['teams']['home']['name']
            away = match_data['teams']['away']['name']
            return f"{home} vs {away}"
        elif match_data.get('event_name'):
            return match_data['event_name']
        return "Unknown Event"

    def extract_channels_data(self):
        """Extract TV channels data and generate mappings"""
        if not self.channels_html:
            print("?? No channels data to process")
            return []

        print("?? Extracting TV channels data...")
        channels = []

        # Pattern for channels
        channel_pattern = r'<div class="item-channel"[^>]*?data-id\s*=\s*"([^"]*)"[^>]*?data-link="([^"]*)"[^>]*?data-logo="([^"]*)"[^>]*?title="([^"]*)"[^>]*?>'
        channel_matches = re.findall(channel_pattern, self.channels_html, re.DOTALL)

        print(f"?? Found {len(channel_matches)} TV channels")

        for data_id, data_link, data_logo, title in channel_matches:
            channel_data = self.parse_channel_data(data_id, data_link, data_logo, title)
            if channel_data:
                # Generate STABLE encoded ID using base64 of data_id
                encoded_id = base64.urlsafe_b64encode(data_id.encode()).decode().rstrip('=')
                channel_data['encoded_id'] = encoded_id

                # Add to mappings
                self.channel_mappings[encoded_id] = {
                    'fstv_data_id': data_id,
                    'original_stream_url': data_link,
                    'type': 'channel',
                    'name': channel_data['name'],
                    'clean_name': channel_data['clean_name'],
                    'logo': data_logo,
                    'category': channel_data['category'],
                    'country': channel_data['country'],
                    'created_at': datetime.now(timezone.utc).isoformat()
                }

                channels.append(channel_data)
                self.stats["channels_found"] += 1
                self.stats["encoded_urls_generated"] += 1

        print(f"? Processed {len(channels)} channels with stable IDs")
        return channels

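    # The encoded_id values built above are URL-safe base64 of the FSTV data_id with the
    # '=' padding stripped, so the same channel keeps the same ID across scrapes. Mapping
    # an ID back to its data_id (e.g. on the proxy side) just needs the padding restored
    # first, roughly: base64.urlsafe_b64decode(encoded_id + "=" * (-len(encoded_id) % 4)).decode()
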
    def parse_channel_data(self, data_id, data_link, data_logo, title):
        """Parse channel data"""
        try:
            channel_name = unescape(title.strip())

            if not channel_name or not data_link:
                return None

            channel_info = self.categorize_channel(channel_name)

            return {
                'id': data_id,
                'name': channel_name,
                'logo': data_logo,
                'original_stream_url': data_link,
                'category': channel_info['category'],
                'country': channel_info['country'],
                'clean_name': self.clean_channel_name(channel_name)
            }

        except Exception as e:
            if self.debug:
                print(f"?? Error parsing channel: {e}")
            return None

    def categorize_channel(self, channel_name):
        """Categorize channel"""
        name_lower = channel_name.lower()

        # Country
        if any(x in name_lower for x in ['usa', 'cbs', 'nbc', 'abc', 'fox', 'espn']):
            country = "USA"
        elif any(x in name_lower for x in ['uk', 'itv', 'bbc', 'sky']):
            country = "UK"
        else:
            country = "International"

        # Category
        if any(x in name_lower for x in ['sport', 'espn', 'fox sports', 'nfl', 'nba']):
            category = "Sports"
        elif any(x in name_lower for x in ['news', 'cnn', 'fox news', 'msnbc']):
            category = "News"
        else:
            category = "Entertainment"

        return {'category': category, 'country': country}

    def clean_channel_name(self, name):
        """Clean channel name"""
        clean = re.sub(r'[^\w\s\-]', '', name)
        clean = re.sub(r'\s+', '-', clean.strip())
        clean = clean.lower()
        clean = re.sub(r'^(ve-|cdn-|3uk-)', '', clean)
        clean = re.sub(r'(-sv\d+|-\(sv\d+\))$', '', clean)
        return clean

    def generate_files(self, matches, channels):
        """Generate all M3U and XML files"""
        print("?? Generating playlist and EPG files...")

        # Ensure directories exist
        os.makedirs(self.mappings_dir, exist_ok=True)
        os.makedirs(self.playlists_dir, exist_ok=True)

        # Save mappings
        self.save_mappings()

        # Generate matches M3U and EPG
        if matches:
            self.generate_matches_files(matches)

        # Generate channels M3U
        if channels:
            self.generate_channels_files(channels)

        print(f"? Generated {self.stats['files_generated']} files")

    def save_mappings(self):
        """Save URL mappings to JSON files"""
        # Save matches mappings
        if self.match_mappings:
            matches_file = os.path.join(self.mappings_dir, "url_mappings_matches.json")
            with open(matches_file, 'w', encoding='utf-8') as f:
                json.dump(self.match_mappings, f, indent=2, ensure_ascii=False)
            print(f"? Saved {len(self.match_mappings)} match mappings")
            self.stats["files_generated"] += 1

        # Save channels mappings
        if self.channel_mappings:
            channels_file = os.path.join(self.mappings_dir, "url_mappings_channels.json")
            with open(channels_file, 'w', encoding='utf-8') as f:
                json.dump(self.channel_mappings, f, indent=2, ensure_ascii=False)
            print(f"? Saved {len(self.channel_mappings)} channel mappings")
            self.stats["files_generated"] += 1

    def generate_matches_files(self, matches):
        """Generate matches M3U and EPG files"""
        # Generate M3U
        m3u_content = self.generate_matches_m3u(matches)
        m3u_file = os.path.join(self.playlists_dir, "fstv_matches_encoded.m3u")
        with open(m3u_file, 'w', encoding='utf-8') as f:
            f.write(m3u_content)
        print(f"? Saved matches M3U: {len(matches)} channels")
        self.stats["files_generated"] += 1

        # Generate EPG
        epg_xml = self.generate_matches_epg(matches)
        epg_file = os.path.join(self.playlists_dir, "fstv_matches_encoded.xml")
        tree = ET.ElementTree(epg_xml)
        ET.indent(tree, space=" ", level=0)
        tree.write(epg_file, encoding="utf-8", xml_declaration=True)
        print(f"? Saved matches EPG")
        self.stats["files_generated"] += 1

    def generate_matches_m3u(self, matches):
        """Generate matches M3U content"""
        m3u_content = f'#EXTM3U url-tvg="{self.proxy_server}/epg/matches.xml"\n'
        channel_number = 3000

        for match in matches:
            # Generate channel info
            if match.get('match_type') == 'vs' and match.get('teams'):
                home_team = match['teams']['home']['name']
                away_team = match['teams']['away']['name']
                home_score = match['teams']['home'].get('score')
                away_score = match['teams']['away'].get('score')

                if match.get('status') == 'Live' and home_score and away_score:
                    channel_name = f"?? LIVE: {home_team} {home_score} - {away_score} {away_team}"
                elif match.get('status') == 'Live':
                    channel_name = f"?? LIVE: {home_team} vs {away_team}"
                elif match.get('status') == 'FT' and home_score and away_score:
                    channel_name = f"Final: {home_team} {home_score} - {away_score} {away_team}"
                else:
                    channel_name = f"{home_team} vs {away_team}"
            else:
                event_name = match.get('event_name', 'Unknown Event')
                if match.get('status') == 'Live':
                    channel_name = f"?? LIVE: {event_name}"
                else:
                    channel_name = event_name

            # Clean and encode
            channel_name_clean = re.sub(r'[^\w\s\-\(\):]', '', channel_name).strip()
            encoded_url = f"{self.proxy_server}/match/{match['encoded_id']}.m3u8"
            tvg_id = f"fstv_{match['encoded_id']}"
            league = match.get('league', 'FSTV Sports')
            group_title = f"FSTV - {league}"

            m3u_content += f'#EXTINF:-1 tvg-chno="{channel_number}" tvg-id="{tvg_id}" '
            m3u_content += f'tvg-name="{channel_name_clean}" tvg-logo="https://www.pngall.com/wp-content/uploads/1/Sports-PNG-Image.png" '
            m3u_content += f'group-title="{group_title}",{channel_name_clean}\n'
            m3u_content += f"{encoded_url}\n"

            channel_number += 1

        return m3u_content

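    # Each match becomes a two-line playlist entry; with illustrative values
    # (proxy_server "http://your-server:6680", encoded_id "a7k9mq3x") it looks like:
    #   #EXTINF:-1 tvg-chno="3000" tvg-id="fstv_a7k9mq3x" tvg-name="Arsenal vs Chelsea" tvg-logo="..." group-title="FSTV - Football",Arsenal vs Chelsea
    #   http://your-server:6680/match/a7k9mq3x.m3u8
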
    def format_time_for_canadian_zones(self, dt):
        """Format datetime for Canadian time zones"""
        if not dt:
            return "TBD"

        # Canadian time zones
        pst = pytz.timezone('US/Pacific')
        cst = pytz.timezone('US/Central')
        est = pytz.timezone('US/Eastern')

        pst_time = dt.astimezone(pst).strftime("%I:%M %p")
        cst_time = dt.astimezone(cst).strftime("%I:%M %p")
        est_time = dt.astimezone(est).strftime("%I:%M %p")

        return f"{est_time} EST / {cst_time} CST / {pst_time} PST"

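    # Illustrative output of format_time_for_canadian_zones: a 7:30 PM Eastern kickoff is
    # rendered as "07:30 PM EST / 06:30 PM CST / 04:30 PM PST". The zone labels are fixed
    # strings, so they still read EST/CST/PST while daylight saving time is in effect.
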
    def generate_matches_epg(self, matches):
        """Generate matches EPG/XMLTV - Timmys Format"""
        tv = ET.Element("tv")
        tv.set("generator-info-name", "FSTV Combined Scraper")
        tv.set("generator-info-url", "https://fstv.space")

        # Timmys format: 36 hours starting 2 hours before scrape time
        current_time = datetime.now(self.utc)
        epg_start_time = current_time - timedelta(hours=2)
        epg_end_time = epg_start_time + timedelta(hours=36)

        for match in matches:
            # Channel definition
            if match.get('match_type') == 'vs' and match.get('teams'):
                home_team = match['teams']['home']['name']
                away_team = match['teams']['away']['name']
                channel_display = f"{home_team} vs {away_team}"
            else:
                channel_display = match.get('event_name', 'Unknown Event')

            tvg_id = f"fstv_{match['encoded_id']}"

            channel = ET.SubElement(tv, "channel")
            channel.set("id", tvg_id)
            display_name = ET.SubElement(channel, "display-name")
            display_name.text = re.sub(r'[^\w\s\-\(\):]', '', channel_display).strip()

            # Programme blocks - Timmys format
            self.generate_match_programmes(tv, tvg_id, match, epg_start_time, epg_end_time)

        return tv

    def generate_match_programmes(self, tv_element, channel_id, match, epg_start_time, epg_end_time):
        """Generate EPG programme blocks for a match - Timmys Format"""
        match_time = match.get('timestamp')
        current_time = datetime.now(self.utc)

        # Generate team names for title
        if match.get('match_type') == 'vs' and match.get('teams'):
            home_team = match['teams']['home']['name']
            away_team = match['teams']['away']['name']
            event_title = f"{home_team} vs {away_team}"
        else:
            event_title = match.get('event_name', 'Unknown Event')

        # Simple time-based logic (ignore status, use only timestamp)
        if match_time:
            live_end_time = match_time + timedelta(hours=3)

            # UPCOMING EVENT blocks (2-hour blocks before start time)
            if current_time < match_time:
                block_start = max(epg_start_time, current_time)
                while block_start < match_time and block_start < epg_end_time:
                    block_end = min(block_start + timedelta(hours=2), match_time, epg_end_time)

                    programme = ET.SubElement(tv_element, "programme")
                    programme.set("start", block_start.strftime("%Y%m%d%H%M%S +0000"))
                    programme.set("stop", block_end.strftime("%Y%m%d%H%M%S +0000"))
                    programme.set("channel", channel_id)

                    title = ET.SubElement(programme, "title")
                    title.text = f"UPCOMING EVENT: {event_title}"

                    desc = ET.SubElement(programme, "desc")
                    time_str = self.format_time_for_canadian_zones(match_time)
                    desc.text = f"Event starts at {time_str}. Tune in to watch {event_title} live!"

                    category_elem = ET.SubElement(programme, "category")
                    category_elem.text = "Sports"

                    block_start = block_end

            # EVENT NOW LIVE block (3-hour block during event)
            if current_time >= match_time and current_time < live_end_time:
                live_start = max(match_time, epg_start_time)
                live_stop = min(live_end_time, epg_end_time)

                if live_start < epg_end_time:
                    programme = ET.SubElement(tv_element, "programme")
                    programme.set("start", live_start.strftime("%Y%m%d%H%M%S +0000"))
                    programme.set("stop", live_stop.strftime("%Y%m%d%H%M%S +0000"))
                    programme.set("channel", channel_id)

                    title = ET.SubElement(programme, "title")
                    title.text = f"?? EVENT NOW LIVE FOR: {event_title}"

                    desc = ET.SubElement(programme, "desc")
                    desc.text = f"Live coverage of {event_title}. Watch all the action as it happens!"

                    category_elem = ET.SubElement(programme, "category")
                    category_elem.text = "Sports"

            # EVENT HAS ENDED block (after live+3 hours)
            if current_time >= live_end_time:
                ended_start = max(live_end_time, epg_start_time)
                ended_stop = epg_end_time

                if ended_start < epg_end_time:
                    programme = ET.SubElement(tv_element, "programme")
                    programme.set("start", ended_start.strftime("%Y%m%d%H%M%S +0000"))
                    programme.set("stop", ended_stop.strftime("%Y%m%d%H%M%S +0000"))
                    programme.set("channel", channel_id)

                    title = ET.SubElement(programme, "title")
                    title.text = f"EVENT HAS ENDED: {event_title}"

                    desc = ET.SubElement(programme, "desc")
                    desc.text = f"Event has ended: {event_title}"

                    category_elem = ET.SubElement(programme, "category")
                    category_elem.text = "Sports"

        else:
            # No timestamp available - fill with default programming
            programme = ET.SubElement(tv_element, "programme")
            programme.set("start", epg_start_time.strftime("%Y%m%d%H%M%S +0000"))
            programme.set("stop", epg_end_time.strftime("%Y%m%d%H%M%S +0000"))
            programme.set("channel", channel_id)

            title = ET.SubElement(programme, "title")
            title.text = event_title

            desc = ET.SubElement(programme, "desc")
            desc.text = f"Sports programming: {event_title}"

            category_elem = ET.SubElement(programme, "category")
            category_elem.text = "Sports"

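    # Note on the programme blocks above: only the branch matching the match's state at
    # scrape time is emitted. A not-yet-started match gets repeating 2-hour
    # "UPCOMING EVENT" blocks up to kickoff (no LIVE block is pre-filled), a match inside
    # its 3-hour live window gets a single "EVENT NOW LIVE" block, and a finished match
    # gets one "EVENT HAS ENDED" block running to the end of the 36-hour guide window.
    # Matches without a timestamp fall back to one generic block covering the whole window.
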
    def generate_match_title(self, match_data):
        """Generate match title - Timmys Format"""
        match_time = match_data.get('timestamp')
        current_time = datetime.now(self.utc)

        if match_data.get('match_type') == 'vs' and match_data.get('teams'):
            home_team = match_data['teams']['home']['name']
            away_team = match_data['teams']['away']['name']
            event_title = f"{home_team} vs {away_team}"
        else:
            event_title = match_data.get('event_name', 'Unknown Event')

        if match_time:
            live_end_time = match_time + timedelta(hours=3)

            if current_time < match_time:
                return f"UPCOMING EVENT: {event_title}"
            elif current_time >= match_time and current_time < live_end_time:
                return f"?? EVENT NOW LIVE FOR: {event_title}"
            else:
                return f"EVENT HAS ENDED: {event_title}"

        return event_title

    def generate_match_description(self, match_data):
        """Generate match description - Timmys Format"""
        match_time = match_data.get('timestamp')
        current_time = datetime.now(self.utc)
        league = match_data.get('league', 'Unknown League')

        if match_data.get('match_type') == 'vs' and match_data.get('teams'):
            home_team = match_data['teams']['home']['name']
            away_team = match_data['teams']['away']['name']
            event_title = f"{home_team} vs {away_team}"
        else:
            event_title = match_data.get('event_name', 'Unknown Event')

        if match_time:
            live_end_time = match_time + timedelta(hours=3)
            time_str = self.format_time_for_canadian_zones(match_time)

            if current_time < match_time:
                return f"Event starts at {time_str}. Tune in to watch {event_title} live in {league}!"
            elif current_time >= match_time and current_time < live_end_time:
                return f"Live coverage of {event_title} in {league}. Watch all the action as it happens!"
            else:
                return f"Event has ended: {event_title} in {league}."

        return f"Sports programming: {event_title} in {league}."

    def generate_channels_files(self, channels):
        """Generate channels M3U file"""
        # Generate M3U
        m3u_content = self.generate_channels_m3u(channels)
        m3u_file = os.path.join(self.playlists_dir, "fstv_tv_channels_encoded.m3u")
        with open(m3u_file, 'w', encoding='utf-8') as f:
            f.write(m3u_content)
        print(f"? Saved channels M3U: {len(channels)} channels")
        self.stats["files_generated"] += 1

    def generate_channels_m3u(self, channels):
        """Generate channels M3U content"""
        # Sort channels
        sorted_channels = sorted(channels, key=lambda x: (x['country'], x['category'], x['name']))

        m3u_content = '#EXTM3U url-tvg=""\n'
        channel_number = 1000

        for channel in sorted_channels:
            display_name = self.clean_display_name(channel['name'])
            encoded_url = f"{self.proxy_server}/channel/{channel['encoded_id']}.m3u8"
            tvg_id = f"fstv_tv_{channel['encoded_id']}"
            group_title = f"FSTV TV - {channel['country']} {channel['category']}"

            m3u_content += f'#EXTINF:-1 tvg-chno="{channel_number}" tvg-id="{tvg_id}" '
            m3u_content += f'tvg-name="{display_name}" tvg-logo="{channel["logo"]}" '
            m3u_content += f'group-title="{group_title}",{display_name}\n'
            m3u_content += f"{encoded_url}\n"

            channel_number += 1

        return m3u_content

    def clean_display_name(self, name):
        """Clean channel name for display"""
        display = re.sub(r'^(VE-|CDN-|3uk-)', '', name, flags=re.IGNORECASE)
        display = re.sub(r'\s*\(sv\d+\)\s*$', '', display, flags=re.IGNORECASE)
        display = re.sub(r'\s*-\s*sv\d+\s*$', '', display, flags=re.IGNORECASE)
        display = display.strip()

        # Special cases
        replacements = {
            'usanetwork': 'USA Network',
            'cbssport': 'CBS Sports Network',
            'cbs los angeles': 'CBS Los Angeles',
            'itv1': 'ITV1', 'itv2': 'ITV2', 'itv3': 'ITV3', 'itv4': 'ITV4',
            'lfctv': 'Liverpool FC TV'
        }

        display_lower = display.lower()
        for key, value in replacements.items():
            if key in display_lower:
                display = value
                break

        return display

    def print_stats(self):
        """Print final statistics"""
        print(f"\n?? SCRAPING COMPLETE:")
        print(f" ?? Matches found: {self.stats['matches_found']}")
        print(f" ?? Channels found: {self.stats['channels_found']}")
        print(f" ?? Encoded URLs generated: {self.stats['encoded_urls_generated']}")
        print(f" ?? Files generated: {self.stats['files_generated']}")
        print(f" ?? HTTP requests made: {self.stats['http_requests']}")

    async def run(self):
        """Main scraper execution"""
        print("?? FSTV Combined Scraper Starting")
        print("? Scheduled for 12:05 AM daily execution")
        print("=" * 50)

        try:
            # Initialize HTTP client
            await self.init_http_client()

            # Fetch LIVE data from FSTV
            if not await self.fetch_live_data():
                print("? Failed to fetch live data from FSTV. Exiting.")
                return False

            # Extract data
            matches = self.extract_matches_data()
            channels = self.extract_channels_data()

            if not matches and not channels:
                print("?? No data extracted. Exiting.")
                return False

            # Generate files
            self.generate_files(matches, channels)

            # Print stats
            self.print_stats()

            print("\n?? Scraping completed successfully!")
            return True

        except Exception as e:
            print(f"? Scraper error: {e}")
            return False
        finally:
            # Clean up HTTP client
            await self.close_http_client()

async def main():
    """Entry point"""
    scraper = CombinedFSTVScraper(debug=True)
    success = await scraper.run()
    exit(0 if success else 1)

if __name__ == "__main__":
    asyncio.run(main())