Update app.py
Browse files
app.py
CHANGED
@@ -13,8 +13,15 @@ from threading import Lock
|
|
13 |
import logging
|
14 |
import sys
|
15 |
from functools import partial
|
|
|
16 |
|
17 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
logging.basicConfig(
|
19 |
level=logging.INFO,
|
20 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
@@ -27,6 +34,15 @@ logger = logging.getLogger('website_requester')
|
|
27 |
|
28 |
app = Flask(__name__)
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
# Variables globales pour stocker l'état, utilisation d'un gestionnaire multiprocessing
|
31 |
manager = multiprocessing.Manager()
|
32 |
requests_in_progress = manager.Value('b', False)
|
@@ -37,15 +53,15 @@ requests_lock = manager.Lock() # Pour sécuriser l'accès aux données partagé
|
|
37 |
process_pool = None
|
38 |
background_tasks = []
|
39 |
|
40 |
-
#
|
41 |
-
MAX_CONNECTIONS = 1000
|
42 |
-
SAVE_FREQUENCY = 100
|
43 |
-
CHUNK_SIZE = 5000
|
44 |
-
MAX_RETRIES = 5
|
45 |
-
RETRY_DELAY = 0.5
|
46 |
REQUESTS_FILE = "requetes_gabaohub.json"
|
47 |
-
IP_ROTATION_COUNT = 50
|
48 |
-
REQUEST_TIMEOUT = 15
|
49 |
|
50 |
def generate_gabonese_ip():
|
51 |
"""Génère une adresse IP du Gabon (plage 41.158.0.0/16)"""
|
@@ -100,13 +116,12 @@ async def with_retries(func, *args, max_retries=MAX_RETRIES, **kwargs):
|
|
100 |
|
101 |
async def create_session(request_counter):
|
102 |
"""Crée une session HTTP optimisée avec rotation d'IP"""
|
103 |
-
# Configurer la session avec un meilleur contrôle des connexions
|
104 |
connector = aiohttp.TCPConnector(
|
105 |
-
limit=MAX_CONNECTIONS,
|
106 |
-
ssl=False,
|
107 |
-
force_close=False,
|
108 |
-
use_dns_cache=True,
|
109 |
-
ttl_dns_cache=300
|
110 |
)
|
111 |
|
112 |
timeout = aiohttp.ClientTimeout(
|
@@ -131,13 +146,12 @@ async def create_session(request_counter):
|
|
131 |
}
|
132 |
)
|
133 |
|
134 |
-
# Middleware pour
|
135 |
original_request = session._request
|
136 |
|
137 |
async def request_middleware(method, url, **kwargs):
|
138 |
nonlocal request_counter
|
139 |
|
140 |
-
# Rotation des IPs, des User-Agents et des référents après un certain nombre de requêtes
|
141 |
if request_counter.value % IP_ROTATION_COUNT == 0:
|
142 |
kwargs.setdefault('headers', {}).update({
|
143 |
"X-Forwarded-For": generate_gabonese_ip(),
|
@@ -156,14 +170,10 @@ async def make_homepage_request(session, request_index, request_counter):
|
|
156 |
global requests_being_made
|
157 |
|
158 |
try:
|
159 |
-
# URL de la page d'accueil
|
160 |
url = "https://gabaohub.alwaysdata.net"
|
161 |
-
|
162 |
-
# Paramètres aléatoires pour simuler des utilisateurs réels
|
163 |
params = {}
|
164 |
|
165 |
-
|
166 |
-
if random.random() < 0.3: # 30% de chance
|
167 |
utm_sources = ["facebook", "twitter", "instagram", "direct", "google", "bing"]
|
168 |
utm_mediums = ["social", "cpc", "email", "referral", "organic"]
|
169 |
utm_campaigns = ["spring_promo", "launch", "awareness", "brand", "product"]
|
@@ -172,7 +182,6 @@ async def make_homepage_request(session, request_index, request_counter):
|
|
172 |
params["utm_medium"] = random.choice(utm_mediums)
|
173 |
params["utm_campaign"] = random.choice(utm_campaigns)
|
174 |
|
175 |
-
# Mettre à jour les informations de la requête dans la liste
|
176 |
with requests_lock:
|
177 |
if request_index < len(requests_being_made):
|
178 |
requests_being_made[request_index]["url"] = url
|
@@ -180,16 +189,13 @@ async def make_homepage_request(session, request_index, request_counter):
|
|
180 |
requests_being_made[request_index]["status"] = "in_progress"
|
181 |
requests_being_made[request_index]["start_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
182 |
|
183 |
-
# Effectuer la requête avec retries
|
184 |
try:
|
185 |
start_time = time.time()
|
186 |
async with session.get(url, params=params, allow_redirects=True) as response:
|
187 |
-
# Lire le contenu pour simuler le chargement complet de la page
|
188 |
content = await response.read()
|
189 |
end_time = time.time()
|
190 |
response_time = end_time - start_time
|
191 |
|
192 |
-
# Mettre à jour le statut
|
193 |
with requests_lock:
|
194 |
if request_index < len(requests_being_made):
|
195 |
requests_being_made[request_index]["status"] = "success"
|
@@ -217,13 +223,9 @@ async def make_homepage_request(session, request_index, request_counter):
|
|
217 |
async def process_request_chunk(start_index, chunk_size, process_id=0):
|
218 |
"""Traite un groupe de requêtes dans un processus séparé"""
|
219 |
logger.info(f"Processus {process_id}: Démarrage du traitement pour les indices {start_index} à {start_index+chunk_size-1}")
|
220 |
-
|
221 |
-
# Créer un compteur de requêtes partagé
|
222 |
request_counter = multiprocessing.Value('i', 0)
|
223 |
|
224 |
-
# Créer une nouvelle session HTTP pour ce processus
|
225 |
async with await create_session(request_counter) as session:
|
226 |
-
# Créer un sémaphore pour limiter les connexions simultanées
|
227 |
semaphore = asyncio.Semaphore(MAX_CONNECTIONS // multiprocessing.cpu_count())
|
228 |
|
229 |
async def process_request(i):
|
@@ -233,15 +235,10 @@ async def process_request_chunk(start_index, chunk_size, process_id=0):
|
|
233 |
|
234 |
async with semaphore:
|
235 |
await make_homepage_request(session, request_index, request_counter)
|
236 |
-
|
237 |
-
# Pause aléatoire pour éviter les modèles de trafic détectables
|
238 |
-
await asyncio.sleep(random.uniform(0.2, 2.0)) # Temps d'attente plus naturel entre les requêtes
|
239 |
-
|
240 |
-
# Sauvegarde périodique
|
241 |
if progress_counter.value % SAVE_FREQUENCY == 0:
|
242 |
save_results_to_file()
|
243 |
|
244 |
-
# Créer et exécuter toutes les tâches pour ce chunk
|
245 |
tasks = [process_request(i) for i in range(min(chunk_size, total_requests.value - start_index))]
|
246 |
await asyncio.gather(*tasks)
|
247 |
|
@@ -251,17 +248,11 @@ def save_results_to_file():
|
|
251 |
"""Sauvegarde l'état actuel dans un fichier JSON de manière thread-safe"""
|
252 |
with requests_lock:
|
253 |
try:
|
254 |
-
# Créer une copie des données actuelles
|
255 |
data_to_save = list(requests_being_made)
|
256 |
-
|
257 |
-
# Utiliser un fichier temporaire pour éviter la corruption
|
258 |
temp_file = f"{REQUESTS_FILE}.tmp"
|
259 |
with open(temp_file, "w", encoding="utf-8") as f:
|
260 |
-
json.dump(data_to_save, f, indent=2, ensure_ascii=False)
|
261 |
-
|
262 |
-
# Remplacer le fichier original par le fichier temporaire
|
263 |
os.replace(temp_file, REQUESTS_FILE)
|
264 |
-
|
265 |
logger.info(f"Sauvegarde effectuée: {progress_counter.value}/{total_requests.value} requêtes")
|
266 |
except Exception as e:
|
267 |
logger.error(f"Erreur lors de la sauvegarde: {str(e)}")
|
@@ -269,11 +260,8 @@ def save_results_to_file():
|
|
269 |
def run_request_process(start_index, chunk_size, process_id):
|
270 |
"""Fonction exécutée dans chaque processus pour effectuer des requêtes"""
|
271 |
try:
|
272 |
-
# Configurer la nouvelle boucle asyncio pour ce processus
|
273 |
loop = asyncio.new_event_loop()
|
274 |
asyncio.set_event_loop(loop)
|
275 |
-
|
276 |
-
# Exécuter le traitement du chunk
|
277 |
loop.run_until_complete(process_request_chunk(start_index, chunk_size, process_id))
|
278 |
loop.close()
|
279 |
except Exception as e:
|
@@ -283,19 +271,15 @@ def start_request_process(num_requests, concurrency):
|
|
283 |
"""Démarre le processus d'envoi de requêtes avec multiprocessing"""
|
284 |
global requests_in_progress, total_requests, progress_counter, requests_being_made, process_pool, background_tasks
|
285 |
|
286 |
-
# Réinitialiser les compteurs
|
287 |
with requests_lock:
|
288 |
progress_counter.value = 0
|
289 |
total_requests.value = num_requests
|
290 |
requests_being_made[:] = [{"status": "pending"} for _ in range(num_requests)]
|
291 |
|
292 |
-
# Déterminer le nombre optimal de processus
|
293 |
num_cpus = multiprocessing.cpu_count()
|
294 |
num_processes = min(num_cpus, (num_requests + CHUNK_SIZE - 1) // CHUNK_SIZE)
|
295 |
|
296 |
logger.info(f"Démarrage de l'envoi de {num_requests} requêtes avec {num_processes} processus et concurrence de {concurrency}")
|
297 |
-
|
298 |
-
# Diviser le travail en chunks
|
299 |
process_pool = ProcessPoolExecutor(max_workers=num_processes)
|
300 |
background_tasks = []
|
301 |
|
@@ -310,25 +294,17 @@ def start_request_process(num_requests, concurrency):
|
|
310 |
)
|
311 |
background_tasks.append(task)
|
312 |
|
313 |
-
# Démarrer un thread de surveillance pour les tâches en arrière-plan
|
314 |
monitor_thread = ThreadPoolExecutor(max_workers=1)
|
315 |
monitor_thread.submit(monitor_background_tasks)
|
316 |
|
317 |
def monitor_background_tasks():
|
318 |
"""Surveille les tâches en arrière-plan et marque le processus comme terminé lorsque tout est fait"""
|
319 |
global requests_in_progress, background_tasks
|
320 |
-
|
321 |
try:
|
322 |
-
# Attendre que toutes les tâches soient terminées
|
323 |
for task in background_tasks:
|
324 |
-
task.result()
|
325 |
-
|
326 |
logger.info(f"Toutes les tâches d'envoi de requêtes sont terminées. {progress_counter.value}/{total_requests.value} requêtes traitées.")
|
327 |
-
|
328 |
-
# Sauvegarde finale
|
329 |
save_results_to_file()
|
330 |
-
|
331 |
-
# Marquer le processus comme terminé
|
332 |
requests_in_progress.value = False
|
333 |
except Exception as e:
|
334 |
logger.error(f"Erreur lors de la surveillance des tâches: {str(e)}")
|
@@ -337,7 +313,6 @@ def monitor_background_tasks():
|
|
337 |
@app.route('/')
|
338 |
def index():
|
339 |
"""Page d'accueil"""
|
340 |
-
# Charger les requêtes existantes si elles existent
|
341 |
requests_data = []
|
342 |
if os.path.exists(REQUESTS_FILE):
|
343 |
try:
|
@@ -348,7 +323,7 @@ def index():
|
|
348 |
|
349 |
return render_template('index.html',
|
350 |
requests_in_progress=requests_in_progress.value,
|
351 |
-
requests=requests_data[:1000],
|
352 |
progress=progress_counter.value,
|
353 |
total=total_requests.value)
|
354 |
|
@@ -356,16 +331,10 @@ def index():
|
|
356 |
def start_requests():
|
357 |
"""Démarrer l'envoi de requêtes"""
|
358 |
if not requests_in_progress.value:
|
359 |
-
# Récupérer les paramètres
|
360 |
num_requests = int(request.form.get('num_requests', 1000))
|
361 |
concurrency = int(request.form.get('concurrency', MAX_CONNECTIONS))
|
362 |
-
|
363 |
-
# Marquer comme en cours
|
364 |
requests_in_progress.value = True
|
365 |
-
|
366 |
-
# Démarrer l'envoi de requêtes
|
367 |
start_request_process(num_requests, concurrency)
|
368 |
-
|
369 |
logger.info(f"Envoi de {num_requests} requêtes lancé avec concurrence {concurrency}")
|
370 |
else:
|
371 |
logger.warning("Un processus d'envoi de requêtes est déjà en cours")
|
@@ -375,7 +344,6 @@ def start_requests():
|
|
375 |
@app.route('/progress')
|
376 |
def get_progress():
|
377 |
"""Endpoint API pour obtenir la progression complète"""
|
378 |
-
# Charger les dernières données
|
379 |
requests_data = []
|
380 |
if os.path.exists(REQUESTS_FILE):
|
381 |
try:
|
@@ -388,7 +356,7 @@ def get_progress():
|
|
388 |
'requests_in_progress': requests_in_progress.value,
|
389 |
'progress': progress_counter.value,
|
390 |
'total': total_requests.value,
|
391 |
-
'requests': requests_data[:200]
|
392 |
})
|
393 |
|
394 |
@app.route('/status')
|
@@ -409,16 +377,12 @@ def reset():
|
|
409 |
global requests_in_progress, requests_being_made, progress_counter, total_requests, process_pool, background_tasks
|
410 |
|
411 |
if not requests_in_progress.value:
|
412 |
-
# Réinitialiser les compteurs et les données
|
413 |
with requests_lock:
|
414 |
requests_being_made[:] = []
|
415 |
progress_counter.value = 0
|
416 |
total_requests.value = 0
|
417 |
-
|
418 |
-
# Supprimer le fichier JSON s'il existe
|
419 |
if os.path.exists(REQUESTS_FILE):
|
420 |
os.remove(REQUESTS_FILE)
|
421 |
-
|
422 |
logger.info("Réinitialisation effectuée")
|
423 |
else:
|
424 |
logger.warning("Impossible de réinitialiser pendant un processus en cours")
|
@@ -429,15 +393,11 @@ def reset():
|
|
429 |
def stop_requests():
|
430 |
"""Arrête le processus d'envoi de requêtes en cours"""
|
431 |
global requests_in_progress, process_pool
|
432 |
-
|
433 |
if requests_in_progress.value and process_pool:
|
434 |
logger.info("Arrêt des processus d'envoi de requêtes...")
|
435 |
process_pool.shutdown(wait=False)
|
436 |
requests_in_progress.value = False
|
437 |
-
|
438 |
-
# Sauvegarde l'état actuel
|
439 |
save_results_to_file()
|
440 |
-
|
441 |
logger.info("Processus d'envoi de requêtes arrêté")
|
442 |
|
443 |
return redirect(url_for('index'))
|
@@ -453,23 +413,19 @@ def get_stats():
|
|
453 |
except json.JSONDecodeError:
|
454 |
requests_data = []
|
455 |
|
456 |
-
# Calculer les statistiques de base
|
457 |
successful_requests = [req for req in requests_data if req.get('status') == 'success']
|
458 |
failed_requests = [req for req in requests_data if req.get('status') == 'failed']
|
459 |
|
460 |
-
# Temps de réponse moyen pour les requêtes réussies
|
461 |
avg_response_time = 0
|
462 |
if successful_requests:
|
463 |
avg_response_time = sum(req.get('response_time', 0) for req in successful_requests) / len(successful_requests)
|
464 |
|
465 |
-
# Codes de statut des réponses
|
466 |
status_codes = {}
|
467 |
for req in successful_requests:
|
468 |
code = req.get('status_code')
|
469 |
if code:
|
470 |
status_codes[code] = status_codes.get(code, 0) + 1
|
471 |
|
472 |
-
# Types d'erreurs pour les requêtes échouées
|
473 |
error_types = {}
|
474 |
for req in failed_requests:
|
475 |
error = req.get('error', 'Unknown')
|
@@ -617,8 +573,7 @@ def get_template():
|
|
617 |
<th>Temps (s)</th>
|
618 |
<th>Heure</th>
|
619 |
</tr>
|
620 |
-
|
621 |
-
</thead>
|
622 |
<tbody id="requests-table">
|
623 |
<tr>
|
624 |
<td colspan="6" class="text-center">Chargement des données...</td>
|
@@ -632,17 +587,14 @@ def get_template():
|
|
632 |
|
633 |
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
|
634 |
<script>
|
635 |
-
// Variables globales
|
636 |
let isPolling = false;
|
637 |
let pollingInterval;
|
638 |
|
639 |
-
// Démarrer la mise à jour automatique
|
640 |
window.onload = function() {
|
641 |
updateProgress();
|
642 |
updateRequests();
|
643 |
updateStats();
|
644 |
|
645 |
-
// Mettre en place une mise à jour périodique
|
646 |
pollingInterval = setInterval(() => {
|
647 |
if (isPolling) {
|
648 |
updateProgress();
|
@@ -651,33 +603,24 @@ def get_template():
|
|
651 |
}, 5000);
|
652 |
};
|
653 |
|
654 |
-
// Mettre à jour les informations de progression
|
655 |
function updateProgress() {
|
656 |
fetch('/status')
|
657 |
.then(response => response.json())
|
658 |
.then(data => {
|
659 |
-
// Mettre à jour la barre de progression
|
660 |
const percentage = data.total > 0 ? (data.progress / data.total * 100) : 0;
|
661 |
document.getElementById('progress-bar').style.width = percentage + '%';
|
662 |
document.getElementById('progress-bar').setAttribute('aria-valuenow', percentage);
|
663 |
|
664 |
-
// Mettre à jour le texte de progression
|
665 |
document.getElementById('progress-text').textContent =
|
666 |
`${data.progress} / ${data.total} requêtes traitées (${percentage.toFixed(1)}%)`;
|
667 |
-
|
668 |
-
// Mettre à jour l'état
|
669 |
document.getElementById('status-text').textContent =
|
670 |
`État: ${data.requests_in_progress ? 'En cours' : 'Inactif'}`;
|
671 |
|
672 |
-
// Mettre à jour les compteurs
|
673 |
document.getElementById('success-count').textContent = data.success_count;
|
674 |
document.getElementById('failed-count').textContent = data.failed_count;
|
675 |
document.getElementById('pending-count').textContent = data.pending_count;
|
676 |
|
677 |
-
// Activer/désactiver le polling automatique
|
678 |
isPolling = data.requests_in_progress;
|
679 |
-
|
680 |
-
// Mettre à jour l'état des boutons
|
681 |
document.getElementById('start-button').disabled = data.requests_in_progress;
|
682 |
document.getElementById('stop-button').disabled = !data.requests_in_progress;
|
683 |
document.getElementById('reset-button').disabled = data.requests_in_progress;
|
@@ -687,30 +630,22 @@ def get_template():
|
|
687 |
});
|
688 |
}
|
689 |
|
690 |
-
// Mettre à jour la liste des requêtes
|
691 |
function updateRequests() {
|
692 |
fetch('/progress')
|
693 |
.then(response => response.json())
|
694 |
.then(data => {
|
695 |
const tableBody = document.getElementById('requests-table');
|
696 |
-
|
697 |
-
// Effacer le contenu actuel
|
698 |
tableBody.innerHTML = '';
|
699 |
|
700 |
-
// Si aucune requête, afficher un message
|
701 |
if (data.requests.length === 0) {
|
702 |
tableBody.innerHTML = '<tr><td colspan="6" class="text-center">Aucune requête effectuée.</td></tr>';
|
703 |
return;
|
704 |
}
|
705 |
|
706 |
-
// Ajouter chaque requête au tableau
|
707 |
data.requests.forEach((req, index) => {
|
708 |
const row = document.createElement('tr');
|
709 |
-
|
710 |
-
// Déterminer la classe de statut
|
711 |
const statusClass = `status-${req.status || 'pending'}`;
|
712 |
|
713 |
-
// Créer les cellules
|
714 |
row.innerHTML = `
|
715 |
<td>${index + 1}</td>
|
716 |
<td>${req.url || 'N/A'}</td>
|
@@ -728,14 +663,12 @@ def get_template():
|
|
728 |
});
|
729 |
}
|
730 |
|
731 |
-
// Mettre à jour les statistiques
|
732 |
function updateStats() {
|
733 |
fetch('/stats')
|
734 |
.then(response => response.json())
|
735 |
.then(data => {
|
736 |
const statsContainer = document.getElementById('stats-container');
|
737 |
|
738 |
-
// Formater les codes de statut
|
739 |
let statusCodeHtml = '<div class="mt-3"><h5>Codes de statut</h5>';
|
740 |
if (Object.keys(data.status_codes).length > 0) {
|
741 |
statusCodeHtml += '<div class="row">';
|
@@ -760,7 +693,6 @@ def get_template():
|
|
760 |
}
|
761 |
statusCodeHtml += '</div>';
|
762 |
|
763 |
-
// Formater les types d'erreurs
|
764 |
let errorTypesHtml = '<div class="mt-3"><h5>Types d\'erreurs</h5>';
|
765 |
if (Object.keys(data.error_types).length > 0) {
|
766 |
errorTypesHtml += '<ul class="list-group">';
|
@@ -778,7 +710,6 @@ def get_template():
|
|
778 |
}
|
779 |
errorTypesHtml += '</div>';
|
780 |
|
781 |
-
// Mettre à jour le contenu
|
782 |
statsContainer.innerHTML = `
|
783 |
<div class="row">
|
784 |
<div class="col-md-6">
|
@@ -822,4 +753,4 @@ def get_template():
|
|
822 |
"""
|
823 |
|
824 |
if __name__ == '__main__':
|
825 |
-
app.run(debug=True)
|
|
|
13 |
import logging
|
14 |
import sys
|
15 |
from functools import partial
|
16 |
+
from jinja2 import Undefined # Pour gérer les valeurs Undefined
|
17 |
|
18 |
+
# Définition d'une fonction pour convertir les objets Undefined en chaîne vide
|
19 |
+
def default_json(o):
|
20 |
+
if isinstance(o, Undefined):
|
21 |
+
return ''
|
22 |
+
raise TypeError(f"Object of type {type(o).__name__} is not JSON serializable")
|
23 |
+
|
24 |
+
# Configuration du logger
|
25 |
logging.basicConfig(
|
26 |
level=logging.INFO,
|
27 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
|
|
34 |
|
35 |
app = Flask(__name__)
|
36 |
|
37 |
+
# Définir un encodeur JSON personnalisé pour Flask
|
38 |
+
from flask.json import JSONEncoder
|
39 |
+
class CustomJSONEncoder(JSONEncoder):
|
40 |
+
def default(self, obj):
|
41 |
+
if isinstance(obj, Undefined):
|
42 |
+
return ''
|
43 |
+
return super().default(obj)
|
44 |
+
app.json_encoder = CustomJSONEncoder
|
45 |
+
|
46 |
# Variables globales pour stocker l'état, utilisation d'un gestionnaire multiprocessing
|
47 |
manager = multiprocessing.Manager()
|
48 |
requests_in_progress = manager.Value('b', False)
|
|
|
53 |
process_pool = None
|
54 |
background_tasks = []
|
55 |
|
56 |
+
# Paramètres avancés
|
57 |
+
MAX_CONNECTIONS = 1000 # Nombre maximal de connexions simultanées
|
58 |
+
SAVE_FREQUENCY = 100 # Fréquence de sauvegarde des résultats (tous les X requêtes)
|
59 |
+
CHUNK_SIZE = 5000 # Nombre de requêtes à traiter par processus
|
60 |
+
MAX_RETRIES = 5 # Nombre maximum de tentatives pour chaque opération
|
61 |
+
RETRY_DELAY = 0.5 # Délai entre les tentatives en secondes
|
62 |
REQUESTS_FILE = "requetes_gabaohub.json"
|
63 |
+
IP_ROTATION_COUNT = 50 # Rotation des IP après ce nombre de requêtes
|
64 |
+
REQUEST_TIMEOUT = 15 # Timeout des requêtes en secondes
|
65 |
|
66 |
def generate_gabonese_ip():
|
67 |
"""Génère une adresse IP du Gabon (plage 41.158.0.0/16)"""
|
|
|
116 |
|
117 |
async def create_session(request_counter):
|
118 |
"""Crée une session HTTP optimisée avec rotation d'IP"""
|
|
|
119 |
connector = aiohttp.TCPConnector(
|
120 |
+
limit=MAX_CONNECTIONS,
|
121 |
+
ssl=False,
|
122 |
+
force_close=False,
|
123 |
+
use_dns_cache=True,
|
124 |
+
ttl_dns_cache=300
|
125 |
)
|
126 |
|
127 |
timeout = aiohttp.ClientTimeout(
|
|
|
146 |
}
|
147 |
)
|
148 |
|
149 |
+
# Middleware pour la rotation des IP, User-Agent et référents
|
150 |
original_request = session._request
|
151 |
|
152 |
async def request_middleware(method, url, **kwargs):
|
153 |
nonlocal request_counter
|
154 |
|
|
|
155 |
if request_counter.value % IP_ROTATION_COUNT == 0:
|
156 |
kwargs.setdefault('headers', {}).update({
|
157 |
"X-Forwarded-For": generate_gabonese_ip(),
|
|
|
170 |
global requests_being_made
|
171 |
|
172 |
try:
|
|
|
173 |
url = "https://gabaohub.alwaysdata.net"
|
|
|
|
|
174 |
params = {}
|
175 |
|
176 |
+
if random.random() < 0.3:
|
|
|
177 |
utm_sources = ["facebook", "twitter", "instagram", "direct", "google", "bing"]
|
178 |
utm_mediums = ["social", "cpc", "email", "referral", "organic"]
|
179 |
utm_campaigns = ["spring_promo", "launch", "awareness", "brand", "product"]
|
|
|
182 |
params["utm_medium"] = random.choice(utm_mediums)
|
183 |
params["utm_campaign"] = random.choice(utm_campaigns)
|
184 |
|
|
|
185 |
with requests_lock:
|
186 |
if request_index < len(requests_being_made):
|
187 |
requests_being_made[request_index]["url"] = url
|
|
|
189 |
requests_being_made[request_index]["status"] = "in_progress"
|
190 |
requests_being_made[request_index]["start_time"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
191 |
|
|
|
192 |
try:
|
193 |
start_time = time.time()
|
194 |
async with session.get(url, params=params, allow_redirects=True) as response:
|
|
|
195 |
content = await response.read()
|
196 |
end_time = time.time()
|
197 |
response_time = end_time - start_time
|
198 |
|
|
|
199 |
with requests_lock:
|
200 |
if request_index < len(requests_being_made):
|
201 |
requests_being_made[request_index]["status"] = "success"
|
|
|
223 |
async def process_request_chunk(start_index, chunk_size, process_id=0):
|
224 |
"""Traite un groupe de requêtes dans un processus séparé"""
|
225 |
logger.info(f"Processus {process_id}: Démarrage du traitement pour les indices {start_index} à {start_index+chunk_size-1}")
|
|
|
|
|
226 |
request_counter = multiprocessing.Value('i', 0)
|
227 |
|
|
|
228 |
async with await create_session(request_counter) as session:
|
|
|
229 |
semaphore = asyncio.Semaphore(MAX_CONNECTIONS // multiprocessing.cpu_count())
|
230 |
|
231 |
async def process_request(i):
|
|
|
235 |
|
236 |
async with semaphore:
|
237 |
await make_homepage_request(session, request_index, request_counter)
|
238 |
+
await asyncio.sleep(random.uniform(0.2, 2.0))
|
|
|
|
|
|
|
|
|
239 |
if progress_counter.value % SAVE_FREQUENCY == 0:
|
240 |
save_results_to_file()
|
241 |
|
|
|
242 |
tasks = [process_request(i) for i in range(min(chunk_size, total_requests.value - start_index))]
|
243 |
await asyncio.gather(*tasks)
|
244 |
|
|
|
248 |
"""Sauvegarde l'état actuel dans un fichier JSON de manière thread-safe"""
|
249 |
with requests_lock:
|
250 |
try:
|
|
|
251 |
data_to_save = list(requests_being_made)
|
|
|
|
|
252 |
temp_file = f"{REQUESTS_FILE}.tmp"
|
253 |
with open(temp_file, "w", encoding="utf-8") as f:
|
254 |
+
json.dump(data_to_save, f, indent=2, ensure_ascii=False, default=default_json)
|
|
|
|
|
255 |
os.replace(temp_file, REQUESTS_FILE)
|
|
|
256 |
logger.info(f"Sauvegarde effectuée: {progress_counter.value}/{total_requests.value} requêtes")
|
257 |
except Exception as e:
|
258 |
logger.error(f"Erreur lors de la sauvegarde: {str(e)}")
|
|
|
260 |
def run_request_process(start_index, chunk_size, process_id):
|
261 |
"""Fonction exécutée dans chaque processus pour effectuer des requêtes"""
|
262 |
try:
|
|
|
263 |
loop = asyncio.new_event_loop()
|
264 |
asyncio.set_event_loop(loop)
|
|
|
|
|
265 |
loop.run_until_complete(process_request_chunk(start_index, chunk_size, process_id))
|
266 |
loop.close()
|
267 |
except Exception as e:
|
|
|
271 |
"""Démarre le processus d'envoi de requêtes avec multiprocessing"""
|
272 |
global requests_in_progress, total_requests, progress_counter, requests_being_made, process_pool, background_tasks
|
273 |
|
|
|
274 |
with requests_lock:
|
275 |
progress_counter.value = 0
|
276 |
total_requests.value = num_requests
|
277 |
requests_being_made[:] = [{"status": "pending"} for _ in range(num_requests)]
|
278 |
|
|
|
279 |
num_cpus = multiprocessing.cpu_count()
|
280 |
num_processes = min(num_cpus, (num_requests + CHUNK_SIZE - 1) // CHUNK_SIZE)
|
281 |
|
282 |
logger.info(f"Démarrage de l'envoi de {num_requests} requêtes avec {num_processes} processus et concurrence de {concurrency}")
|
|
|
|
|
283 |
process_pool = ProcessPoolExecutor(max_workers=num_processes)
|
284 |
background_tasks = []
|
285 |
|
|
|
294 |
)
|
295 |
background_tasks.append(task)
|
296 |
|
|
|
297 |
monitor_thread = ThreadPoolExecutor(max_workers=1)
|
298 |
monitor_thread.submit(monitor_background_tasks)
|
299 |
|
300 |
def monitor_background_tasks():
|
301 |
"""Surveille les tâches en arrière-plan et marque le processus comme terminé lorsque tout est fait"""
|
302 |
global requests_in_progress, background_tasks
|
|
|
303 |
try:
|
|
|
304 |
for task in background_tasks:
|
305 |
+
task.result()
|
|
|
306 |
logger.info(f"Toutes les tâches d'envoi de requêtes sont terminées. {progress_counter.value}/{total_requests.value} requêtes traitées.")
|
|
|
|
|
307 |
save_results_to_file()
|
|
|
|
|
308 |
requests_in_progress.value = False
|
309 |
except Exception as e:
|
310 |
logger.error(f"Erreur lors de la surveillance des tâches: {str(e)}")
|
|
|
313 |
@app.route('/')
|
314 |
def index():
|
315 |
"""Page d'accueil"""
|
|
|
316 |
requests_data = []
|
317 |
if os.path.exists(REQUESTS_FILE):
|
318 |
try:
|
|
|
323 |
|
324 |
return render_template('index.html',
|
325 |
requests_in_progress=requests_in_progress.value,
|
326 |
+
requests=requests_data[:1000],
|
327 |
progress=progress_counter.value,
|
328 |
total=total_requests.value)
|
329 |
|
|
|
331 |
def start_requests():
|
332 |
"""Démarrer l'envoi de requêtes"""
|
333 |
if not requests_in_progress.value:
|
|
|
334 |
num_requests = int(request.form.get('num_requests', 1000))
|
335 |
concurrency = int(request.form.get('concurrency', MAX_CONNECTIONS))
|
|
|
|
|
336 |
requests_in_progress.value = True
|
|
|
|
|
337 |
start_request_process(num_requests, concurrency)
|
|
|
338 |
logger.info(f"Envoi de {num_requests} requêtes lancé avec concurrence {concurrency}")
|
339 |
else:
|
340 |
logger.warning("Un processus d'envoi de requêtes est déjà en cours")
|
|
|
344 |
@app.route('/progress')
|
345 |
def get_progress():
|
346 |
"""Endpoint API pour obtenir la progression complète"""
|
|
|
347 |
requests_data = []
|
348 |
if os.path.exists(REQUESTS_FILE):
|
349 |
try:
|
|
|
356 |
'requests_in_progress': requests_in_progress.value,
|
357 |
'progress': progress_counter.value,
|
358 |
'total': total_requests.value,
|
359 |
+
'requests': requests_data[:200]
|
360 |
})
|
361 |
|
362 |
@app.route('/status')
|
|
|
377 |
global requests_in_progress, requests_being_made, progress_counter, total_requests, process_pool, background_tasks
|
378 |
|
379 |
if not requests_in_progress.value:
|
|
|
380 |
with requests_lock:
|
381 |
requests_being_made[:] = []
|
382 |
progress_counter.value = 0
|
383 |
total_requests.value = 0
|
|
|
|
|
384 |
if os.path.exists(REQUESTS_FILE):
|
385 |
os.remove(REQUESTS_FILE)
|
|
|
386 |
logger.info("Réinitialisation effectuée")
|
387 |
else:
|
388 |
logger.warning("Impossible de réinitialiser pendant un processus en cours")
|
|
|
393 |
def stop_requests():
|
394 |
"""Arrête le processus d'envoi de requêtes en cours"""
|
395 |
global requests_in_progress, process_pool
|
|
|
396 |
if requests_in_progress.value and process_pool:
|
397 |
logger.info("Arrêt des processus d'envoi de requêtes...")
|
398 |
process_pool.shutdown(wait=False)
|
399 |
requests_in_progress.value = False
|
|
|
|
|
400 |
save_results_to_file()
|
|
|
401 |
logger.info("Processus d'envoi de requêtes arrêté")
|
402 |
|
403 |
return redirect(url_for('index'))
|
|
|
413 |
except json.JSONDecodeError:
|
414 |
requests_data = []
|
415 |
|
|
|
416 |
successful_requests = [req for req in requests_data if req.get('status') == 'success']
|
417 |
failed_requests = [req for req in requests_data if req.get('status') == 'failed']
|
418 |
|
|
|
419 |
avg_response_time = 0
|
420 |
if successful_requests:
|
421 |
avg_response_time = sum(req.get('response_time', 0) for req in successful_requests) / len(successful_requests)
|
422 |
|
|
|
423 |
status_codes = {}
|
424 |
for req in successful_requests:
|
425 |
code = req.get('status_code')
|
426 |
if code:
|
427 |
status_codes[code] = status_codes.get(code, 0) + 1
|
428 |
|
|
|
429 |
error_types = {}
|
430 |
for req in failed_requests:
|
431 |
error = req.get('error', 'Unknown')
|
|
|
573 |
<th>Temps (s)</th>
|
574 |
<th>Heure</th>
|
575 |
</tr>
|
576 |
+
</thead>
|
|
|
577 |
<tbody id="requests-table">
|
578 |
<tr>
|
579 |
<td colspan="6" class="text-center">Chargement des données...</td>
|
|
|
587 |
|
588 |
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
|
589 |
<script>
|
|
|
590 |
let isPolling = false;
|
591 |
let pollingInterval;
|
592 |
|
|
|
593 |
window.onload = function() {
|
594 |
updateProgress();
|
595 |
updateRequests();
|
596 |
updateStats();
|
597 |
|
|
|
598 |
pollingInterval = setInterval(() => {
|
599 |
if (isPolling) {
|
600 |
updateProgress();
|
|
|
603 |
}, 5000);
|
604 |
};
|
605 |
|
|
|
606 |
function updateProgress() {
|
607 |
fetch('/status')
|
608 |
.then(response => response.json())
|
609 |
.then(data => {
|
|
|
610 |
const percentage = data.total > 0 ? (data.progress / data.total * 100) : 0;
|
611 |
document.getElementById('progress-bar').style.width = percentage + '%';
|
612 |
document.getElementById('progress-bar').setAttribute('aria-valuenow', percentage);
|
613 |
|
|
|
614 |
document.getElementById('progress-text').textContent =
|
615 |
`${data.progress} / ${data.total} requêtes traitées (${percentage.toFixed(1)}%)`;
|
|
|
|
|
616 |
document.getElementById('status-text').textContent =
|
617 |
`État: ${data.requests_in_progress ? 'En cours' : 'Inactif'}`;
|
618 |
|
|
|
619 |
document.getElementById('success-count').textContent = data.success_count;
|
620 |
document.getElementById('failed-count').textContent = data.failed_count;
|
621 |
document.getElementById('pending-count').textContent = data.pending_count;
|
622 |
|
|
|
623 |
isPolling = data.requests_in_progress;
|
|
|
|
|
624 |
document.getElementById('start-button').disabled = data.requests_in_progress;
|
625 |
document.getElementById('stop-button').disabled = !data.requests_in_progress;
|
626 |
document.getElementById('reset-button').disabled = data.requests_in_progress;
|
|
|
630 |
});
|
631 |
}
|
632 |
|
|
|
633 |
function updateRequests() {
|
634 |
fetch('/progress')
|
635 |
.then(response => response.json())
|
636 |
.then(data => {
|
637 |
const tableBody = document.getElementById('requests-table');
|
|
|
|
|
638 |
tableBody.innerHTML = '';
|
639 |
|
|
|
640 |
if (data.requests.length === 0) {
|
641 |
tableBody.innerHTML = '<tr><td colspan="6" class="text-center">Aucune requête effectuée.</td></tr>';
|
642 |
return;
|
643 |
}
|
644 |
|
|
|
645 |
data.requests.forEach((req, index) => {
|
646 |
const row = document.createElement('tr');
|
|
|
|
|
647 |
const statusClass = `status-${req.status || 'pending'}`;
|
648 |
|
|
|
649 |
row.innerHTML = `
|
650 |
<td>${index + 1}</td>
|
651 |
<td>${req.url || 'N/A'}</td>
|
|
|
663 |
});
|
664 |
}
|
665 |
|
|
|
666 |
function updateStats() {
|
667 |
fetch('/stats')
|
668 |
.then(response => response.json())
|
669 |
.then(data => {
|
670 |
const statsContainer = document.getElementById('stats-container');
|
671 |
|
|
|
672 |
let statusCodeHtml = '<div class="mt-3"><h5>Codes de statut</h5>';
|
673 |
if (Object.keys(data.status_codes).length > 0) {
|
674 |
statusCodeHtml += '<div class="row">';
|
|
|
693 |
}
|
694 |
statusCodeHtml += '</div>';
|
695 |
|
|
|
696 |
let errorTypesHtml = '<div class="mt-3"><h5>Types d\'erreurs</h5>';
|
697 |
if (Object.keys(data.error_types).length > 0) {
|
698 |
errorTypesHtml += '<ul class="list-group">';
|
|
|
710 |
}
|
711 |
errorTypesHtml += '</div>';
|
712 |
|
|
|
713 |
statsContainer.innerHTML = `
|
714 |
<div class="row">
|
715 |
<div class="col-md-6">
|
|
|
753 |
"""
|
754 |
|
755 |
if __name__ == '__main__':
|
756 |
+
app.run(debug=True)
|