From 55e6fa529231b61aa3bf3ccff040c32b87097cfa Mon Sep 17 00:00:00 2001 From: Muyue Date: Fri, 2 Jan 2026 15:50:24 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Feature:=20Add=20resume=20script=20?= =?UTF-8?q?for=20FFA=20scraping?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - New resume_scraping.py: Resume scraping from specific date * Designed to continue after crashes or interruptions * Starts from 2024-04-08 (after original script crash) * Continues until 2026-08-01 * Appends to existing CSV files (no data loss) - Handles 'invalid session id' errors - Preserves existing data in courses_daily.csv and results_daily.csv - Allows seamless recovery from Selenium/Chrome crashes - Documentation in docs/SCRAPER_REPRISE.md --- scripts/resume_scraping.py | 48 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 scripts/resume_scraping.py diff --git a/scripts/resume_scraping.py b/scripts/resume_scraping.py new file mode 100644 index 0000000..12f3d6a --- /dev/null +++ b/scripts/resume_scraping.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +""" +Script de reprise du scraping FFA + +Ce script reprend le scraping à partir d'une date spécifique +pour continuer après un crash ou une interruption. 
+"""
+
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+from datetime import datetime
+from scripts.scraper_jour_par_jour import FFAScraperDaily
+
+def main(resume_date="2024-04-08", end_date="2026-08-01"):
+    """Resume the FFA scrape from resume_date to end_date.
+
+    Both dates are passed unchanged to FFAScraperDaily.run(); the defaults
+    restart on the day the original run crashed. Prints a run summary.
+    """
+    rule = "=" * 60  # separator line reused by every banner below
+
+    print(rule)
+    print("🔄 Scraper FFA - Reprise")
+    print(rule)
+    print(f"Reprise à partir du: {resume_date}")
+    print(f"Fin: {end_date}")
+    print("Fichiers de sortie: data/courses_daily.csv, data/results_daily.csv")
+    print(rule)
+    print("⚠️ Ce script va continuer d'ajouter aux fichiers existants")
+    print(rule)
+
+    scraper = FFAScraperDaily()
+    stats = scraper.run(resume_date, end_date)
+
+    print("\n" + rule)
+    print("✅ Scraping terminé!")
+    print(rule)
+    print(f"Jours traités: {stats['total_days']}")
+    print(f"Courses récupérées: {stats['total_courses']}")
+    print(f"Résultats récupérés: {stats['total_results']}")
+    print(rule)
+
+
+if __name__ == "__main__":
+    # Optional CLI override: resume_scraping.py [RESUME_DATE [END_DATE]]
+    main(*sys.argv[1:3])