- New resume_scraping.py: resume scraping from a specific date
  * Designed to continue after crashes or interruptions
  * Starts from 2024-04-08 (after the original script's crash)
  * Continues until 2026-08-01
  * Appends to existing CSV files (no data loss; sketched after the script below)
- Handles 'invalid session id' errors
- Preserves existing data in courses_daily.csv and results_daily.csv
- Allows seamless recovery from Selenium/Chrome crashes
- Documentation in docs/SCRAPER_REPRISE.md
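The 'invalid session id' handling itself lives in FFAScraperDaily (scripts/scraper_jour_par_jour.py), which this commit does not show. As an illustration only, recovery from that error with Selenium usually amounts to catching InvalidSessionIdException/WebDriverException, discarding the dead driver, starting a fresh one, and retrying the same day. The helper below is a hypothetical sketch of that pattern, not the project's actual code; scrape_one_day and make_driver are invented callables.

# Hedged sketch only: one way to recover from "invalid session id" errors.
# The real handling is inside FFAScraperDaily (scripts/scraper_jour_par_jour.py),
# which is not shown in this commit; scrape_one_day and make_driver are
# hypothetical callables used purely for illustration.
from selenium.common.exceptions import InvalidSessionIdException, WebDriverException


def scrape_day_with_retry(scrape_one_day, make_driver, date_str, max_retries=3):
    """Scrape one day, restarting the browser if the Chrome session dies."""
    driver = make_driver()
    for attempt in range(1, max_retries + 1):
        try:
            return scrape_one_day(driver, date_str)
        except (InvalidSessionIdException, WebDriverException):
            # The session is gone: drop the old driver and start a fresh one
            # before retrying the same day, so no date is silently skipped.
            try:
                driver.quit()
            except WebDriverException:
                pass
            if attempt == max_retries:
                raise
            driver = make_driver()

Recreating the driver rather than reusing it matters here: once Chrome has crashed, every further call on the old session keeps raising 'invalid session id'.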
49 lines
1.3 KiB
Python
#!/usr/bin/env python3
"""
FFA scraping resume script

This script resumes scraping from a specific date
so the run can continue after a crash or an interruption.
"""

import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from scripts.scraper_jour_par_jour import FFAScraperDaily


def main():
    """Resume scraping from a specific date"""

    # Resume date (last successful date + 1 day)
    resume_date = "2024-04-08"  # The script crashed on 2024-04-08

    # End date (unchanged)
    end_date = "2026-08-01"

    print("=" * 60)
    print("🔄 FFA scraper - Resume")
    print("=" * 60)
    print(f"Resuming from: {resume_date}")
    print(f"End: {end_date}")
    print("Output files: data/courses_daily.csv, data/results_daily.csv")
    print("=" * 60)
    print("⚠️ This script will keep appending to the existing files")
    print("=" * 60)

    scraper = FFAScraperDaily()
    stats = scraper.run(resume_date, end_date)

    print("\n" + "=" * 60)
    print("✅ Scraping finished!")
    print("=" * 60)
    print(f"Days processed: {stats['total_days']}")
    print(f"Courses retrieved: {stats['total_courses']}")
    print(f"Results retrieved: {stats['total_results']}")
    print("=" * 60)


if __name__ == "__main__":
    main()
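The commit's "no data loss" claim rests on appending to data/courses_daily.csv and data/results_daily.csv instead of rewriting them. That writing logic also belongs to FFAScraperDaily and is not shown here; the snippet below is a minimal sketch of append-mode CSV writing under the assumptions that pandas is used and that each day's rows arrive as a list of dicts (append_rows is an invented helper name).

# Hedged sketch: append-mode CSV writing that preserves already-scraped rows
# ("no data loss"). The use of pandas and the helper name are assumptions;
# the actual writing happens inside FFAScraperDaily.
import os
import pandas as pd


def append_rows(rows, csv_path):
    """Append one day's rows (a list of dicts) to csv_path."""
    if not rows:
        return
    write_header = not os.path.exists(csv_path)
    # mode="a" leaves earlier days untouched, so a crash between two days
    # never destroys data that is already on disk.
    pd.DataFrame(rows).to_csv(csv_path, mode="a", header=write_header,
                              index=False, encoding="utf-8")

With a pattern like this, rerunning resume_scraping.py only adds rows for the days it actually scrapes; everything written before the crash stays in place, which is what makes restarting from 2024-04-08 safe.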