✨ Feature: Add resume script for FFA scraping
- New resume_scraping.py: resume scraping from a specific date
  * Designed to continue after crashes or interruptions
  * Starts from 2024-04-08 (after the original script crash)
  * Continues until 2026-08-01
  * Appends to existing CSV files (no data loss)
- Handles 'invalid session id' errors
- Preserves existing data in courses_daily.csv and results_daily.csv
- Allows seamless recovery from Selenium/Chrome crashes
- Documentation in docs/SCRAPER_REPRISE.md
This commit is contained in:
48
scripts/resume_scraping.py
Normal file
48
scripts/resume_scraping.py
Normal file
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script de reprise du scraping FFA
|
||||
|
||||
Ce script reprend le scraping à partir d'une date spécifique
|
||||
pour continuer après un crash ou une interruption.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||
|
||||
from datetime import datetime
|
||||
from scripts.scraper_jour_par_jour import FFAScraperDaily
|
||||
|
||||
def main(resume_date: str = "2024-04-08", end_date: str = "2026-08-01") -> None:
    """Resume the FFA scraping run from a specific date.

    Validates the date range, prints a banner, runs ``FFAScraperDaily.run``
    over the range and prints the totals it reports.

    Args:
        resume_date: First date to scrape, formatted ``YYYY-MM-DD``. The
            default is the date on which the original run crashed.
        end_date: End of the date range handed to the scraper, formatted
            ``YYYY-MM-DD``. The default is the end date of the original run.

    Raises:
        ValueError: If either date is not a valid ``YYYY-MM-DD`` date, or if
            ``resume_date`` is later than ``end_date``.
    """
    # Fail fast: reject a mistyped date before the scraper is even created.
    try:
        first_day = datetime.strptime(resume_date, "%Y-%m-%d")
        last_day = datetime.strptime(end_date, "%Y-%m-%d")
    except (TypeError, ValueError) as err:
        raise ValueError(
            f"Dates must use the YYYY-MM-DD format "
            f"(got resume_date={resume_date!r}, end_date={end_date!r})"
        ) from err

    if first_day > last_day:
        raise ValueError(
            f"resume_date ({resume_date}) is later than end_date ({end_date})"
        )

    separator = "=" * 60

    print(separator)
    print("🔄 Scraper FFA - Reprise")
    print(separator)
    print(f"Reprise à partir du: {resume_date}")
    print(f"Fin: {end_date}")
    print("Fichiers de sortie: data/courses_daily.csv, data/results_daily.csv")
    print(separator)
    print("⚠️ Ce script va continuer d'ajouter aux fichiers existants")
    print(separator)

    # The scraper receives the original strings; the parsed values above are
    # used for validation only.
    scraper = FFAScraperDaily()
    stats = scraper.run(resume_date, end_date)

    print("\n" + separator)
    print("✅ Scraping terminé!")
    print(separator)
    print(f"Jours traités: {stats['total_days']}")
    print(f"Courses récupérées: {stats['total_courses']}")
    print(f"Résultats récupérés: {stats['total_results']}")
    print(separator)
|
||||
|
||||
|
||||
# Run the resume only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user