114 lines
3.6 KiB
Python
Executable File
114 lines
3.6 KiB
Python
Executable File
#!/usr/bin/env python3
"""
Multi-site turf scraper
Sources: Equidia, ZETurf, Canalturf, Boturfers
"""
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
import json
|
|
from datetime import datetime
|
|
import re
|
|
|
|
# Browser-like User-Agent passed as `headers=` on every requests.get call in
# this file.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/120.0.0.0 Safari/537.36'
    ),
}
|
|
|
|
def scrape_equidia():
    """Collect up to five race-page URLs from the Equidia courses listing.

    Fetches ``https://www.equidia.fr/courses`` and keeps the links whose href
    contains a dated ``/courses/<YYYY>-`` segment and the marker ``R1C``.

    Returns:
        list[str]: At most five distinct absolute URLs, in the order they
        first appear on the page. An empty list is returned when the request
        or the parsing fails; the error is printed, never raised.
    """
    # Function-scope import: the file's import block does not provide urljoin.
    from urllib.parse import urljoin

    url = "https://www.equidia.fr/courses"
    try:
        r = requests.get(url, headers=HEADERS, timeout=10)
        # Surface 4xx/5xx as an error instead of silently parsing an error page.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        courses = []
        for link in soup.select('a[href*="/courses/"]'):
            href = link.get('href', '')
            # Match any four-digit year rather than one hard-coded year, so
            # the scraper keeps working after the calendar rolls over.
            if re.search(r'/courses/\d{4}-', href) and 'R1C' in href:
                # urljoin handles absolute and relative hrefs alike.
                courses.append(urljoin(url, href))
        # dict.fromkeys de-duplicates while keeping page order, so the
        # five-item cut is deterministic (set() ordering is not).
        return list(dict.fromkeys(courses))[:5]
    except Exception as e:
        # Best-effort scraper: report the failure and let the other sources run.
        print(f"Equidia error: {e}")
        return []
|
|
|
|
def scrape_zeturf():
    """Collect up to five race-page URLs from the ZETurf daily listing.

    Fetches ``https://www.zeturf.fr/fr/courses-du-jour`` and keeps every link
    whose href contains ``/course-du-jour/``.

    Returns:
        list[str]: At most five distinct absolute URLs, in the order they
        first appear on the page. An empty list is returned when the request
        or the parsing fails; the error is printed, never raised.
    """
    # Function-scope import: the file's import block does not provide urljoin.
    from urllib.parse import urljoin

    url = "https://www.zeturf.fr/fr/courses-du-jour"
    try:
        r = requests.get(url, headers=HEADERS, timeout=10)
        # Surface 4xx/5xx as an error instead of silently parsing an error page.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        # urljoin handles absolute and relative hrefs alike, where plain
        # string concatenation would produce a malformed URL.
        courses = [
            urljoin(url, link.get('href', ''))
            for link in soup.select('a[href*="/course-du-jour/"]')
        ]
        # dict.fromkeys de-duplicates while keeping page order, so the
        # five-item cut is deterministic (set() ordering is not).
        return list(dict.fromkeys(courses))[:5]
    except Exception as e:
        # Best-effort scraper: report the failure and let the other sources run.
        print(f"ZETurf error: {e}")
        return []
|
|
|
|
def scrape_canalturf():
    """Collect up to five "quinte" links from the Canalturf daily page.

    Fetches ``https://www.canalturf.com/courses_chevaux_jour.php``, scans
    every anchor and keeps those whose href contains ``quinte``
    (case-insensitive).

    Returns:
        list[str]: At most five distinct absolute URLs, in the order they
        first appear on the page. An empty list is returned when the request
        or the parsing fails; the error is printed, never raised.
    """
    # Function-scope import: the file's import block does not provide urljoin.
    from urllib.parse import urljoin

    url = "https://www.canalturf.com/courses_chevaux_jour.php"
    try:
        r = requests.get(url, headers=HEADERS, timeout=10)
        # Surface 4xx/5xx as an error instead of silently parsing an error page.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        links = []
        for link in soup.select('a'):
            # Anchors without an href yield '' and are filtered out below.
            href = link.get('href', '')
            if 'quinte' in href.lower():
                # Every anchor is scanned, so hrefs may be absolute or lack a
                # leading slash; urljoin resolves both against the page URL,
                # where concatenation would glue them onto the host name.
                links.append(urljoin(url, href))
        # dict.fromkeys de-duplicates while keeping page order, so the
        # five-item cut is deterministic (set() ordering is not).
        return list(dict.fromkeys(links))[:5]
    except Exception as e:
        # Best-effort scraper: report the failure and let the other sources run.
        print(f"Canalturf error: {e}")
        return []
|
|
|
|
def scrape_boturfers():
    """Collect up to five "quinte" links from the Boturfers home page.

    Fetches ``https://www.boturfers.fr`` and keeps every link whose href
    contains ``/quinte``.

    Returns:
        list[str]: At most five distinct absolute URLs, in the order they
        first appear on the page. An empty list is returned when the request
        or the parsing fails; the error is printed, never raised.
    """
    # Function-scope import: the file's import block does not provide urljoin.
    from urllib.parse import urljoin

    url = "https://www.boturfers.fr"
    try:
        r = requests.get(url, headers=HEADERS, timeout=10)
        # Surface 4xx/5xx as an error instead of silently parsing an error page.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        # urljoin handles absolute and relative hrefs alike, where plain
        # string concatenation would produce a malformed URL.
        courses = [
            urljoin(url, link.get('href', ''))
            for link in soup.select('a[href*="/quinte"]')
        ]
        # dict.fromkeys de-duplicates while keeping page order, so the
        # five-item cut is deterministic (set() ordering is not).
        return list(dict.fromkeys(courses))[:5]
    except Exception as e:
        # Best-effort scraper: report the failure and let the other sources run.
        print(f"Boturfers error: {e}")
        return []
|
|
|
|
def main(output_dir="/home/h3r7/turf_scraper"):
    """Run the four scrapers and save their results to a timestamped JSON file.

    Args:
        output_dir: Directory that receives ``courses_<YYYYmmdd_HHMM>.json``.
            Defaults to the location the script has always written to. The
            directory is created when it does not exist yet.

    Returns:
        dict: The saved payload: an ISO-format ``timestamp`` followed by one
        list of URLs per source (``equidia``, ``zeturf``, ``canalturf``,
        ``boturfers``).

    Raises:
        OSError: If the output directory or file cannot be created or written.
    """
    # Function-scope import: the file's import block does not provide os.
    import os

    print(f"=== Turf Multi-Scraper {datetime.now().strftime('%Y-%m-%d %H:%M')} ===")

    # (display label, payload key, scraper) in the order they run and are saved.
    sources = [
        ("Equidia", "equidia", scrape_equidia),
        ("ZETurf", "zeturf", scrape_zeturf),
        ("Canalturf", "canalturf", scrape_canalturf),
        ("Boturfers", "boturfers", scrape_boturfers),
    ]

    results = {}
    for step, (label, key, scraper) in enumerate(sources, start=1):
        print(f"\n[{step}/{len(sources)}] Scraping {label}...")
        results[key] = scraper()
        print(f" Found {len(results[key])} courses")

    # Take a single clock reading so the payload timestamp and the file name
    # can never disagree across a minute boundary.
    finished = datetime.now()
    data = {'timestamp': finished.isoformat(), **results}

    # Create the target directory up front; otherwise a missing directory
    # would discard the scraped results with a FileNotFoundError.
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(
        output_dir, f"courses_{finished.strftime('%Y%m%d_%H%M')}.json"
    )
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)

    print(f"\n✅ Saved to {output_file}")
    return data
|
|
|
|
# Run the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
|