Initial commit: existing turf_saas codebase
Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
113
multi_scraper.py
Executable file
113
multi_scraper.py
Executable file
@@ -0,0 +1,113 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Multi-site turf scraper
|
||||
Sources: Equidia, ZETurf, Canalturf, Boturfers
|
||||
"""
|
||||
import json
import re
from datetime import datetime
from pathlib import Path
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
|
||||
|
||||
# Request headers passed to requests.get() by the scrapers in this module;
# the User-Agent is a desktop Chrome identification string.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/120.0.0.0 Safari/537.36'
    ),
}
|
||||
|
||||
def scrape_equidia():
    """Collect race-page URLs from the Equidia course listing.

    Fetches ``https://www.equidia.fr/courses``, keeps the anchors whose
    ``href`` contains ``/courses/<current year>-`` and ``R1C``, and resolves
    each one to an absolute URL.

    Returns:
        list[str]: Up to five distinct absolute URLs, in the order they
        appear on the page. An empty list is returned when anything fails;
        the error is printed instead of being raised.
    """
    url = "https://www.equidia.fr/courses"
    # Derive the year instead of hard-coding it so the selector keeps
    # matching once the calendar year changes.
    selector = f'a[href*="/courses/{datetime.now().year}-"]'
    try:
        r = requests.get(url, headers=HEADERS, timeout=10)
        # Surface 4xx/5xx responses as errors rather than silently parsing
        # an error page and reporting "0 courses".
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        hrefs = [link.get('href', '') for link in soup.select(selector)]
        # urljoin copes with root-relative, relative and already-absolute
        # hrefs; plain concatenation only worked for the first kind.
        courses = [urljoin(url, href) for href in hrefs if 'R1C' in href]
        # dict.fromkeys removes duplicates while keeping page order, so the
        # five URLs kept are the same on every run (a set has no order).
        return list(dict.fromkeys(courses))[:5]
    except Exception as e:
        # Best effort: a failure on this source must not abort the caller.
        print(f"Equidia error: {e}")
        return []
|
||||
|
||||
def scrape_zeturf():
    """Collect race-page URLs from the ZETurf "courses du jour" listing.

    Fetches ``https://www.zeturf.fr/fr/courses-du-jour``, keeps the anchors
    whose ``href`` contains ``/course-du-jour/``, and resolves each one to an
    absolute URL.

    Returns:
        list[str]: Up to five distinct absolute URLs, in the order they
        appear on the page. An empty list is returned when anything fails;
        the error is printed instead of being raised.
    """
    url = "https://www.zeturf.fr/fr/courses-du-jour"
    try:
        r = requests.get(url, headers=HEADERS, timeout=10)
        # Surface 4xx/5xx responses as errors rather than silently parsing
        # an error page and reporting "0 courses".
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        # urljoin copes with root-relative, relative and already-absolute
        # hrefs; plain concatenation only worked for the first kind.
        courses = [
            urljoin(url, link.get('href', ''))
            for link in soup.select('a[href*="/course-du-jour/"]')
        ]
        # dict.fromkeys removes duplicates while keeping page order, so the
        # five URLs kept are the same on every run (a set has no order).
        return list(dict.fromkeys(courses))[:5]
    except Exception as e:
        # Best effort: a failure on this source must not abort the caller.
        print(f"ZETurf error: {e}")
        return []
|
||||
|
||||
def scrape_canalturf():
    """Collect "quinte" links from the Canalturf daily races page.

    Fetches ``https://www.canalturf.com/courses_chevaux_jour.php``, scans
    every anchor, keeps those whose ``href`` contains ``quinte``
    (case-insensitive), and resolves each one to an absolute URL.

    Returns:
        list[str]: Up to five distinct absolute URLs, in the order they
        appear on the page. An empty list is returned when anything fails;
        the error is printed instead of being raised.
    """
    url = "https://www.canalturf.com/courses_chevaux_jour.php"
    try:
        r = requests.get(url, headers=HEADERS, timeout=10)
        # Surface 4xx/5xx responses as errors rather than silently parsing
        # an error page and reporting "0 courses".
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        # Anchors without an href yield '' and are dropped by the filter.
        hrefs = [link.get('href', '') for link in soup.select('a')]
        # Every <a> on the page is scanned, so hrefs may be relative
        # ("quinte.php") or absolute; urljoin resolves all of them, whereas
        # concatenating onto the host mangled both of those forms.
        links = [urljoin(url, href) for href in hrefs if 'quinte' in href.lower()]
        # dict.fromkeys removes duplicates while keeping page order, so the
        # five URLs kept are the same on every run (a set has no order).
        return list(dict.fromkeys(links))[:5]
    except Exception as e:
        # Best effort: a failure on this source must not abort the caller.
        print(f"Canalturf error: {e}")
        return []
|
||||
|
||||
def scrape_boturfers():
    """Collect "quinte" links from the Boturfers home page.

    Fetches ``https://www.boturfers.fr``, keeps the anchors whose ``href``
    contains ``/quinte``, and resolves each one to an absolute URL.

    Returns:
        list[str]: Up to five distinct absolute URLs, in the order they
        appear on the page. An empty list is returned when anything fails;
        the error is printed instead of being raised.
    """
    url = "https://www.boturfers.fr"
    try:
        r = requests.get(url, headers=HEADERS, timeout=10)
        # Surface 4xx/5xx responses as errors rather than silently parsing
        # an error page and reporting "0 courses".
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        # urljoin copes with root-relative, relative and already-absolute
        # hrefs; plain concatenation only worked for the first kind.
        courses = [
            urljoin(url, link.get('href', ''))
            for link in soup.select('a[href*="/quinte"]')
        ]
        # dict.fromkeys removes duplicates while keeping page order, so the
        # five URLs kept are the same on every run (a set has no order).
        return list(dict.fromkeys(courses))[:5]
    except Exception as e:
        # Best effort: a failure on this source must not abort the caller.
        print(f"Boturfers error: {e}")
        return []
|
||||
|
||||
def main(output_dir="/home/h3r7/turf_scraper"):
    """Run the four scrapers in turn and save their results as one JSON file.

    Progress is printed for each source. The results are written to
    ``<output_dir>/courses_<YYYYmmdd_HHMM>.json``.

    Args:
        output_dir: Directory that receives the JSON file. Defaults to the
            path this script has always written to; it is created when it
            does not exist yet.

    Returns:
        dict: ``timestamp`` (ISO-8601 string) followed by one list of URLs
        under each of ``equidia``, ``zeturf``, ``canalturf`` and
        ``boturfers``.
    """
    print(f"=== Turf Multi-Scraper {datetime.now().strftime('%Y-%m-%d %H:%M')} ===")

    # (display label, key in the output dict, scraper), in run order.
    sources = (
        ('Equidia', 'equidia', scrape_equidia),
        ('ZETurf', 'zeturf', scrape_zeturf),
        ('Canalturf', 'canalturf', scrape_canalturf),
        ('Boturfers', 'boturfers', scrape_boturfers),
    )

    results = {}
    for step, (label, key, scrape) in enumerate(sources, start=1):
        print(f"\n[{step}/{len(sources)}] Scraping {label}...")
        results[key] = scrape()
        print(f" Found {len(results[key])} courses")

    # Read the clock once after scraping so the stored timestamp and the
    # file name can never disagree across a minute boundary.
    finished = datetime.now()
    data = {'timestamp': finished.isoformat(), **results}

    # Create the target directory up front; otherwise a missing directory
    # would raise only after all the scraping work had been done.
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    output_file = out_dir / f"courses_{finished.strftime('%Y%m%d_%H%M')}.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)

    print(f"\n✅ Saved to {output_file}")
    return data
|
||||
|
||||
# Script entry point: run the scrapers only when executed directly,
# not when this module is imported.
if "__main__" == __name__:
    main()
|
||||
Reference in New Issue
Block a user