Initial commit: existing turf_saas codebase

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
ML Engineer
2026-04-25 17:18:43 +02:00
commit ed07c8a3d1
137 changed files with 36398 additions and 0 deletions

113
multi_scraper.py Executable file
View File

@@ -0,0 +1,113 @@
#!/usr/bin/env python3
"""
Multi-site turf scraper
Sources: Equidia, ZETurf, Canalturf, Boturfers
"""
import requests
from bs4 import BeautifulSoup
import json
from datetime import datetime
import re
# HTTP request headers passed to requests.get by the scraper functions.
# The User-Agent value is a desktop Chrome 120 / Windows 10 browser string.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}
def scrape_equidia():
    """Collect Equidia race-page URLs (detailed results source).

    Fetches the Equidia course index and keeps the links whose href contains
    both ``/courses/<current year>-`` and the marker ``R1C``.

    Returns:
        list[str]: Up to five unique absolute URLs, in the order they appear
        on the page. An empty list is returned when fetching or parsing
        fails; the error is printed rather than raised.
    """
    url = "https://www.equidia.fr/courses"
    # Derive the date prefix from the current year instead of a literal year
    # so the selector keeps matching after the calendar rolls over.
    selector = f'a[href*="/courses/{datetime.now().year}-"]'
    try:
        r = requests.get(url, headers=HEADERS, timeout=10)
        # Report HTTP errors (403/404/5xx) instead of parsing an error page.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        courses = []
        for link in soup.select(selector):
            href = link.get('href', '')
            if 'R1C' in href:
                # urljoin handles absolute and relative hrefs alike, where
                # plain concatenation would produce a malformed URL.
                courses.append(urljoin(url, href))
        # dict.fromkeys de-duplicates while keeping page order, so the five
        # URLs kept are the same on every run (set() order is arbitrary).
        return list(dict.fromkeys(courses))[:5]
    except Exception as e:
        # Deliberately best-effort: one failing source must not abort the run.
        print(f"Equidia error: {e}")
        return []
def scrape_zeturf():
    """Collect ZETurf race-page URLs (odds source).

    Fetches the ZETurf "courses du jour" page and keeps every link whose href
    contains ``/course-du-jour/``.

    Returns:
        list[str]: Up to five unique absolute URLs, in the order they appear
        on the page. An empty list is returned when fetching or parsing
        fails; the error is printed rather than raised.
    """
    url = "https://www.zeturf.fr/fr/courses-du-jour"
    try:
        r = requests.get(url, headers=HEADERS, timeout=10)
        # Report HTTP errors (403/404/5xx) instead of parsing an error page.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        # urljoin handles absolute and relative hrefs alike, where plain
        # concatenation would produce a malformed URL.
        courses = [
            urljoin(url, link.get('href', ''))
            for link in soup.select('a[href*="/course-du-jour/"]')
        ]
        # dict.fromkeys de-duplicates while keeping page order, so the five
        # URLs kept are the same on every run (set() order is arbitrary).
        return list(dict.fromkeys(courses))[:5]
    except Exception as e:
        # Deliberately best-effort: one failing source must not abort the run.
        print(f"ZETurf error: {e}")
        return []
def scrape_canalturf():
    """Collect Canalturf links mentioning "quinte" (predictions source).

    Fetches the Canalturf daily races page, scans every ``<a>`` element and
    keeps the hrefs that contain ``quinte`` (case-insensitive).

    Returns:
        list[str]: Up to five unique absolute URLs, in the order they appear
        on the page. An empty list is returned when fetching or parsing
        fails; the error is printed rather than raised.
    """
    url = "https://www.canalturf.com/courses_chevaux_jour.php"
    try:
        r = requests.get(url, headers=HEADERS, timeout=10)
        # Report HTTP errors (403/404/5xx) instead of parsing an error page.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        links = []
        for link in soup.select('a'):
            # Anchors without an href yield '' and are filtered out below.
            href = link.get('href', '')
            if 'quinte' in href.lower():
                # Every anchor is scanned, so hrefs may already be absolute;
                # urljoin leaves those intact and resolves relative ones.
                links.append(urljoin(url, href))
        # dict.fromkeys de-duplicates while keeping page order, so the five
        # URLs kept are the same on every run (set() order is arbitrary).
        return list(dict.fromkeys(links))[:5]
    except Exception as e:
        # Deliberately best-effort: one failing source must not abort the run.
        print(f"Canalturf error: {e}")
        return []
def scrape_boturfers():
    """Collect Boturfers links containing "/quinte" (predictions and stats).

    Fetches the Boturfers home page and keeps every link whose href contains
    ``/quinte``.

    Returns:
        list[str]: Up to five unique absolute URLs, in the order they appear
        on the page. An empty list is returned when fetching or parsing
        fails; the error is printed rather than raised.
    """
    url = "https://www.boturfers.fr"
    try:
        r = requests.get(url, headers=HEADERS, timeout=10)
        # Report HTTP errors (403/404/5xx) instead of parsing an error page.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        # urljoin handles absolute and relative hrefs alike, where plain
        # concatenation would produce a malformed URL.
        courses = [
            urljoin(url, link.get('href', ''))
            for link in soup.select('a[href*="/quinte"]')
        ]
        # dict.fromkeys de-duplicates while keeping page order, so the five
        # URLs kept are the same on every run (set() order is arbitrary).
        return list(dict.fromkeys(courses))[:5]
    except Exception as e:
        # Deliberately best-effort: one failing source must not abort the run.
        print(f"Boturfers error: {e}")
        return []
def main(output_dir="/home/h3r7/turf_scraper"):
    """Run the four site scrapers and save the collected URLs as JSON.

    Args:
        output_dir: Directory that receives ``courses_<YYYYmmdd_HHMM>.json``.
            Defaults to the location the script has always written to. The
            directory is created when it does not exist yet.

    Returns:
        dict: ``timestamp`` (ISO-8601 string) followed by one list of URLs per
        source under the keys ``equidia``, ``zeturf``, ``canalturf`` and
        ``boturfers``.
    """
    print(f"=== Turf Multi-Scraper {datetime.now().strftime('%Y-%m-%d %H:%M')} ===")

    # (JSON key, display label, scraper) in the order they are run and stored.
    sources = (
        ('equidia', 'Equidia', scrape_equidia),
        ('zeturf', 'ZETurf', scrape_zeturf),
        ('canalturf', 'Canalturf', scrape_canalturf),
        ('boturfers', 'Boturfers', scrape_boturfers),
    )

    results = {}
    for step, (key, label, scraper) in enumerate(sources, start=1):
        print(f"\n[{step}/{len(sources)}] Scraping {label}...")
        results[key] = scraper()
        print(f" Found {len(results[key])} courses")

    # Read the clock once so the payload timestamp and the file name agree
    # even when the run straddles a minute boundary.
    finished = datetime.now()
    data = {'timestamp': finished.isoformat(), **results}

    out_dir = Path(output_dir)
    # Create the target directory up front; without it open() raises
    # FileNotFoundError and the freshly scraped data is lost.
    out_dir.mkdir(parents=True, exist_ok=True)
    output_file = out_dir / f"courses_{finished.strftime('%Y%m%d_%H%M')}.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)
    print(f"\n✅ Saved to {output_file}")
    return data


# Run the scraper only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()