Files
turf_saas/horse_detail_v2.py
2026-04-25 17:18:43 +02:00

196 lines
5.6 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Enhanced Horse Detail Scraper - v2
Ferrure, Oeillères, Jockey stats
"""
import requests
from bs4 import BeautifulSoup
import json
from datetime import datetime
import re
import sqlite3
HEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
}
DB_PATH = "/home/h3r7/turf_scraper/turf.db"
def init_horse_db(db_path=None):
    """Create the ``horses_details`` table if it does not exist yet.

    The statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this
    function repeatedly on the same database is harmless.

    Args:
        db_path: Path of the SQLite database file to initialise. When
            omitted (``None``), the module-level ``DB_PATH`` is used.

    Raises:
        sqlite3.Error: If the database cannot be opened or the table
            cannot be created.
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS horses_details (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date TEXT,
                horse_name TEXT,
                horse_id TEXT,
                age INTEGER,
                sex TEXT,
                trainer TEXT,
                jockey TEXT,
                last_odds REAL,
                wins INTEGER,
                placed INTEGER,
                total_races INTEGER,
                earnings REAL,
                form_music TEXT,
                ferrure TEXT,
                oeilleres TEXT,
                recent_form TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        conn.commit()
    finally:
        # Close even when the DDL fails so the connection is not leaked.
        conn.close()
def save_horse(horse_data, db_path=None):
    """Insert one scraped horse record into ``horses_details``.

    The ``date`` column is set to today's date (``YYYY-MM-DD``); every
    other column is read from ``horse_data`` with ``dict.get``, so keys
    that are absent are stored as NULL.

    Args:
        horse_data: Mapping with the keys ``name``, ``id``, ``age``,
            ``sex``, ``trainer``, ``jockey``, ``odds``, ``wins``,
            ``placed``, ``total_races``, ``earnings``, ``form_music``,
            ``ferrure``, ``oeilleres`` and ``recent_form``.
        db_path: Path of the SQLite database file. When omitted
            (``None``), the module-level ``DB_PATH`` is used.

    Raises:
        sqlite3.Error: If the database cannot be opened or the insert
            fails (for example when the table has not been created).
    """
    # (table column, horse_data key) pairs, in insertion order.
    fields = (
        ('horse_name', 'name'),
        ('horse_id', 'id'),
        ('age', 'age'),
        ('sex', 'sex'),
        ('trainer', 'trainer'),
        ('jockey', 'jockey'),
        ('last_odds', 'odds'),
        ('wins', 'wins'),
        ('placed', 'placed'),
        ('total_races', 'total_races'),
        ('earnings', 'earnings'),
        ('form_music', 'form_music'),
        ('ferrure', 'ferrure'),
        ('oeilleres', 'oeilleres'),
        ('recent_form', 'recent_form'),
    )
    columns = ('date',) + tuple(column for column, _ in fields)
    values = (datetime.now().strftime('%Y-%m-%d'),) + tuple(
        horse_data.get(key) for _, key in fields
    )
    # Derive the placeholders from the column list so the two can never
    # disagree; a hand-written list with one "?" too many makes every
    # insert fail. Only the fixed column names above are interpolated;
    # all values are bound as parameters.
    placeholders = ', '.join('?' * len(columns))
    sql = (
        f"INSERT INTO horses_details ({', '.join(columns)}) "
        f"VALUES ({placeholders})"
    )

    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        conn.execute(sql, values)
        conn.commit()
    finally:
        # Close even when the insert fails so the connection is not leaked.
        conn.close()
    print(f"✅ Saved: {horse_data.get('name')}")
def scrape_horse(horse_id):
    """Scrape one horse detail page from Canalturf.

    Args:
        horse_id: Value placed in the ``idcheval`` query parameter.

    Returns:
        On success, a dict with the keys ``url``, ``id``, ``name``,
        ``age``, ``sex``, ``trainer``, ``jockey``, ``odds``, ``wins``,
        ``placed``, ``total_races``, ``earnings``, ``form_music``,
        ``ferrure``, ``oeilleres`` and ``recent_form``. Only ``name``,
        ``sex``, ``age``, ``trainer``, ``odds``, ``form_music``, ``wins``
        and ``recent_form`` are parsed from the page; the other fields
        keep their defaults.

        On any failure (network error, HTTP error status, parsing error)
        a dict ``{'id': horse_id, 'error': <message>}`` is returned
        instead of raising.
    """
    url = f"https://www.canalturf.com/courses_fiche_cheval.php?idcheval={horse_id}"
    try:
        r = requests.get(url, headers=HEADERS, timeout=15)
        # Do not parse 4xx/5xx error pages as if they were horse data.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        # Text nodes are joined with " | " so label/value pairs can be
        # located with regular expressions.
        text = soup.get_text(separator=' | ', strip=True)

        data = {
            'url': url,
            'id': horse_id,
            'name': '',
            'age': None,
            'sex': '',
            'trainer': '',
            'jockey': '',
            'odds': None,
            'wins': 0,
            'placed': 0,
            'total_races': 0,
            'earnings': 0,
            'form_music': '',
            'ferrure': '',
            'oeilleres': '',
            'recent_form': ''
        }

        # Name: the part of the page <title> before the first "-".
        title = soup.find('title')
        if title:
            data['name'] = title.text.split('-')[0].strip()

        data.update(_parse_horse_text(text))

        # Recent performances: text of the first five result links,
        # keeping those longer than 5 characters, each cut to 50 chars.
        recent = [
            txt[:50]
            for txt in (
                link.get_text(strip=True)
                for link in soup.select('a[href*="/resultats-PMU/"]')[:5]
            )
            if len(txt) > 5
        ]
        data['recent_form'] = ' | '.join(recent)
        return data
    except Exception as e:
        # Boundary handler: callers receive an error dict, never an exception.
        return {'id': horse_id, 'error': str(e)}


def _parse_horse_text(text):
    """Extract the labelled fields from the " | "-joined page text.

    Returns a dict holding only the fields that were found, among
    ``sex``, ``age``, ``trainer``, ``odds``, ``form_music`` and ``wins``.
    """
    found = {}

    # Sex/Age - format "F4" or "M5". The age may have several digits.
    # NOTE(review): "H" (hongre/gelding) is accepted on the assumption
    # that the site uses the usual French sex codes - confirm.
    match = re.search(r'Sexe/Age\s*:\s*([MFH])(\d+)', text)
    if match:
        found['sex'] = match.group(1)
        found['age'] = int(match.group(2))

    # Trainer: everything after the label up to the next separator.
    match = re.search(r'Entraineur\s*:\s*([^|]+)', text)
    if match:
        found['trainer'] = match.group(1).strip()

    # Odds: the decimal number that directly follows the "Cote" label,
    # in the same text segment or the next one. An unanchored search
    # would pick up the first decimal number anywhere on the page.
    match = re.search(r'Cote[^\d|]*(?:\|[^\d|]*)?(\d+[.,]\d+)', text)
    if match:
        found['odds'] = float(match.group(1).replace(',', '.'))

    # Form/Music
    match = re.search(r'Perf\.\s*:\s*([A-Za-z0-9()]+)', text)
    if match:
        found['form_music'] = match.group(1).strip()

    # Wins: first "Victoire(s) : N" occurrence.
    match = re.search(r'Victoire\(s\)\s*:\s*(\d+)', text)
    if match:
        found['wins'] = int(match.group(1))

    return found
# Test
if __name__ == "__main__":
    # Manual smoke test: scrape two known horses and store them.
    init_horse_db()
    print("="*50)
    print("HORSE DETAIL SCRAPER v2 - ENHANCED")
    print("="*50)
    # Sample ids: PASSIONATA, then EMSILORD
    for index, horse_id in enumerate(("516052", "518372")):
        if index:
            print("\n" + "-"*30)
        horse = scrape_horse(horse_id)
        if 'error' in horse:
            # scrape_horse reports failures as an error dict; saving it
            # would insert a row made of NULLs, so skip it.
            print(f"\n❌ Failed to scrape horse {horse_id}: {horse['error']}")
            continue
        print(f"\n{horse.get('name')}:")
        print(f" Age/Sex: {horse.get('sex')}{horse.get('age')}")
        print(f" Trainer: {horse.get('trainer')}")
        print(f" Odds: {horse.get('odds')}")
        print(f" Form: {horse.get('form_music')}")
        save_horse(horse)