#!/usr/bin/env python3
"""
Enhanced Horse Detail Scraper - v2

Scrapes a horse's detail page from Canalturf and stores the parsed fields in
a SQLite table. The schema has columns for jockey, ferrure (shoeing) and
oeilleres (blinkers), but this module does not parse them yet, so they are
stored as empty strings.
"""

import json
import re
import sqlite3
from contextlib import closing
from datetime import datetime

import requests
from bs4 import BeautifulSoup

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
}

DB_PATH = "/home/h3r7/turf_scraper/turf.db"

# (database column, key in the scraped-data dict) pairs, in INSERT order.
# The "date" column is filled separately with the day the row is saved.
_HORSE_FIELDS = (
    ('horse_name', 'name'),
    ('horse_id', 'id'),
    ('age', 'age'),
    ('sex', 'sex'),
    ('trainer', 'trainer'),
    ('jockey', 'jockey'),
    ('last_odds', 'odds'),
    ('wins', 'wins'),
    ('placed', 'placed'),
    ('total_races', 'total_races'),
    ('earnings', 'earnings'),
    ('form_music', 'form_music'),
    ('ferrure', 'ferrure'),
    ('oeilleres', 'oeilleres'),
    ('recent_form', 'recent_form'),
)


def init_horse_db():
    """Create the ``horses_details`` table in DB_PATH if it does not exist."""
    # closing() guarantees the connection is released even if execute() fails;
    # the inner "with conn" commits on success and rolls back on error.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        with conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS horses_details (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    date TEXT,
                    horse_name TEXT,
                    horse_id TEXT,
                    age INTEGER,
                    sex TEXT,
                    trainer TEXT,
                    jockey TEXT,
                    last_odds REAL,
                    wins INTEGER,
                    placed INTEGER,
                    total_races INTEGER,
                    earnings REAL,
                    form_music TEXT,
                    ferrure TEXT,
                    oeilleres TEXT,
                    recent_form TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')


def save_horse(horse_data):
    """Insert one scraped horse into the ``horses_details`` table.

    Args:
        horse_data: Dict as returned by ``scrape_horse``. Missing keys are
            stored as NULL.

    Raises:
        sqlite3.Error: If the insert fails (e.g. the table does not exist).
    """
    columns = ['date'] + [column for column, _ in _HORSE_FIELDS]
    values = [datetime.now().strftime('%Y-%m-%d')]
    values.extend(horse_data.get(key) for _, key in _HORSE_FIELDS)

    # Placeholders are generated from the column list so the two counts can
    # never drift apart. Only the constant column names above are
    # interpolated into the SQL; every value is bound as a parameter.
    placeholders = ', '.join(['?'] * len(columns))
    sql = (
        f"INSERT INTO horses_details ({', '.join(columns)}) "
        f"VALUES ({placeholders})"
    )

    with closing(sqlite3.connect(DB_PATH)) as conn:
        with conn:
            conn.execute(sql, values)
    print(f"✅ Saved: {horse_data.get('name')}")


def _parse_horse_page(html, horse_id, url):
    """Extract horse fields from the HTML of a Canalturf horse page.

    Args:
        html: Raw HTML text of the page.
        horse_id: Identifier the page was requested with; stored under 'id'.
        url: URL the page was fetched from; stored under 'url'.

    Returns:
        Dict with the keys url, id, name, age, sex, trainer, jockey, odds,
        wins, placed, total_races, earnings, form_music, ferrure, oeilleres
        and recent_form. Fields that cannot be found keep their defaults.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Flatten the page to text, with ' | ' marking element boundaries so the
    # regexes below can stop at the end of a field.
    text = soup.get_text(separator=' | ', strip=True)

    data = {
        'url': url,
        'id': horse_id,
        'name': '',
        'age': None,
        'sex': '',
        'trainer': '',
        'jockey': '',
        'odds': None,
        'wins': 0,
        'placed': 0,
        'total_races': 0,
        'earnings': 0,
        'form_music': '',
        'ferrure': '',
        'oeilleres': '',
        'recent_form': '',
    }

    # Name: the part of the <title> before the first '-'.
    title = soup.find('title')
    if title:
        data['name'] = title.text.split('-')[0].strip()

    # Sex/Age, e.g. "F4" or "M5". \d+ keeps ages of 10 and above intact.
    # 'H' is accepted as well; presumably the site uses it for geldings
    # (hongre) - TODO confirm against a live page.
    match = re.search(r'Sexe/Age\s*:\s*([MFH])(\d+)', text)
    if match:
        data['sex'] = match.group(1)
        data['age'] = int(match.group(2))

    # Trainer: everything after the label up to the next element boundary.
    match = re.search(r'Entraineur\s*:\s*([^|]+)', text)
    if match:
        data['trainer'] = match.group(1).strip()

    # Odds: first decimal number *after* the 'Cote' label, so an unrelated
    # decimal earlier on the page is not mistaken for the odds.
    _, label, after_label = text.partition('Cote')
    if label:
        match = re.search(r'\d+[.,]\d+', after_label)
        if match:
            data['odds'] = float(match.group(0).replace(',', '.'))

    # Form/Music string following "Perf. :".
    match = re.search(r'Perf\.\s*:\s*([A-Za-z0-9()hsm]+)', text)
    if match:
        data['form_music'] = match.group(1).strip()

    # Wins: first "Victoire(s) : N" occurrence.
    match = re.search(r'Victoire\(s\)\s*:\s*(\d+)', text)
    if match:
        data['wins'] = int(match.group(1))

    # Recent performances: text of the first five result links, skipping
    # very short labels and truncating each to 50 characters.
    link_texts = (
        link.get_text(strip=True)
        for link in soup.select('a[href*="/resultats-PMU/"]')[:5]
    )
    data['recent_form'] = ' | '.join(
        label[:50] for label in link_texts if len(label) > 5
    )

    return data


def scrape_horse(horse_id):
    """Scrape horse from Canalturf.

    Args:
        horse_id: Value for the ``idcheval`` query parameter.

    Returns:
        The parsed data dict on success (see ``_parse_horse_page``), or
        ``{'id': horse_id, 'error': <message>}`` if the request or the
        parsing fails. This function never raises.
    """
    url = f"https://www.canalturf.com/courses_fiche_cheval.php?idcheval={horse_id}"
    try:
        response = requests.get(url, headers=HEADERS, timeout=15)
        # Without this an HTTP error page would be parsed as a horse page.
        response.raise_for_status()
        return _parse_horse_page(response.text, horse_id, url)
    except Exception as e:
        # Deliberate best-effort boundary: callers get an error dict rather
        # than an exception.
        return {'id': horse_id, 'error': str(e)}


def _report_and_save(horse):
    """Print a short summary of a scrape result and save it if it succeeded."""
    if 'error' in horse:
        # Saving an error dict would insert a row made of NULLs.
        print(f"\n❌ Failed to scrape {horse.get('id')}: {horse['error']}")
        return

    print(f"\n{horse.get('name')}:")
    print(f" Age/Sex: {horse.get('sex')}{horse.get('age')}")
    print(f" Trainer: {horse.get('trainer')}")
    print(f" Odds: {horse.get('odds')}")
    print(f" Form: {horse.get('form_music')}")
    save_horse(horse)


def main():
    """Smoke test: scrape and save two sample horses."""
    init_horse_db()
    print("=" * 50)
    print("HORSE DETAIL SCRAPER v2 - ENHANCED")
    print("=" * 50)

    # Sample ids labelled PASSIONATA and EMSILORD in the original script.
    for index, horse_id in enumerate(("516052", "518372")):
        if index:
            print("\n" + "-" * 30)
        _report_and_save(scrape_horse(horse_id))


# Test
if __name__ == "__main__":
    main()