196 lines
5.6 KiB
Python
Executable File
196 lines
5.6 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Enhanced Horse Detail Scraper - v2
|
|
Ferrure, Oeillères, Jockey stats
|
|
"""
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
import json
|
|
from datetime import datetime
|
|
import re
|
|
import sqlite3
|
|
|
|
HEADERS = {
|
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
|
}
|
|
|
|
DB_PATH = "/home/h3r7/turf_scraper/turf.db"
|
|
|
|
def init_horse_db(db_path=None):
    """Create the ``horses_details`` table if it does not exist yet.

    The statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this
    function repeatedly is harmless.

    Args:
        db_path: Path of the SQLite database file. When omitted, the
            module-level ``DB_PATH`` is used.

    Raises:
        sqlite3.Error: If the database cannot be opened or the DDL fails.
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS horses_details (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                date TEXT,
                horse_name TEXT,
                horse_id TEXT,
                age INTEGER,
                sex TEXT,
                trainer TEXT,
                jockey TEXT,
                last_odds REAL,
                wins INTEGER,
                placed INTEGER,
                total_races INTEGER,
                earnings REAL,
                form_music TEXT,
                ferrure TEXT,
                oeilleres TEXT,
                recent_form TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        conn.commit()
    finally:
        # Always release the connection, even when the DDL raises.
        conn.close()
|
|
|
|
def save_horse(horse_data, db_path=None):
    """Insert one scraped horse record into ``horses_details``.

    Args:
        horse_data: Mapping with the scraped fields (``name``, ``id``,
            ``age``, ``sex``, ``trainer``, ``jockey``, ``odds``, ``wins``,
            ``placed``, ``total_races``, ``earnings``, ``form_music``,
            ``ferrure``, ``oeilleres``, ``recent_form``). Missing keys are
            stored as NULL.
        db_path: Path of the SQLite database file. When omitted, the
            module-level ``DB_PATH`` is used.

    Raises:
        sqlite3.Error: If the database cannot be opened or the insert fails.
    """
    # Column -> value, in insertion order. The column list and the
    # placeholders are both derived from this mapping so they can never get
    # out of step (the statement previously had 17 placeholders for 16
    # columns, which made every insert fail).
    row = {
        'date': datetime.now().strftime('%Y-%m-%d'),
        'horse_name': horse_data.get('name'),
        'horse_id': horse_data.get('id'),
        'age': horse_data.get('age'),
        'sex': horse_data.get('sex'),
        'trainer': horse_data.get('trainer'),
        'jockey': horse_data.get('jockey'),
        'last_odds': horse_data.get('odds'),
        'wins': horse_data.get('wins'),
        'placed': horse_data.get('placed'),
        'total_races': horse_data.get('total_races'),
        'earnings': horse_data.get('earnings'),
        'form_music': horse_data.get('form_music'),
        'ferrure': horse_data.get('ferrure'),
        'oeilleres': horse_data.get('oeilleres'),
        'recent_form': horse_data.get('recent_form'),
    }
    # Only the hard-coded column names above are interpolated into the SQL;
    # every value is bound through a "?" placeholder.
    columns = ', '.join(row)
    placeholders = ', '.join('?' * len(row))
    statement = f'INSERT INTO horses_details ({columns}) VALUES ({placeholders})'

    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        conn.execute(statement, tuple(row.values()))
        conn.commit()
    finally:
        # Always release the connection, even when the insert raises.
        conn.close()

    print(f"✅ Saved: {horse_data.get('name')}")
|
|
|
|
def _parse_horse_text(text):
    """Extract the labelled fields from the flattened text of a horse page.

    Args:
        text: Page text as produced by
            ``soup.get_text(separator=' | ', strip=True)``.

    Returns:
        A dict holding only the fields that were found, among ``sex``,
        ``age``, ``trainer``, ``odds``, ``form_music`` and ``wins``.
    """
    fields = {}

    # Sex/age is a letter followed by the age, e.g. "F4" or "M5". The age
    # uses \d+ so that a horse aged 10 or more is not truncated to "1".
    # NOTE(review): 'H' (hongre, i.e. gelding) is accepted on the assumption
    # that the site follows the usual French notation -- confirm on a live page.
    found = re.search(r'Sexe/Age\s*:\s*([MFH])(\d+)', text)
    if found:
        fields['sex'] = found.group(1)
        fields['age'] = int(found.group(2))

    # The trainer name runs up to the next ' | ' node separator.
    found = re.search(r'Entraineur\s*:\s*([^|]+)', text)
    if found:
        fields['trainer'] = found.group(1).strip()

    # Anchor the odds to the "Cote" label: take the first decimal number that
    # follows it within a short run of non-digit characters, instead of the
    # first decimal number anywhere on the page. The 40-character window is a
    # heuristic.
    found = re.search(r'Cote\D{0,40}?(\d+[.,]\d+)', text)
    if found:
        fields['odds'] = float(found.group(1).replace(',', '.'))

    # Form string ("musique"), e.g. "1a2a(23)3m".
    found = re.search(r'Perf\.\s*:\s*([A-Za-z0-9()]+)', text)
    if found:
        fields['form_music'] = found.group(1).strip()

    # Number of wins; only the first occurrence is used.
    found = re.search(r'Victoire\(s\)\s*:\s*(\d+)', text)
    if found:
        fields['wins'] = int(found.group(1))

    return fields


def scrape_horse(horse_id):
    """Scrape one horse detail page from Canalturf.

    Args:
        horse_id: Value of the ``idcheval`` query parameter.

    Returns:
        On success, a dict with the keys ``url``, ``id``, ``name``, ``age``,
        ``sex``, ``trainer``, ``jockey``, ``odds``, ``wins``, ``placed``,
        ``total_races``, ``earnings``, ``form_music``, ``ferrure``,
        ``oeilleres`` and ``recent_form``. Only ``name``, ``sex``, ``age``,
        ``trainer``, ``odds``, ``form_music``, ``wins`` and ``recent_form``
        are actually extracted; the remaining fields keep their defaults.

        On any failure (network error, non-2xx HTTP status, parsing error),
        ``{'id': horse_id, 'error': <message>}``. This function never raises.
    """
    url = f"https://www.canalturf.com/courses_fiche_cheval.php?idcheval={horse_id}"

    try:
        r = requests.get(url, headers=HEADERS, timeout=15)
        # Do not parse an HTTP error page as if it were a horse page.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')

        data = {
            'url': url,
            'id': horse_id,
            'name': '',
            'age': None,
            'sex': '',
            'trainer': '',
            'jockey': '',
            'odds': None,
            'wins': 0,
            'placed': 0,
            'total_races': 0,
            'earnings': 0,
            'form_music': '',
            'ferrure': '',
            'oeilleres': '',
            'recent_form': '',
        }

        # Name: the part of the <title> before the first '-'.
        title = soup.find('title')
        if title:
            data['name'] = title.text.split('-')[0].strip()

        # Text nodes are joined with ' | ' so each label/value pair stays
        # delimited for the regexes in _parse_horse_text.
        data.update(_parse_horse_text(soup.get_text(separator=' | ', strip=True)))

        # Recent performances: among the first five result links, keep the
        # labels longer than five characters, capped at 50 characters each.
        labels = (
            link.get_text(strip=True)
            for link in soup.select('a[href*="/resultats-PMU/"]')[:5]
        )
        data['recent_form'] = ' | '.join(
            label[:50] for label in labels if len(label) > 5
        )

        return data

    except Exception as e:
        # Best-effort contract: callers receive an error record, never an
        # exception.
        return {'id': horse_id, 'error': str(e)}
|
|
|
|
# Test
|
|
if __name__ == "__main__":
    # Manual smoke test: scrape two known horses, print a summary of each,
    # and store the successfully scraped ones.
    init_horse_db()

    print("=" * 50)
    print("HORSE DETAIL SCRAPER v2 - ENHANCED")
    print("=" * 50)

    # Test with PASSIONATA, then with EMSILORD.
    test_horse_ids = ("516052", "518372")

    for position, horse_id in enumerate(test_horse_ids):
        if position:
            print("\n" + "-" * 30)

        horse = scrape_horse(horse_id)

        # scrape_horse reports failures as {'id': ..., 'error': ...}; do not
        # store such a record, it would only insert a row of NULLs.
        if 'error' in horse:
            print(f"\n❌ Horse {horse_id}: {horse['error']}")
            continue

        print(f"\n{horse.get('name')}:")
        print(f" Age/Sex: {horse.get('sex')}{horse.get('age')}")
        print(f" Trainer: {horse.get('trainer')}")
        print(f" Odds: {horse.get('odds')}")
        print(f" Form: {horse.get('form_music')}")

        save_horse(horse)
|