#!/usr/bin/env python3
"""Parse Canalturf predictions and save to DB.

Loads one scraper JSON dump, finds the Canalturf page whose content mentions
the configured race, extracts the "Base(s)", "Chance(s) régulière(s)" and
"Outsider(s)" picks with regular expressions, and inserts one row per pick
into the ``predictions`` table of the SQLite database at ``DB_PATH``.
"""
import json
import re
import sqlite3
from datetime import datetime  # unused here; kept from the original script

DB_PATH = "/home/h3r7/turf_scraper/turf.db"

# Run configuration: today's scraper output and the race it is parsed for.
SCRAPER_OUTPUT_PATH = "/home/h3r7/turf_scraper/v5_20260224_100313.json"
RACE_MARKER = "PRIX RAUBA CAPEU"
RACE_DATE = "2026-02-24"
RACE_NAME = "Quinté Cagnes-sur-Mer"
SOURCE_SITE = "canalturf"

# Each pattern captures (horse number, horse name, text in parentheses).
# Example of a base pick: number 7, name "I'M A BELIEVER".
BASE_PATTERN = re.compile(r'Base\(s\)\s+(\d+)\s+([^\(]+)\s+\(([^)]+)\)')
CHANCE_PATTERN = re.compile(
    r'Chance\(s\) régulière\(s\)\s+(\d+)\s+([^\(]+)\s+\(([^)]+)\)'
)
OUTSIDER_PATTERN = re.compile(r'Outsider\(s\)\s+(\d+)\s+([^\(]+)\s+\(([^)]+)\)')

# Confidence percentage stored for each category of pick.
BASE_CONFIDENCE = 90
CHANCE_CONFIDENCE = 70
OUTSIDER_CONFIDENCE = 50

INSERT_SQL = """
    INSERT INTO predictions
        (date, race_name, horse_number, horse_name, odds,
         prediction_rank, confidence, source)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
"""


def find_quinte_content(data, site=SOURCE_SITE, marker=RACE_MARKER):
    """Return the content of the first page from *site* containing *marker*.

    Args:
        data: Decoded scraper output; expected to hold a ``"pages"`` list of
            dicts with ``"site"`` and ``"content"`` keys.
        site: Value the page's ``"site"`` field must equal.
        marker: Substring that must appear in the page content.

    Returns:
        The matching page's content string, or ``None`` when no page matches.
        Pages lacking either key are skipped instead of raising ``KeyError``.
    """
    for page in data.get("pages", []):
        content = page.get("content") or ""
        if page.get("site") == site and marker in content:
            return content
    return None


def parse_predictions(content):
    """Extract ``(horse_name, horse_number, confidence)`` tuples from *content*.

    Only the first "Base(s)" match is used; every "Chance(s) régulière(s)"
    and "Outsider(s)" match is kept. Results are ordered base first, then
    regular chances, then outsiders.
    """
    horses = []

    base_match = BASE_PATTERN.search(content)
    if base_match:
        number, name, _jockey = base_match.groups()
        horses.append((name.strip(), int(number), BASE_CONFIDENCE))

    for pattern, confidence in (
        (CHANCE_PATTERN, CHANCE_CONFIDENCE),
        (OUTSIDER_PATTERN, OUTSIDER_CONFIDENCE),
    ):
        for number, name, _jockey in pattern.findall(content):
            horses.append((name.strip(), int(number), confidence))

    return horses


def save_predictions(conn, horses, race_date=RACE_DATE, race_name=RACE_NAME,
                     source=SOURCE_SITE):
    """Insert one ``predictions`` row per horse and commit.

    Args:
        conn: Open ``sqlite3`` connection whose database has a
            ``predictions`` table with the columns named in ``INSERT_SQL``.
        horses: Iterable of ``(horse_name, horse_number, confidence)``.
        race_date: Value stored in the ``date`` column.
        race_name: Value stored in the ``race_name`` column.
        source: Value stored in the ``source`` column.

    Returns:
        The number of rows inserted.
    """
    # prediction_rank is the 1-based position in the parsed list. The previous
    # code stored len(horses) + 1 for every row, giving all horses one rank.
    rows = [
        (race_date, race_name, number, name, 0, rank, confidence, source)
        for rank, (name, number, confidence) in enumerate(horses, start=1)
    ]
    conn.executemany(INSERT_SQL, rows)
    conn.commit()
    return len(rows)


def main():
    """Run the load -> parse -> save -> report pipeline for the configured race."""
    # Explicit UTF-8: the content and the patterns contain accented characters.
    with open(SCRAPER_OUTPUT_PATH, encoding="utf-8") as f:
        data = json.load(f)

    quinte_data = find_quinte_content(data)
    if not quinte_data:
        print("No Quinté data found!")
        return  # same exit status (0) as the original bare exit()

    horses = parse_predictions(quinte_data)
    print(f"Parsed {len(horses)} horses from Canalturf")

    conn = sqlite3.connect(DB_PATH)
    try:
        save_predictions(conn, horses)
        print(f"Saved {len(horses)} predictions!")

        # Show every prediction stored for the date (all sources).
        cursor = conn.execute(
            "SELECT horse_name, horse_number, confidence, source "
            "FROM predictions WHERE date = ?",
            (RACE_DATE,),
        )
        for horse_name, horse_number, confidence, source in cursor.fetchall():
            print(f" {horse_number} - {horse_name} ({confidence}%) - {source}")
    finally:
        # Release the database handle even if an insert or select fails.
        conn.close()


if __name__ == "__main__":
    main()