From 86e85aa1c621d155d8b7b901b0d1c25a2f0e9177 Mon Sep 17 00:00:00 2001 From: DevOps Engineer Date: Mon, 27 Apr 2026 22:19:10 +0200 Subject: [PATCH] =?UTF-8?q?fix(HRT-72):=20fix=20Overpass=20OSM=20scraper?= =?UTF-8?q?=20=E2=80=94=20bounding=20box=20+=20Content-Type=20+=20User-Age?= =?UTF-8?q?nt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug 1: Replace area["name"="..."] query with direct bounding box (50.4,2.8,50.8,3.3) — area resolution fails silently on public Overpass API depending on server version. — Direct bbox is deterministic and reliable for MEL coverage. — Also simplify website filter to use [!"website"] tag negation syntax. Bug 2: Add explicit Content-Type: application/x-www-form-urlencoded header — Some network configs/proxies strip the implicit header set by requests.post(data={}). — Explicit header is best practice per Overpass API docs. Bug 3 (discovered during test): Add User-Agent header — overpass-api.de returns 406 Not Acceptable for User-Agent: python-requests/*. — Fix: send H3R7Tech-LeadHunter/1.0 as custom User-Agent. — Tested: 5 OSM leads returned from Lille center bounding box. Backup: leadhunter_scraper.py.backup_20260427_221429 Co-Authored-By: Paperclip --- leadhunter_scraper.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/leadhunter_scraper.py b/leadhunter_scraper.py index defcccd..f0bf747 100644 --- a/leadhunter_scraper.py +++ b/leadhunter_scraper.py @@ -62,13 +62,13 @@ TARGET_TYPES = ["restaurant", "cafe", "bar", "bakery", "food"] # Overpass API endpoint OVERPASS_URL = "https://overpass-api.de/api/interpreter" -# Requête Overpass MEL — boundary nommée "Métropole Européenne de Lille" +# Requête Overpass MEL — bounding box directe (50.4,2.8,50.8,3.3) couvrant la MEL +# Fix HRT-72 : la résolution area["name"=...] échoue silencieusement sur l'API Overpass publique OVERPASS_MEL_QUERY = """ [out:json][timeout:60]; -area["name"="Métropole Européenne de Lille"]["boundary"="administrative"]->.mel; ( - node["amenity"~"^(restaurant|cafe|bar|fast_food|bakery)$"]["website"!~".+"](area.mel); - way["amenity"~"^(restaurant|cafe|bar|fast_food|bakery)$"]["website"!~".+"](area.mel); + node["amenity"~"^(restaurant|cafe|bar|fast_food|bakery)$"][!"website"](50.4,2.8,50.8,3.3); + way["amenity"~"^(restaurant|cafe|bar|fast_food|bakery)$"][!"website"](50.4,2.8,50.8,3.3); ); out center 200; """ @@ -279,6 +279,10 @@ class OverpassScraper: resp = requests.post( OVERPASS_URL, data={"data": OVERPASS_MEL_QUERY}, + headers={ + "Content-Type": "application/x-www-form-urlencoded", # Fix HRT-72 Bug2 + "User-Agent": "H3R7Tech-LeadHunter/1.0 (contact@h3r7tech.fr)", # Fix HRT-72 Bug3: overpass-api.de blocks python-requests UA + }, timeout=90, ) resp.raise_for_status()