Compare commits

...

2 Commits

Author SHA1 Message Date
DevOps Engineer
225295030b fix(HRT-73): refactor api_proxy — COMBINED_ROUTES tuple + align with turf_scraper fix #23
Some checks failed
CD / Deploy → Staging (push) Has been cancelled
CD / Smoke Tests on Staging (push) Has been cancelled
CD / Deploy → Production (push) Has been cancelled
CD / Rollback Production (push) Has been cancelled
- Replace if/elif chain with COMBINED_ROUTES tuple for maintainability
- Add missing routes to combined_api: races, race/, scores, ask, brave-search,
  execute-sql, send-email, report, ideas
- Functionally equivalent to turf_scraper commit 048b969

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-27 22:38:32 +02:00
DevOps Engineer
86e85aa1c6 fix(HRT-72): fix Overpass OSM scraper — bounding box + Content-Type + User-Agent
Bug 1: Replace area["name"="..."] query with direct bounding box (50.4,2.8,50.8,3.3)
  — area resolution fails silently on the public Overpass API, depending on server version.
  — Direct bbox is deterministic and reliable for MEL coverage.
  — Also simplify website filter to use [!"website"] tag negation syntax.

Bug 2: Add explicit Content-Type: application/x-www-form-urlencoded header
  — Some network configs/proxies strip the implicit header set by requests.post(data={}).
  — Explicit header is best practice per Overpass API docs.

Bug 3 (discovered during testing): Add User-Agent header
  — overpass-api.de returns 406 Not Acceptable for User-Agent: python-requests/*.
  — Fix: send H3R7Tech-LeadHunter/1.0 as custom User-Agent.
  — Tested: 5 OSM leads returned from Lille center bounding box.

Backup: leadhunter_scraper.py.backup_20260427_221429

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-27 22:19:10 +02:00
2 changed files with 40 additions and 20 deletions

View File

@@ -62,13 +62,13 @@ TARGET_TYPES = ["restaurant", "cafe", "bar", "bakery", "food"]
# Overpass API endpoint # Overpass API endpoint
OVERPASS_URL = "https://overpass-api.de/api/interpreter" OVERPASS_URL = "https://overpass-api.de/api/interpreter"
# Requête Overpass MEL — boundary nommée "Métropole Européenne de Lille" # Requête Overpass MEL — bounding box directe (50.4,2.8,50.8,3.3) couvrant la MEL
# Fix HRT-72 : la résolution area["name"=...] échoue silencieusement sur l'API Overpass publique
OVERPASS_MEL_QUERY = """ OVERPASS_MEL_QUERY = """
[out:json][timeout:60]; [out:json][timeout:60];
area["name"="Métropole Européenne de Lille"]["boundary"="administrative"]->.mel;
( (
node["amenity"~"^(restaurant|cafe|bar|fast_food|bakery)$"]["website"!~".+"](area.mel); node["amenity"~"^(restaurant|cafe|bar|fast_food|bakery)$"][!"website"](50.4,2.8,50.8,3.3);
way["amenity"~"^(restaurant|cafe|bar|fast_food|bakery)$"]["website"!~".+"](area.mel); way["amenity"~"^(restaurant|cafe|bar|fast_food|bakery)$"][!"website"](50.4,2.8,50.8,3.3);
); );
out center 200; out center 200;
""" """
@@ -279,6 +279,10 @@ class OverpassScraper:
resp = requests.post( resp = requests.post(
OVERPASS_URL, OVERPASS_URL,
data={"data": OVERPASS_MEL_QUERY}, data={"data": OVERPASS_MEL_QUERY},
headers={
"Content-Type": "application/x-www-form-urlencoded", # Fix HRT-72 Bug2
"User-Agent": "H3R7Tech-LeadHunter/1.0 (contact@h3r7tech.fr)", # Fix HRT-72 Bug3: overpass-api.de blocks python-requests UA
},
timeout=90, timeout=90,
) )
resp.raise_for_status() resp.raise_for_status()

View File

@@ -743,19 +743,29 @@ def pod_static(filename=""):
@app.route("/turf/api/") @app.route("/turf/api/")
@app.route("/turf/api/<path:api_path>") @app.route("/turf/api/<path:api_path>")
def api_proxy(api_path=""): def api_proxy(api_path=""):
if api_path.startswith("vitesse"): # Routes servies par combined_api.py (port 8790) :
url = f"{COMBINED_API_URL}/turf/api/{api_path}" # backtest, stats, paris, parisroi, races, scores, report, ask, brave-search,
elif api_path.startswith("n8n-proxy"): # execute-sql, send-email, vitesse, n8n-proxy, predictions_analysis, ideas
url = f"{COMBINED_API_URL}/turf/api/{api_path}" # Fix HRT-73 : alignement complet avec turf_scraper fix #23
elif api_path.startswith("backtest"): COMBINED_ROUTES = (
url = f"{COMBINED_API_URL}/turf/api/{api_path}" "backtest",
elif api_path.startswith("stats"): "stats",
url = f"{COMBINED_API_URL}/turf/api/{api_path}" "parisroi",
elif api_path.startswith("predictions_analysis"): "paris",
url = f"{COMBINED_API_URL}/turf/api/{api_path}" "predictions_analysis",
elif api_path.startswith("parisroi"): "vitesse",
url = f"{COMBINED_API_URL}/turf/api/{api_path}" "n8n-proxy",
elif api_path.startswith("paris"): "races",
"race/",
"scores",
"ask",
"brave-search",
"execute-sql",
"send-email",
"report",
"ideas",
)
if any(api_path.startswith(r) for r in COMBINED_ROUTES):
url = f"{COMBINED_API_URL}/turf/api/{api_path}" url = f"{COMBINED_API_URL}/turf/api/{api_path}"
elif api_path.startswith("scoring"): elif api_path.startswith("scoring"):
url = f"{DASHBOARD_API_URL}/turf/api/{api_path}" url = f"{DASHBOARD_API_URL}/turf/api/{api_path}"
@@ -770,11 +780,17 @@ def api_proxy(api_path=""):
if fwd_method in ("POST", "PUT", "PATCH") if fwd_method in ("POST", "PUT", "PATCH")
else None else None
) )
# Forwarder Authorization header (combined_api.py exige Basic h3r7:h3r7 pour parisroi/paris)
fwd_headers = {"Content-Type": "application/json"} fwd_headers = {"Content-Type": "application/json"}
if request.headers.get("Authorization"): incoming_auth = request.headers.get("Authorization")
fwd_headers["Authorization"] = request.headers.get("Authorization") if incoming_auth:
fwd_headers["Authorization"] = incoming_auth
resp = requests.request( resp = requests.request(
method=fwd_method, url=url, json=fwd_json, timeout=30, headers=fwd_headers method=fwd_method,
url=url,
json=fwd_json,
timeout=30,
headers=fwd_headers,
) )
return resp.content, resp.status_code, {"Content-Type": "application/json"} return resp.content, resp.status_code, {"Content-Type": "application/json"}
except Exception as e: except Exception as e: