From e08bf9fd039c8561743e3bc5aee43745521fc031 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sascha=20I=C3=9Fbr=C3=BCcker?=
Date: Sat, 21 May 2022 13:25:32 +0200
Subject: [PATCH] Fake request headers to reduce bot detection (#263)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Sascha Ißbrücker
---
 bookmarks/services/website_loader.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/bookmarks/services/website_loader.py b/bookmarks/services/website_loader.py
index fef0d5b..30bbcc4 100644
--- a/bookmarks/services/website_loader.py
+++ b/bookmarks/services/website_loader.py
@@ -34,7 +34,8 @@ def load_website_metadata(url: str):
 
 
 def load_page(url: str):
-    r = requests.get(url, timeout=10)
+    headers = fake_request_headers()
+    r = requests.get(url, timeout=10, headers=headers)
 
     # Use charset_normalizer to determine encoding that best matches the response content
     # Several sites seem to specify the response encoding incorrectly, so we ignore it and use custom logic instead
@@ -42,3 +43,13 @@ def load_page(url: str):
     # before trying to determine one
     results = from_bytes(r.content)
     return str(results.best())
+
+
+def fake_request_headers():
+    return {
+        "Accept": "text/html,application/xhtml+xml,application/xml",
+        "Accept-Encoding": "gzip, deflate",
+        "Dnt": "1",
+        "Upgrade-Insecure-Requests": "1",
+        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36",
+    }