From 30da1880a50da0b8d85da48a419de4bde0fa813b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sascha=20I=C3=9Fbr=C3=BCcker?= Date: Fri, 20 Jan 2023 22:28:44 +0100 Subject: [PATCH] Cache website metadata to avoid duplicate scraping (#401) * Cache website metadata to avoid duplicate scraping * fix test setup --- bookmarks/services/website_loader.py | 4 ++++ bookmarks/tests/test_website_loader.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/bookmarks/services/website_loader.py b/bookmarks/services/website_loader.py index 90982f8..3a89b8b 100644 --- a/bookmarks/services/website_loader.py +++ b/bookmarks/services/website_loader.py @@ -1,5 +1,6 @@ import logging from dataclasses import dataclass +from functools import lru_cache import requests from bs4 import BeautifulSoup @@ -23,6 +24,9 @@ class WebsiteMetadata: } +# Caching metadata avoids scraping again when saving bookmarks, in case the +# metadata was already scraped to show preview values in the bookmark form +@lru_cache(maxsize=10) def load_website_metadata(url: str): title = None description = None diff --git a/bookmarks/tests/test_website_loader.py b/bookmarks/tests/test_website_loader.py index 6ee5325..eb10a7c 100644 --- a/bookmarks/tests/test_website_loader.py +++ b/bookmarks/tests/test_website_loader.py @@ -25,6 +25,10 @@ class MockStreamingResponse: class WebsiteLoaderTestCase(TestCase): + def setUp(self): + # clear cached metadata before test run + website_loader.load_website_metadata.cache_clear() + def render_html_document(self, title, description): return f'''