mirror of
https://github.com/sissbruecker/linkding.git
synced 2025-08-08 11:18:28 +02:00
Cache website metadata to avoid duplicate scraping (#401)
* Cache website metadata to avoid duplicate scraping * fix test setup
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from functools import lru_cache
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
@@ -23,6 +24,9 @@ class WebsiteMetadata:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Caching metadata avoids scraping again when saving bookmarks, in case the
|
||||||
|
# metadata was already scraped to show preview values in the bookmark form
|
||||||
|
@lru_cache(maxsize=10)
|
||||||
def load_website_metadata(url: str):
|
def load_website_metadata(url: str):
|
||||||
title = None
|
title = None
|
||||||
description = None
|
description = None
|
||||||
|
@@ -25,6 +25,10 @@ class MockStreamingResponse:
|
|||||||
|
|
||||||
|
|
||||||
class WebsiteLoaderTestCase(TestCase):
|
class WebsiteLoaderTestCase(TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
# clear cached metadata before test run
|
||||||
|
website_loader.load_website_metadata.cache_clear()
|
||||||
|
|
||||||
def render_html_document(self, title, description):
|
def render_html_document(self, title, description):
|
||||||
return f'''
|
return f'''
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
|
Reference in New Issue
Block a user