diff --git a/bookmarks/services/favicon_loader.py b/bookmarks/services/favicon_loader.py index ac77b1c..6534880 100644 --- a/bookmarks/services/favicon_loader.py +++ b/bookmarks/services/favicon_loader.py @@ -1,6 +1,7 @@ +import logging +import mimetypes import os.path import re -import shutil import time from pathlib import Path from urllib.parse import urlparse @@ -10,25 +11,46 @@ from django.conf import settings max_file_age = 60 * 60 * 24 # 1 day +logger = logging.getLogger(__name__) + +# register mime type for .ico files, which is not included in the default +# mimetypes of the Docker image +mimetypes.add_type('image/x-icon', '.ico') + def _ensure_favicon_folder(): Path(settings.LD_FAVICON_FOLDER).mkdir(parents=True, exist_ok=True) def _url_to_filename(url: str) -> str: - name = re.sub(r'\W+', '_', url) - return f'{name}.png' + return re.sub(r'\W+', '_', url) -def _get_base_url(url: str) -> str: +def _get_url_parameters(url: str) -> dict: parsed_uri = urlparse(url) - return f'{parsed_uri.scheme}://{parsed_uri.hostname}' + return { + # https://example.com/foo?bar -> https://example.com + 'url': f'{parsed_uri.scheme}://{parsed_uri.hostname}', + # https://example.com/foo?bar -> example.com + 'domain': parsed_uri.hostname, + } def _get_favicon_path(favicon_file: str) -> Path: return Path(os.path.join(settings.LD_FAVICON_FOLDER, favicon_file)) +def _check_existing_favicon(favicon_name: str): + # return existing file if a file with the same name, ignoring extension, + # exists and is not stale + for filename in os.listdir(settings.LD_FAVICON_FOLDER): + file_base_name, _ = os.path.splitext(filename) + if file_base_name == favicon_name: + favicon_path = _get_favicon_path(filename) + return filename if not _is_stale(favicon_path) else None + return None + + def _is_stale(path: Path) -> bool: stat = path.stat() file_age = time.time() - stat.st_mtime @@ -36,22 +58,26 @@ def _is_stale(path: Path) -> bool: def load_favicon(url: str) -> str: - # Get base URL so that we can 
reuse favicons for multiple bookmarks with the same host - base_url = _get_base_url(url) - favicon_name = _url_to_filename(base_url) - favicon_path = _get_favicon_path(favicon_name) + url_parameters = _get_url_parameters(url) - # Load icon if it doesn't exist yet or has become stale - if not favicon_path.exists() or _is_stale(favicon_path): - # Create favicon folder if not exists - _ensure_favicon_folder() + # Create favicon folder if not exists + _ensure_favicon_folder() + # Use scheme+hostname as favicon filename to reuse icon for all pages on the same domain + favicon_name = _url_to_filename(url_parameters['url']) + favicon_file = _check_existing_favicon(favicon_name) + + if not favicon_file: # Load favicon from provider, save to file - favicon_url = settings.LD_FAVICON_PROVIDER.format(url=base_url) - response = requests.get(favicon_url, stream=True) + favicon_url = settings.LD_FAVICON_PROVIDER.format(**url_parameters) + logger.debug(f'Loading favicon from: {favicon_url}') + with requests.get(favicon_url, stream=True) as response: + content_type = response.headers['Content-Type'] + file_extension = mimetypes.guess_extension(content_type) + favicon_file = f'{favicon_name}{file_extension}' + favicon_path = _get_favicon_path(favicon_file) + with open(favicon_path, 'wb') as file: + for chunk in response.iter_content(chunk_size=8192): + file.write(chunk) + logger.debug(f'Saved favicon as: {favicon_path}') - with open(favicon_path, 'wb') as file: - shutil.copyfileobj(response.raw, file) - - del response - - return favicon_name + return favicon_file diff --git a/bookmarks/services/tasks.py b/bookmarks/services/tasks.py index 06b8566..3a57bf0 100644 --- a/bookmarks/services/tasks.py +++ b/bookmarks/services/tasks.py @@ -130,12 +130,12 @@ def _load_favicon_task(bookmark_id: int): logger.info(f'Load favicon for bookmark. 
url={bookmark.url}') - new_favicon = favicon_loader.load_favicon(bookmark.url) + new_favicon_file = favicon_loader.load_favicon(bookmark.url) - if new_favicon != bookmark.favicon_file: - bookmark.favicon_file = new_favicon + if new_favicon_file != bookmark.favicon_file: + bookmark.favicon_file = new_favicon_file bookmark.save(update_fields=['favicon_file']) - logger.info(f'Successfully updated favicon for bookmark. url={bookmark.url} icon={new_favicon}') + logger.info(f'Successfully updated favicon for bookmark. url={bookmark.url} icon={new_favicon_file}') def schedule_bookmarks_without_favicons(user: User): diff --git a/bookmarks/templates/settings/general.html b/bookmarks/templates/settings/general.html index e7f517e..7070b3b 100644 --- a/bookmarks/templates/settings/general.html +++ b/bookmarks/templates/settings/general.html @@ -74,8 +74,8 @@ Automatically loads favicons for bookmarked websites and displays them next to each bookmark. By default, this feature uses a Google service to download favicons. If you don't want to use this service, check the options - documentation on how to configure a custom favicon provider. + href="https://github.com/sissbruecker/linkding/blob/master/docs/Options.md#ld_favicon_provider" + target="_blank">options documentation on how to configure a custom favicon provider. Icons are downloaded in the background, and it may take a while for them to show up. 
{% if request.user_profile.enable_favicons and enable_refresh_favicons %} diff --git a/bookmarks/tests/test_favicon_loader.py b/bookmarks/tests/test_favicon_loader.py index cdffff9..6dec983 100644 --- a/bookmarks/tests/test_favicon_loader.py +++ b/bookmarks/tests/test_favicon_loader.py @@ -2,25 +2,40 @@ import io import os.path import time from pathlib import Path -from unittest import mock +from unittest import mock, skip from django.conf import settings -from django.test import TestCase +from django.test import TestCase, override_settings from bookmarks.services import favicon_loader mock_icon_data = b'mock_icon' +class MockStreamingResponse: + def __init__(self, data=mock_icon_data, content_type='image/png'): + self.chunks = [data] + self.headers = {'Content-Type': content_type} + + def iter_content(self, **kwargs): + return self.chunks + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + pass + + class FaviconLoaderTestCase(TestCase): def setUp(self) -> None: self.ensure_favicon_folder() self.clear_favicon_folder() - def create_mock_response(self, icon_data=mock_icon_data): + def create_mock_response(self, icon_data=mock_icon_data, content_type='image/png'): mock_response = mock.Mock() mock_response.raw = io.BytesIO(icon_data) - return mock_response + return MockStreamingResponse(icon_data, content_type) def ensure_favicon_folder(self): Path(settings.LD_FAVICON_FOLDER).mkdir(parents=True, exist_ok=True) @@ -93,12 +108,14 @@ class FaviconLoaderTestCase(TestCase): with mock.patch('requests.get') as mock_get: mock_get.return_value = self.create_mock_response() - favicon_loader.load_favicon('https://example.com') + favicon_file = favicon_loader.load_favicon('https://example.com') mock_get.assert_called() + self.assertEqual(favicon_file, 'https_example_com.png') mock_get.reset_mock() - favicon_loader.load_favicon('https://example.com') + updated_favicon_file = favicon_loader.load_favicon('https://example.com') 
mock_get.assert_not_called() + self.assertEqual(favicon_file, updated_favicon_file) def test_load_favicon_updates_stale_icon(self): with mock.patch('requests.get') as mock_get: @@ -125,3 +142,35 @@ class FaviconLoaderTestCase(TestCase): favicon_loader.load_favicon('https://example.com') mock_get.assert_called() self.assertEqual(updated_mock_icon_data, self.get_icon_data('https_example_com.png')) + + @override_settings(LD_FAVICON_PROVIDER='https://custom.icons.com/?url={url}') + def test_custom_provider_with_url_param(self): + with mock.patch('requests.get') as mock_get: + mock_get.return_value = self.create_mock_response() + + favicon_loader.load_favicon('https://example.com/foo?bar=baz') + mock_get.assert_called_with('https://custom.icons.com/?url=https://example.com', stream=True) + + @override_settings(LD_FAVICON_PROVIDER='https://custom.icons.com/?url={domain}') + def test_custom_provider_with_domain_param(self): + with mock.patch('requests.get') as mock_get: + mock_get.return_value = self.create_mock_response() + + favicon_loader.load_favicon('https://example.com/foo?bar=baz') + mock_get.assert_called_with('https://custom.icons.com/?url=example.com', stream=True) + + def test_guess_file_extension(self): + with mock.patch('requests.get') as mock_get: + mock_get.return_value = self.create_mock_response(content_type='image/png') + favicon_loader.load_favicon('https://example.com') + + self.assertTrue(self.icon_exists('https_example_com.png')) + + self.clear_favicon_folder() + self.ensure_favicon_folder() + + with mock.patch('requests.get') as mock_get: + mock_get.return_value = self.create_mock_response(content_type='image/x-icon') + favicon_loader.load_favicon('https://example.com') + + self.assertTrue(self.icon_exists('https_example_com.ico')) diff --git a/docs/Options.md b/docs/Options.md index 278b9e8..a018c41 100644 --- a/docs/Options.md +++ b/docs/Options.md @@ -164,12 +164,20 @@ A json string with additional options for the database. 
Passed directly to OPTIO ### `LD_FAVICON_PROVIDER` -Values: `String` | Default = `https://t1.gstatic.com/faviconV2?url={url}&client=SOCIAL&type=FAVICON` +Values: `String` | Default = `https://t1.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url={url}&size=32` The favicon provider used for downloading icons if they are enabled in the user profile settings. The default provider is a Google service that automatically detects the correct favicon for a website, and provides icons in consistent image format (PNG) and in a consistent image size. This setting allows to configure a custom provider in form of a URL. When calling the provider with the URL of a website, it must return the image data for the favicon of that website. -The configured favicon provider URL must contain a `{url}` placeholder that will be replaced with the URL of the website for which to download the favicon. -See the default URL for an example. +The configured favicon provider URL must contain a placeholder that will be replaced with the URL of the website for which to download the favicon. +The available placeholders are: +- `{url}` - Includes the scheme and hostname of the website, for example `https://example.com` +- `{domain}` - Includes only the hostname of the website, for example `example.com` + +Which placeholder you need to use depends on the respective favicon provider; please check its documentation or usage examples. +See the default URL for how to insert the placeholder into the favicon provider URL. 
+ +Alternative favicon providers: +- DuckDuckGo: `https://icons.duckduckgo.com/ip3/{domain}.ico` diff --git a/siteroot/settings/base.py b/siteroot/settings/base.py index d032b87..9dc94f5 100644 --- a/siteroot/settings/base.py +++ b/siteroot/settings/base.py @@ -231,7 +231,7 @@ DATABASES = { } # Favicons -LD_DEFAULT_FAVICON_PROVIDER = 'https://t1.gstatic.com/faviconV2?url={url}&client=SOCIAL&type=FAVICON' +LD_DEFAULT_FAVICON_PROVIDER = 'https://t1.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url={url}&size=32' LD_FAVICON_PROVIDER = os.getenv('LD_FAVICON_PROVIDER', LD_DEFAULT_FAVICON_PROVIDER) LD_FAVICON_FOLDER = os.path.join(BASE_DIR, 'data', 'favicons') LD_ENABLE_REFRESH_FAVICONS = os.getenv('LD_ENABLE_REFRESH_FAVICONS', True) in (True, 'True', '1')