From a23c357f2f806ff1b037eb45fbc93c8c51f409a6 Mon Sep 17 00:00:00 2001 From: Josh S <100461662+Teknicallity@users.noreply.github.com> Date: Sat, 22 Mar 2025 06:34:10 -0400 Subject: [PATCH] Add bulk and single bookmark metadata refresh (#999) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add url create/edit query paramter to clear cache * Add refresh bookmark metadata button in create/edit bookmark page * Fix refresh bookmark metadata when editing existing bookmark * Add bulk refresh metadata functionality * Fix test cases for bulk view dropdown selection list * Allow bulk metadata refresh when background tasks are disabled * Move load preview image call on refresh metadata * Update bookmark modified time on metadata refresh * Rename function to align with convention * Add tests for refresh task * Add tests for bookmarks service refresh metadata * Add tests for bookmarks api disable cache on check * Remove bulk refresh metadata when background tasks disabled * Refactor refresh metadata task * Remove unnecessary call * Fix testing mock name * Abstract clearing metadata cache * Add test to check if load page is called twice when cache disabled * Remove refresh button for new bookmarks * Remove strict disable cache is true check * Refactor refresh metadata form logic into its own function * move button and highlight changes * polish and update tests --------- Co-authored-by: Sascha Ißbrücker --- bookmarks/api/routes.py | 3 +- bookmarks/services/bookmarks.py | 11 +++ bookmarks/services/tasks.py | 28 ++++++- bookmarks/services/website_loader.py | 12 ++- bookmarks/styles/bookmark-form.css | 13 ++- .../templates/bookmarks/bulk_edit/bar.html | 1 + bookmarks/templates/bookmarks/form.html | 46 +++++++++- .../tests/test_bookmark_archived_view.py | 2 + bookmarks/tests/test_bookmark_index_view.py | 2 + bookmarks/tests/test_bookmarks_api.py | 37 +++++++++ bookmarks/tests/test_bookmarks_service.py | 83 +++++++++++++++++++ bookmarks/tests/test_bookmarks_tasks.py | 50 +++++++++++ bookmarks/tests/test_website_loader.py | 19 ++++- bookmarks/views/bookmarks.py | 3 + 14 files changed, 300 insertions(+), 10 deletions(-) diff --git a/bookmarks/api/routes.py b/bookmarks/api/routes.py index 78d9c58..aa49f4f 100644 --- a/bookmarks/api/routes.py +++ b/bookmarks/api/routes.py @@ -98,12 +98,13 @@ class BookmarkViewSet( @action(methods=["get"], detail=False) def check(self, request: HttpRequest): url = request.GET.get("url") + ignore_cache = request.GET.get("ignore_cache", False) in ["true"] bookmark = Bookmark.objects.filter(owner=request.user, url=url).first() existing_bookmark_data = ( self.get_serializer(bookmark).data if bookmark else None ) - metadata = website_loader.load_website_metadata(url) + metadata = website_loader.load_website_metadata(url, ignore_cache=ignore_cache) # Return tags that would be automatically applied to the bookmark profile = request.user.profile diff --git a/bookmarks/services/bookmarks.py b/bookmarks/services/bookmarks.py index c3e2f50..f2b8a81 100644 --- a/bookmarks/services/bookmarks.py +++ b/bookmarks/services/bookmarks.py @@ -197,6 +197,17 @@ def unshare_bookmarks(bookmark_ids: [Union[int, str]], current_user: User): ) +def refresh_bookmarks_metadata(bookmark_ids: [Union[int, str]], current_user: User): + sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids) + owned_bookmarks = Bookmark.objects.filter( + owner=current_user, id__in=sanitized_bookmark_ids + ) + + for bookmark in owned_bookmarks: + tasks.refresh_metadata(bookmark) + tasks.load_preview_image(current_user, bookmark) + + def _merge_bookmark_data(from_bookmark: Bookmark, to_bookmark: Bookmark): to_bookmark.title = from_bookmark.title to_bookmark.description = from_bookmark.description diff --git a/bookmarks/services/tasks.py b/bookmarks/services/tasks.py index 3af0014..9c5ce61 100644 --- a/bookmarks/services/tasks.py +++ b/bookmarks/services/tasks.py @@ -6,6 +6,7 @@ import waybackpy from django.conf import settings from django.contrib.auth.models import User from django.db.models import Q +from django.utils import timezone from huey import crontab from huey.contrib.djhuey import HUEY as huey from huey.exceptions import TaskLockedException @@ -13,7 +14,7 @@ from waybackpy.exceptions import WaybackError, TooManyRequestsError from bookmarks.models import Bookmark, BookmarkAsset, UserProfile from bookmarks.services import assets, favicon_loader, preview_image_loader -from bookmarks.services.website_loader import DEFAULT_USER_AGENT +from bookmarks.services.website_loader import DEFAULT_USER_AGENT, load_website_metadata logger = logging.getLogger(__name__) @@ -225,6 +226,31 @@ def _schedule_bookmarks_without_previews_task(user_id: int): logging.exception(exc) +def refresh_metadata(bookmark: Bookmark): + if not settings.LD_DISABLE_BACKGROUND_TASKS: + _refresh_metadata_task(bookmark.id) + + +@task() +def _refresh_metadata_task(bookmark_id: int): + try: + bookmark = Bookmark.objects.get(id=bookmark_id) + except Bookmark.DoesNotExist: + return + + logger.info(f"Refresh metadata for bookmark. url={bookmark.url}") + + metadata = load_website_metadata(bookmark.url) + if metadata.title: + bookmark.title = metadata.title + if metadata.description: + bookmark.description = metadata.description + bookmark.date_modified = timezone.now() + + bookmark.save() + logger.info(f"Successfully refreshed metadata for bookmark. url={bookmark.url}") + + def is_html_snapshot_feature_active() -> bool: return settings.LD_ENABLE_SNAPSHOTS and not settings.LD_DISABLE_BACKGROUND_TASKS diff --git a/bookmarks/services/website_loader.py b/bookmarks/services/website_loader.py index 2134659..e396f57 100644 --- a/bookmarks/services/website_loader.py +++ b/bookmarks/services/website_loader.py @@ -27,10 +27,20 @@ class WebsiteMetadata: } +def load_website_metadata(url: str, ignore_cache: bool = False): + if ignore_cache: + return _load_website_metadata(url) + return _load_website_metadata_cached(url) + + # Caching metadata avoids scraping again when saving bookmarks, in case the # metadata was already scraped to show preview values in the bookmark form @lru_cache(maxsize=10) -def load_website_metadata(url: str): +def _load_website_metadata_cached(url: str): + return _load_website_metadata(url) + + +def _load_website_metadata(url: str): title = None description = None preview_image = None diff --git a/bookmarks/styles/bookmark-form.css b/bookmarks/styles/bookmark-form.css index 820fab8..6b52aa4 100644 --- a/bookmarks/styles/bookmark-form.css +++ b/bookmarks/styles/bookmark-form.css @@ -15,14 +15,23 @@ visibility: hidden; } - & .form-group .clear-button { - display: none; + & .form-group .suffix-button { padding: 0; border: none; height: auto; font-size: var(--font-size-sm); } + & .form-group .clear-button, + & .form-group #refresh-button { + display: none; + } + + & .form-group input.modified, + & .form-group textarea.modified { + background: var(--primary-color-shade); + } + & .form-input-hint.bookmark-exists { display: none; color: var(--warning-color); diff --git a/bookmarks/templates/bookmarks/bulk_edit/bar.html b/bookmarks/templates/bookmarks/bulk_edit/bar.html index 9f5d8a9..3648483 100644 --- a/bookmarks/templates/bookmarks/bulk_edit/bar.html +++ b/bookmarks/templates/bookmarks/bulk_edit/bar.html @@ -22,6 +22,7 @@ {% endif %} +
diff --git a/bookmarks/templates/bookmarks/form.html b/bookmarks/templates/bookmarks/form.html index 6245754..309dfc5 100644 --- a/bookmarks/templates/bookmarks/form.html +++ b/bookmarks/templates/bookmarks/form.html @@ -33,9 +33,12 @@
- +
+ + +
{{ form.title|add_class:"form-input"|attr:"autocomplete:off" }} {{ form.title.errors }} @@ -43,7 +46,8 @@
-
@@ -111,6 +115,7 @@ const notesInput = document.getElementById('{{ form.notes.id_for_label }}'); const unreadCheckbox = document.getElementById('{{ form.unread.id_for_label }}'); const sharedCheckbox = document.getElementById('{{ form.shared.id_for_label }}'); + const refreshButton = document.getElementById('refresh-button'); const bookmarkExistsHint = document.querySelector('.form-input-hint.bookmark-exists'); const editedBookmarkId = {{ form.instance.id|default:0 }}; let isTitleModified = !!titleInput.value; @@ -154,6 +159,7 @@ // Display hint if URL is already bookmarked const existingBookmark = data.bookmark; bookmarkExistsHint.style['display'] = existingBookmark ? 'block' : 'none'; + refreshButton.style['display'] = existingBookmark ? 'inline-block' : 'none'; // Prefill form with existing bookmark data if (existingBookmark) { @@ -193,6 +199,36 @@ }); } + function refreshMetadata() { + if (!urlInput.value) { + return; + } + + toggleLoadingIcon(urlInput, true); + + const websiteUrl = encodeURIComponent(urlInput.value); + const requestUrl = `{% url 'linkding:api-root' %}bookmarks/check?url=${websiteUrl}&ignore_cache=true`; + + fetch(requestUrl) + .then(response => response.json()) + .then(data => { + const metadata = data.metadata; + const existingBookmark = data.bookmark; + toggleLoadingIcon(urlInput, false); + + if (metadata.title && metadata.title !== existingBookmark?.title) { + titleInput.value = metadata.title; + titleInput.classList.add("modified"); + } + + if (metadata.description && metadata.description !== existingBookmark?.description) { + descriptionInput.value = metadata.description; + descriptionInput.classList.add("modified"); + } + }); + } + refreshButton.addEventListener('click', refreshMetadata); + // Fetch website metadata when page loads and when URL changes, unless we are editing an existing bookmark if (!editedBookmarkId) { checkUrl(); @@ -203,6 +239,8 @@ descriptionInput.addEventListener('input', () => { isDescriptionModified = true; }); + } else { + refreshButton.style['display'] = 'inline-block'; } })(); diff --git a/bookmarks/tests/test_bookmark_archived_view.py b/bookmarks/tests/test_bookmark_archived_view.py index 2d12c99..c871d8b 100644 --- a/bookmarks/tests/test_bookmark_archived_view.py +++ b/bookmarks/tests/test_bookmark_archived_view.py @@ -278,6 +278,7 @@ class BookmarkArchivedViewTestCase( + """, html, @@ -303,6 +304,7 @@ class BookmarkArchivedViewTestCase( + """, html, diff --git a/bookmarks/tests/test_bookmark_index_view.py b/bookmarks/tests/test_bookmark_index_view.py index deabf77..8884e03 100644 --- a/bookmarks/tests/test_bookmark_index_view.py +++ b/bookmarks/tests/test_bookmark_index_view.py @@ -259,6 +259,7 @@ class BookmarkIndexViewTestCase( + """, html, @@ -284,6 +285,7 @@ class BookmarkIndexViewTestCase( + """, html, diff --git a/bookmarks/tests/test_bookmarks_api.py b/bookmarks/tests/test_bookmarks_api.py index 515e96e..68a4510 100644 --- a/bookmarks/tests/test_bookmarks_api.py +++ b/bookmarks/tests/test_bookmarks_api.py @@ -1047,6 +1047,43 @@ class BookmarksApiTestCase(LinkdingApiTestCase, BookmarkFactoryMixin): self.assertCountEqual(auto_tags, ["tag1", "tag2"]) + def test_check_ignore_cache(self): + self.authenticate() + + with patch.object( + website_loader, "load_website_metadata" + ) as mock_load_website_metadata: + expected_metadata = WebsiteMetadata( + "https://example.com", + "Scraped metadata", + "Scraped description", + "https://example.com/preview.png", + ) + mock_load_website_metadata.return_value = expected_metadata + + # Does not ignore cache by default + url = reverse("linkding:bookmark-check") + check_url = urllib.parse.quote_plus("https://example.com") + self.get( + f"{url}?url={check_url}", + expected_status_code=status.HTTP_200_OK, + ) + + mock_load_website_metadata.assert_called_once_with( + "https://example.com", ignore_cache=False + ) + mock_load_website_metadata.reset_mock() + + # Ignores cache based on query param + self.get( + f"{url}?url={check_url}&ignore_cache=true", + expected_status_code=status.HTTP_200_OK, + ) + + mock_load_website_metadata.assert_called_once_with( + "https://example.com", ignore_cache=True + ) + def test_can_only_access_own_bookmarks(self): self.authenticate() self.setup_bookmark() diff --git a/bookmarks/tests/test_bookmarks_service.py b/bookmarks/tests/test_bookmarks_service.py index 1715b88..db2c34a 100644 --- a/bookmarks/tests/test_bookmarks_service.py +++ b/bookmarks/tests/test_bookmarks_service.py @@ -21,6 +21,7 @@ from bookmarks.services.bookmarks import ( share_bookmarks, unshare_bookmarks, enhance_with_website_metadata, + refresh_bookmarks_metadata, ) from bookmarks.tests.helpers import BookmarkFactoryMixin @@ -30,6 +31,21 @@ class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin): def setUp(self) -> None: self.get_or_create_test_user() + self.mock_schedule_refresh_metadata_patcher = patch( + "bookmarks.services.bookmarks.tasks.refresh_metadata" + ) + self.mock_schedule_refresh_metadata = ( + self.mock_schedule_refresh_metadata_patcher.start() + ) + self.mock_load_preview_image_patcher = patch( + "bookmarks.services.bookmarks.tasks.load_preview_image" + ) + self.mock_load_preview_image = self.mock_load_preview_image_patcher.start() + + def tearDown(self): + self.mock_schedule_refresh_metadata_patcher.stop() + self.mock_load_preview_image_patcher.stop() + def test_create_should_not_update_website_metadata(self): with patch.object( website_loader, "load_website_metadata" @@ -891,3 +907,70 @@ class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin): self.assertEqual("", bookmark.title) self.assertEqual("", bookmark.description) + + def test_refresh_bookmarks_metadata(self): + bookmark1 = self.setup_bookmark() + bookmark2 = self.setup_bookmark() + bookmark3 = self.setup_bookmark() + + refresh_bookmarks_metadata( + [bookmark1.id, bookmark2.id, bookmark3.id], self.get_or_create_test_user() + ) + + self.assertEqual(self.mock_schedule_refresh_metadata.call_count, 3) + self.assertEqual(self.mock_load_preview_image.call_count, 3) + + def test_refresh_bookmarks_metadata_should_only_refresh_specified_bookmarks(self): + bookmark1 = self.setup_bookmark() + bookmark2 = self.setup_bookmark() + bookmark3 = self.setup_bookmark() + + refresh_bookmarks_metadata( + [bookmark1.id, bookmark3.id], self.get_or_create_test_user() + ) + + self.assertEqual(self.mock_schedule_refresh_metadata.call_count, 2) + self.assertEqual(self.mock_load_preview_image.call_count, 2) + + for call_args in self.mock_schedule_refresh_metadata.call_args_list: + args, kwargs = call_args + self.assertNotIn(bookmark2.id, args) + + for call_args in self.mock_load_preview_image.call_args_list: + args, kwargs = call_args + self.assertNotIn(bookmark2.id, args) + + def test_refresh_bookmarks_metadata_should_only_refresh_user_owned_bookmarks(self): + other_user = self.setup_user() + bookmark1 = self.setup_bookmark() + bookmark2 = self.setup_bookmark() + inaccessible_bookmark = self.setup_bookmark(user=other_user) + + refresh_bookmarks_metadata( + [bookmark1.id, bookmark2.id, inaccessible_bookmark.id], + self.get_or_create_test_user(), + ) + + self.assertEqual(self.mock_schedule_refresh_metadata.call_count, 2) + self.assertEqual(self.mock_load_preview_image.call_count, 2) + + for call_args in self.mock_schedule_refresh_metadata.call_args_list: + args, kwargs = call_args + self.assertNotIn(inaccessible_bookmark.id, args) + + for call_args in self.mock_load_preview_image.call_args_list: + args, kwargs = call_args + self.assertNotIn(inaccessible_bookmark.id, args) + + def test_refresh_bookmarks_metadata_should_accept_mix_of_int_and_string_ids(self): + bookmark1 = self.setup_bookmark() + bookmark2 = self.setup_bookmark() + bookmark3 = self.setup_bookmark() + + refresh_bookmarks_metadata( + [str(bookmark1.id), str(bookmark2.id), bookmark3.id], + self.get_or_create_test_user(), + ) + + self.assertEqual(self.mock_schedule_refresh_metadata.call_count, 3) + self.assertEqual(self.mock_load_preview_image.call_count, 3) diff --git a/bookmarks/tests/test_bookmarks_tasks.py b/bookmarks/tests/test_bookmarks_tasks.py index de29f7a..b5f36e3 100644 --- a/bookmarks/tests/test_bookmarks_tasks.py +++ b/bookmarks/tests/test_bookmarks_tasks.py @@ -8,6 +8,7 @@ from waybackpy.exceptions import WaybackError from bookmarks.models import BookmarkAsset, UserProfile from bookmarks.services import tasks +from bookmarks.services.website_loader import WebsiteMetadata from bookmarks.tests.helpers import BookmarkFactoryMixin @@ -615,3 +616,52 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.assertEqual(count, 3) self.assertEqual(BookmarkAsset.objects.count(), count) + + @override_settings(LD_DISABLE_BACKGROUND_TASKS=True) + def test_refresh_metadata_task_not_called_when_background_tasks_disabled(self): + bookmark = self.setup_bookmark() + with mock.patch( + "bookmarks.services.tasks._refresh_metadata_task" + ) as mock_refresh_metadata_task: + tasks.refresh_metadata(bookmark) + mock_refresh_metadata_task.assert_not_called() + + @override_settings(LD_DISABLE_BACKGROUND_TASKS=False) + def test_refresh_metadata_task_called_when_background_tasks_enabled(self): + bookmark = self.setup_bookmark() + with mock.patch( + "bookmarks.services.tasks._refresh_metadata_task" + ) as mock_refresh_metadata_task: + tasks.refresh_metadata(bookmark) + mock_refresh_metadata_task.assert_called_once() + + def test_refresh_metadata_task_should_handle_missing_bookmark(self): + with mock.patch( + "bookmarks.services.website_loader.load_website_metadata" + ) as mock_load_website_metadata: + tasks._refresh_metadata_task(123) + + mock_load_website_metadata.assert_not_called() + + def test_refresh_metadata_updates_title_description(self): + bookmark = self.setup_bookmark( + title="Initial title", + description="Initial description", + ) + mock_website_metadata = WebsiteMetadata( + url=bookmark.url, + title="New title", + description="New description", + preview_image=None, + ) + + with mock.patch( + "bookmarks.services.tasks.load_website_metadata" + ) as mock_load_website_metadata: + mock_load_website_metadata.return_value = mock_website_metadata + + tasks.refresh_metadata(bookmark) + + bookmark.refresh_from_db() + self.assertEqual(bookmark.title, "New title") + self.assertEqual(bookmark.description, "New description") diff --git a/bookmarks/tests/test_website_loader.py b/bookmarks/tests/test_website_loader.py index bfb39c8..ce2ef15 100644 --- a/bookmarks/tests/test_website_loader.py +++ b/bookmarks/tests/test_website_loader.py @@ -27,7 +27,7 @@ class MockStreamingResponse: class WebsiteLoaderTestCase(TestCase): def setUp(self): # clear cached metadata before test run - website_loader.load_website_metadata.cache_clear() + website_loader._load_website_metadata_cached.cache_clear() def render_html_document( self, title, description="", og_description="", og_image="" @@ -183,3 +183,20 @@ class WebsiteLoaderTestCase(TestCase): metadata = website_loader.load_website_metadata("https://example.com") self.assertEqual("test title", metadata.title) self.assertEqual("test description", metadata.description) + + def test_website_metadata_ignore_cache(self): + expected_html = 'Test Title' + + with mock.patch.object( + website_loader, "load_page", return_value=expected_html + ) as mock_load_page: + website_loader.load_website_metadata("https://example.com") + mock_load_page.assert_called_once() + + website_loader.load_website_metadata("https://example.com") + mock_load_page.assert_called_once() + + website_loader.load_website_metadata( + "https://example.com", ignore_cache=True + ) + self.assertEqual(mock_load_page.call_count, 2) diff --git a/bookmarks/views/bookmarks.py b/bookmarks/views/bookmarks.py index f6bdebb..7d27ac5 100644 --- a/bookmarks/views/bookmarks.py +++ b/bookmarks/views/bookmarks.py @@ -30,6 +30,7 @@ from bookmarks.services.bookmarks import ( mark_bookmarks_as_unread, share_bookmarks, unshare_bookmarks, + refresh_bookmarks_metadata, ) from bookmarks.type_defs import HttpRequest from bookmarks.utils import get_safe_return_url @@ -348,6 +349,8 @@ def handle_action(request: HttpRequest, query: QuerySet[Bookmark] = None): return share_bookmarks(bookmark_ids, request.user) if "bulk_unshare" == bulk_action: return unshare_bookmarks(bookmark_ids, request.user) + if "bulk_refresh" == bulk_action: + return refresh_bookmarks_metadata(bookmark_ids, request.user) @login_required