Add bulk and single bookmark metadata refresh (#999)

* Add url create/edit query paramter to clear cache

* Add refresh bookmark metadata button in create/edit bookmark page

* Fix refresh bookmark metadata when editing existing bookmark

* Add bulk refresh metadata functionality

* Fix test cases for bulk view dropdown selection list

* Allow bulk metadata refresh when background tasks are disabled

* Move load preview image call on refresh metadata

* Update bookmark modified time on metadata refresh

* Rename function to align with convention

* Add tests for refresh task

* Add tests for bookmarks service refresh metadata

* Add tests for bookmarks api disable cache on check

* Remove bulk refresh metadata when background tasks disabled

* Refactor refresh metadata task

* Remove unnecessary call

* Fix testing mock name

* Abstract clearing metadata cache

* Add test to check if load page is called twice when cache disabled

* Remove refresh button for new bookmarks

* Remove strict disable cache is true check

* Refactor refresh metadata form logic into its own function

* move button and highlight changes

* polish and update tests

---------

Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
This commit is contained in:
Josh S
2025-03-22 06:34:10 -04:00
committed by GitHub
parent f1acb4f7c9
commit a23c357f2f
14 changed files with 300 additions and 10 deletions

View File

@@ -98,12 +98,13 @@ class BookmarkViewSet(
@action(methods=["get"], detail=False) @action(methods=["get"], detail=False)
def check(self, request: HttpRequest): def check(self, request: HttpRequest):
url = request.GET.get("url") url = request.GET.get("url")
ignore_cache = request.GET.get("ignore_cache", False) in ["true"]
bookmark = Bookmark.objects.filter(owner=request.user, url=url).first() bookmark = Bookmark.objects.filter(owner=request.user, url=url).first()
existing_bookmark_data = ( existing_bookmark_data = (
self.get_serializer(bookmark).data if bookmark else None self.get_serializer(bookmark).data if bookmark else None
) )
metadata = website_loader.load_website_metadata(url) metadata = website_loader.load_website_metadata(url, ignore_cache=ignore_cache)
# Return tags that would be automatically applied to the bookmark # Return tags that would be automatically applied to the bookmark
profile = request.user.profile profile = request.user.profile

View File

@@ -197,6 +197,17 @@ def unshare_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
) )
def refresh_bookmarks_metadata(bookmark_ids: [Union[int, str]], current_user: User):
sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
owned_bookmarks = Bookmark.objects.filter(
owner=current_user, id__in=sanitized_bookmark_ids
)
for bookmark in owned_bookmarks:
tasks.refresh_metadata(bookmark)
tasks.load_preview_image(current_user, bookmark)
def _merge_bookmark_data(from_bookmark: Bookmark, to_bookmark: Bookmark): def _merge_bookmark_data(from_bookmark: Bookmark, to_bookmark: Bookmark):
to_bookmark.title = from_bookmark.title to_bookmark.title = from_bookmark.title
to_bookmark.description = from_bookmark.description to_bookmark.description = from_bookmark.description

View File

@@ -6,6 +6,7 @@ import waybackpy
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.db.models import Q from django.db.models import Q
from django.utils import timezone
from huey import crontab from huey import crontab
from huey.contrib.djhuey import HUEY as huey from huey.contrib.djhuey import HUEY as huey
from huey.exceptions import TaskLockedException from huey.exceptions import TaskLockedException
@@ -13,7 +14,7 @@ from waybackpy.exceptions import WaybackError, TooManyRequestsError
from bookmarks.models import Bookmark, BookmarkAsset, UserProfile from bookmarks.models import Bookmark, BookmarkAsset, UserProfile
from bookmarks.services import assets, favicon_loader, preview_image_loader from bookmarks.services import assets, favicon_loader, preview_image_loader
from bookmarks.services.website_loader import DEFAULT_USER_AGENT from bookmarks.services.website_loader import DEFAULT_USER_AGENT, load_website_metadata
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -225,6 +226,31 @@ def _schedule_bookmarks_without_previews_task(user_id: int):
logging.exception(exc) logging.exception(exc)
def refresh_metadata(bookmark: Bookmark):
if not settings.LD_DISABLE_BACKGROUND_TASKS:
_refresh_metadata_task(bookmark.id)
@task()
def _refresh_metadata_task(bookmark_id: int):
try:
bookmark = Bookmark.objects.get(id=bookmark_id)
except Bookmark.DoesNotExist:
return
logger.info(f"Refresh metadata for bookmark. url={bookmark.url}")
metadata = load_website_metadata(bookmark.url)
if metadata.title:
bookmark.title = metadata.title
if metadata.description:
bookmark.description = metadata.description
bookmark.date_modified = timezone.now()
bookmark.save()
logger.info(f"Successfully refreshed metadata for bookmark. url={bookmark.url}")
def is_html_snapshot_feature_active() -> bool: def is_html_snapshot_feature_active() -> bool:
return settings.LD_ENABLE_SNAPSHOTS and not settings.LD_DISABLE_BACKGROUND_TASKS return settings.LD_ENABLE_SNAPSHOTS and not settings.LD_DISABLE_BACKGROUND_TASKS

View File

@@ -27,10 +27,20 @@ class WebsiteMetadata:
} }
def load_website_metadata(url: str, ignore_cache: bool = False):
if ignore_cache:
return _load_website_metadata(url)
return _load_website_metadata_cached(url)
# Caching metadata avoids scraping again when saving bookmarks, in case the # Caching metadata avoids scraping again when saving bookmarks, in case the
# metadata was already scraped to show preview values in the bookmark form # metadata was already scraped to show preview values in the bookmark form
@lru_cache(maxsize=10) @lru_cache(maxsize=10)
def load_website_metadata(url: str): def _load_website_metadata_cached(url: str):
return _load_website_metadata(url)
def _load_website_metadata(url: str):
title = None title = None
description = None description = None
preview_image = None preview_image = None

View File

@@ -15,14 +15,23 @@
visibility: hidden; visibility: hidden;
} }
& .form-group .clear-button { & .form-group .suffix-button {
display: none;
padding: 0; padding: 0;
border: none; border: none;
height: auto; height: auto;
font-size: var(--font-size-sm); font-size: var(--font-size-sm);
} }
& .form-group .clear-button,
& .form-group #refresh-button {
display: none;
}
& .form-group input.modified,
& .form-group textarea.modified {
background: var(--primary-color-shade);
}
& .form-input-hint.bookmark-exists { & .form-input-hint.bookmark-exists {
display: none; display: none;
color: var(--warning-color); color: var(--warning-color);

View File

@@ -22,6 +22,7 @@
<option value="bulk_share">Share</option> <option value="bulk_share">Share</option>
<option value="bulk_unshare">Unshare</option> <option value="bulk_unshare">Unshare</option>
{% endif %} {% endif %}
<option value="bulk_refresh">Refresh from website</option>
</select> </select>
<div class="tag-autocomplete d-none" ld-tag-autocomplete> <div class="tag-autocomplete d-none" ld-tag-autocomplete>
<input name="bulk_tag_string" class="form-input input-sm" placeholder="Tag names..." variant="small"> <input name="bulk_tag_string" class="form-input input-sm" placeholder="Tag names..." variant="small">

View File

@@ -33,17 +33,21 @@
<div class="form-group"> <div class="form-group">
<div class="d-flex justify-between align-baseline"> <div class="d-flex justify-between align-baseline">
<label for="{{ form.title.id_for_label }}" class="form-label">Title</label> <label for="{{ form.title.id_for_label }}" class="form-label">Title</label>
<button ld-clear-button data-for="{{ form.title.id_for_label }}" class="btn btn-link clear-button" <div class="flex">
<button id="refresh-button" class="btn btn-link suffix-button" type="button">Refresh from website</button>
<button ld-clear-button data-for="{{ form.title.id_for_label }}" class="ml-2 btn btn-link suffix-button clear-button"
type="button">Clear type="button">Clear
</button> </button>
</div> </div>
</div>
{{ form.title|add_class:"form-input"|attr:"autocomplete:off" }} {{ form.title|add_class:"form-input"|attr:"autocomplete:off" }}
{{ form.title.errors }} {{ form.title.errors }}
</div> </div>
<div class="form-group"> <div class="form-group">
<div class="d-flex justify-between align-baseline"> <div class="d-flex justify-between align-baseline">
<label for="{{ form.description.id_for_label }}" class="form-label">Description</label> <label for="{{ form.description.id_for_label }}" class="form-label">Description</label>
<button ld-clear-button data-for="{{ form.description.id_for_label }}" class="btn btn-link clear-button" <button ld-clear-button data-for="{{ form.description.id_for_label }}"
class="btn btn-link suffix-button clear-button"
type="button">Clear type="button">Clear
</button> </button>
</div> </div>
@@ -111,6 +115,7 @@
const notesInput = document.getElementById('{{ form.notes.id_for_label }}'); const notesInput = document.getElementById('{{ form.notes.id_for_label }}');
const unreadCheckbox = document.getElementById('{{ form.unread.id_for_label }}'); const unreadCheckbox = document.getElementById('{{ form.unread.id_for_label }}');
const sharedCheckbox = document.getElementById('{{ form.shared.id_for_label }}'); const sharedCheckbox = document.getElementById('{{ form.shared.id_for_label }}');
const refreshButton = document.getElementById('refresh-button');
const bookmarkExistsHint = document.querySelector('.form-input-hint.bookmark-exists'); const bookmarkExistsHint = document.querySelector('.form-input-hint.bookmark-exists');
const editedBookmarkId = {{ form.instance.id|default:0 }}; const editedBookmarkId = {{ form.instance.id|default:0 }};
let isTitleModified = !!titleInput.value; let isTitleModified = !!titleInput.value;
@@ -154,6 +159,7 @@
// Display hint if URL is already bookmarked // Display hint if URL is already bookmarked
const existingBookmark = data.bookmark; const existingBookmark = data.bookmark;
bookmarkExistsHint.style['display'] = existingBookmark ? 'block' : 'none'; bookmarkExistsHint.style['display'] = existingBookmark ? 'block' : 'none';
refreshButton.style['display'] = existingBookmark ? 'inline-block' : 'none';
// Prefill form with existing bookmark data // Prefill form with existing bookmark data
if (existingBookmark) { if (existingBookmark) {
@@ -193,6 +199,36 @@
}); });
} }
function refreshMetadata() {
if (!urlInput.value) {
return;
}
toggleLoadingIcon(urlInput, true);
const websiteUrl = encodeURIComponent(urlInput.value);
const requestUrl = `{% url 'linkding:api-root' %}bookmarks/check?url=${websiteUrl}&ignore_cache=true`;
fetch(requestUrl)
.then(response => response.json())
.then(data => {
const metadata = data.metadata;
const existingBookmark = data.bookmark;
toggleLoadingIcon(urlInput, false);
if (metadata.title && metadata.title !== existingBookmark?.title) {
titleInput.value = metadata.title;
titleInput.classList.add("modified");
}
if (metadata.description && metadata.description !== existingBookmark?.description) {
descriptionInput.value = metadata.description;
descriptionInput.classList.add("modified");
}
});
}
refreshButton.addEventListener('click', refreshMetadata);
// Fetch website metadata when page loads and when URL changes, unless we are editing an existing bookmark // Fetch website metadata when page loads and when URL changes, unless we are editing an existing bookmark
if (!editedBookmarkId) { if (!editedBookmarkId) {
checkUrl(); checkUrl();
@@ -203,6 +239,8 @@
descriptionInput.addEventListener('input', () => { descriptionInput.addEventListener('input', () => {
isDescriptionModified = true; isDescriptionModified = true;
}); });
} else {
refreshButton.style['display'] = 'inline-block';
} }
})(); })();
</script> </script>

View File

@@ -278,6 +278,7 @@ class BookmarkArchivedViewTestCase(
<option value="bulk_untag">Remove tags</option> <option value="bulk_untag">Remove tags</option>
<option value="bulk_read">Mark as read</option> <option value="bulk_read">Mark as read</option>
<option value="bulk_unread">Mark as unread</option> <option value="bulk_unread">Mark as unread</option>
<option value="bulk_refresh">Refresh from website</option>
</select> </select>
""", """,
html, html,
@@ -303,6 +304,7 @@ class BookmarkArchivedViewTestCase(
<option value="bulk_unread">Mark as unread</option> <option value="bulk_unread">Mark as unread</option>
<option value="bulk_share">Share</option> <option value="bulk_share">Share</option>
<option value="bulk_unshare">Unshare</option> <option value="bulk_unshare">Unshare</option>
<option value="bulk_refresh">Refresh from website</option>
</select> </select>
""", """,
html, html,

View File

@@ -259,6 +259,7 @@ class BookmarkIndexViewTestCase(
<option value="bulk_untag">Remove tags</option> <option value="bulk_untag">Remove tags</option>
<option value="bulk_read">Mark as read</option> <option value="bulk_read">Mark as read</option>
<option value="bulk_unread">Mark as unread</option> <option value="bulk_unread">Mark as unread</option>
<option value="bulk_refresh">Refresh from website</option>
</select> </select>
""", """,
html, html,
@@ -284,6 +285,7 @@ class BookmarkIndexViewTestCase(
<option value="bulk_unread">Mark as unread</option> <option value="bulk_unread">Mark as unread</option>
<option value="bulk_share">Share</option> <option value="bulk_share">Share</option>
<option value="bulk_unshare">Unshare</option> <option value="bulk_unshare">Unshare</option>
<option value="bulk_refresh">Refresh from website</option>
</select> </select>
""", """,
html, html,

View File

@@ -1047,6 +1047,43 @@ class BookmarksApiTestCase(LinkdingApiTestCase, BookmarkFactoryMixin):
self.assertCountEqual(auto_tags, ["tag1", "tag2"]) self.assertCountEqual(auto_tags, ["tag1", "tag2"])
def test_check_ignore_cache(self):
self.authenticate()
with patch.object(
website_loader, "load_website_metadata"
) as mock_load_website_metadata:
expected_metadata = WebsiteMetadata(
"https://example.com",
"Scraped metadata",
"Scraped description",
"https://example.com/preview.png",
)
mock_load_website_metadata.return_value = expected_metadata
# Does not ignore cache by default
url = reverse("linkding:bookmark-check")
check_url = urllib.parse.quote_plus("https://example.com")
self.get(
f"{url}?url={check_url}",
expected_status_code=status.HTTP_200_OK,
)
mock_load_website_metadata.assert_called_once_with(
"https://example.com", ignore_cache=False
)
mock_load_website_metadata.reset_mock()
# Ignores cache based on query param
self.get(
f"{url}?url={check_url}&ignore_cache=true",
expected_status_code=status.HTTP_200_OK,
)
mock_load_website_metadata.assert_called_once_with(
"https://example.com", ignore_cache=True
)
def test_can_only_access_own_bookmarks(self): def test_can_only_access_own_bookmarks(self):
self.authenticate() self.authenticate()
self.setup_bookmark() self.setup_bookmark()

View File

@@ -21,6 +21,7 @@ from bookmarks.services.bookmarks import (
share_bookmarks, share_bookmarks,
unshare_bookmarks, unshare_bookmarks,
enhance_with_website_metadata, enhance_with_website_metadata,
refresh_bookmarks_metadata,
) )
from bookmarks.tests.helpers import BookmarkFactoryMixin from bookmarks.tests.helpers import BookmarkFactoryMixin
@@ -30,6 +31,21 @@ class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin):
def setUp(self) -> None: def setUp(self) -> None:
self.get_or_create_test_user() self.get_or_create_test_user()
self.mock_schedule_refresh_metadata_patcher = patch(
"bookmarks.services.bookmarks.tasks.refresh_metadata"
)
self.mock_schedule_refresh_metadata = (
self.mock_schedule_refresh_metadata_patcher.start()
)
self.mock_load_preview_image_patcher = patch(
"bookmarks.services.bookmarks.tasks.load_preview_image"
)
self.mock_load_preview_image = self.mock_load_preview_image_patcher.start()
def tearDown(self):
self.mock_schedule_refresh_metadata_patcher.stop()
self.mock_load_preview_image_patcher.stop()
def test_create_should_not_update_website_metadata(self): def test_create_should_not_update_website_metadata(self):
with patch.object( with patch.object(
website_loader, "load_website_metadata" website_loader, "load_website_metadata"
@@ -891,3 +907,70 @@ class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin):
self.assertEqual("", bookmark.title) self.assertEqual("", bookmark.title)
self.assertEqual("", bookmark.description) self.assertEqual("", bookmark.description)
def test_refresh_bookmarks_metadata(self):
bookmark1 = self.setup_bookmark()
bookmark2 = self.setup_bookmark()
bookmark3 = self.setup_bookmark()
refresh_bookmarks_metadata(
[bookmark1.id, bookmark2.id, bookmark3.id], self.get_or_create_test_user()
)
self.assertEqual(self.mock_schedule_refresh_metadata.call_count, 3)
self.assertEqual(self.mock_load_preview_image.call_count, 3)
def test_refresh_bookmarks_metadata_should_only_refresh_specified_bookmarks(self):
bookmark1 = self.setup_bookmark()
bookmark2 = self.setup_bookmark()
bookmark3 = self.setup_bookmark()
refresh_bookmarks_metadata(
[bookmark1.id, bookmark3.id], self.get_or_create_test_user()
)
self.assertEqual(self.mock_schedule_refresh_metadata.call_count, 2)
self.assertEqual(self.mock_load_preview_image.call_count, 2)
for call_args in self.mock_schedule_refresh_metadata.call_args_list:
args, kwargs = call_args
self.assertNotIn(bookmark2.id, args)
for call_args in self.mock_load_preview_image.call_args_list:
args, kwargs = call_args
self.assertNotIn(bookmark2.id, args)
def test_refresh_bookmarks_metadata_should_only_refresh_user_owned_bookmarks(self):
other_user = self.setup_user()
bookmark1 = self.setup_bookmark()
bookmark2 = self.setup_bookmark()
inaccessible_bookmark = self.setup_bookmark(user=other_user)
refresh_bookmarks_metadata(
[bookmark1.id, bookmark2.id, inaccessible_bookmark.id],
self.get_or_create_test_user(),
)
self.assertEqual(self.mock_schedule_refresh_metadata.call_count, 2)
self.assertEqual(self.mock_load_preview_image.call_count, 2)
for call_args in self.mock_schedule_refresh_metadata.call_args_list:
args, kwargs = call_args
self.assertNotIn(inaccessible_bookmark.id, args)
for call_args in self.mock_load_preview_image.call_args_list:
args, kwargs = call_args
self.assertNotIn(inaccessible_bookmark.id, args)
def test_refresh_bookmarks_metadata_should_accept_mix_of_int_and_string_ids(self):
bookmark1 = self.setup_bookmark()
bookmark2 = self.setup_bookmark()
bookmark3 = self.setup_bookmark()
refresh_bookmarks_metadata(
[str(bookmark1.id), str(bookmark2.id), bookmark3.id],
self.get_or_create_test_user(),
)
self.assertEqual(self.mock_schedule_refresh_metadata.call_count, 3)
self.assertEqual(self.mock_load_preview_image.call_count, 3)

View File

@@ -8,6 +8,7 @@ from waybackpy.exceptions import WaybackError
from bookmarks.models import BookmarkAsset, UserProfile from bookmarks.models import BookmarkAsset, UserProfile
from bookmarks.services import tasks from bookmarks.services import tasks
from bookmarks.services.website_loader import WebsiteMetadata
from bookmarks.tests.helpers import BookmarkFactoryMixin from bookmarks.tests.helpers import BookmarkFactoryMixin
@@ -615,3 +616,52 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.assertEqual(count, 3) self.assertEqual(count, 3)
self.assertEqual(BookmarkAsset.objects.count(), count) self.assertEqual(BookmarkAsset.objects.count(), count)
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
def test_refresh_metadata_task_not_called_when_background_tasks_disabled(self):
bookmark = self.setup_bookmark()
with mock.patch(
"bookmarks.services.tasks._refresh_metadata_task"
) as mock_refresh_metadata_task:
tasks.refresh_metadata(bookmark)
mock_refresh_metadata_task.assert_not_called()
@override_settings(LD_DISABLE_BACKGROUND_TASKS=False)
def test_refresh_metadata_task_called_when_background_tasks_enabled(self):
bookmark = self.setup_bookmark()
with mock.patch(
"bookmarks.services.tasks._refresh_metadata_task"
) as mock_refresh_metadata_task:
tasks.refresh_metadata(bookmark)
mock_refresh_metadata_task.assert_called_once()
def test_refresh_metadata_task_should_handle_missing_bookmark(self):
with mock.patch(
"bookmarks.services.website_loader.load_website_metadata"
) as mock_load_website_metadata:
tasks._refresh_metadata_task(123)
mock_load_website_metadata.assert_not_called()
def test_refresh_metadata_updates_title_description(self):
bookmark = self.setup_bookmark(
title="Initial title",
description="Initial description",
)
mock_website_metadata = WebsiteMetadata(
url=bookmark.url,
title="New title",
description="New description",
preview_image=None,
)
with mock.patch(
"bookmarks.services.tasks.load_website_metadata"
) as mock_load_website_metadata:
mock_load_website_metadata.return_value = mock_website_metadata
tasks.refresh_metadata(bookmark)
bookmark.refresh_from_db()
self.assertEqual(bookmark.title, "New title")
self.assertEqual(bookmark.description, "New description")

View File

@@ -27,7 +27,7 @@ class MockStreamingResponse:
class WebsiteLoaderTestCase(TestCase): class WebsiteLoaderTestCase(TestCase):
def setUp(self): def setUp(self):
# clear cached metadata before test run # clear cached metadata before test run
website_loader.load_website_metadata.cache_clear() website_loader._load_website_metadata_cached.cache_clear()
def render_html_document( def render_html_document(
self, title, description="", og_description="", og_image="" self, title, description="", og_description="", og_image=""
@@ -183,3 +183,20 @@ class WebsiteLoaderTestCase(TestCase):
metadata = website_loader.load_website_metadata("https://example.com") metadata = website_loader.load_website_metadata("https://example.com")
self.assertEqual("test title", metadata.title) self.assertEqual("test title", metadata.title)
self.assertEqual("test description", metadata.description) self.assertEqual("test description", metadata.description)
def test_website_metadata_ignore_cache(self):
expected_html = '<html><head><title>Test Title</title><meta name="description" content="Test Description"><meta property="og:image" content="/images/test.jpg"></head></html>'
with mock.patch.object(
website_loader, "load_page", return_value=expected_html
) as mock_load_page:
website_loader.load_website_metadata("https://example.com")
mock_load_page.assert_called_once()
website_loader.load_website_metadata("https://example.com")
mock_load_page.assert_called_once()
website_loader.load_website_metadata(
"https://example.com", ignore_cache=True
)
self.assertEqual(mock_load_page.call_count, 2)

View File

@@ -30,6 +30,7 @@ from bookmarks.services.bookmarks import (
mark_bookmarks_as_unread, mark_bookmarks_as_unread,
share_bookmarks, share_bookmarks,
unshare_bookmarks, unshare_bookmarks,
refresh_bookmarks_metadata,
) )
from bookmarks.type_defs import HttpRequest from bookmarks.type_defs import HttpRequest
from bookmarks.utils import get_safe_return_url from bookmarks.utils import get_safe_return_url
@@ -348,6 +349,8 @@ def handle_action(request: HttpRequest, query: QuerySet[Bookmark] = None):
return share_bookmarks(bookmark_ids, request.user) return share_bookmarks(bookmark_ids, request.user)
if "bulk_unshare" == bulk_action: if "bulk_unshare" == bulk_action:
return unshare_bookmarks(bookmark_ids, request.user) return unshare_bookmarks(bookmark_ids, request.user)
if "bulk_refresh" == bulk_action:
return refresh_bookmarks_metadata(bookmark_ids, request.user)
@login_required @login_required