Add bulk and single bookmark metadata refresh (#999)

* Add url create/edit query paramter to clear cache

* Add refresh bookmark metadata button in create/edit bookmark page

* Fix refresh bookmark metadata when editing existing bookmark

* Add bulk refresh metadata functionality

* Fix test cases for bulk view dropdown selection list

* Allow bulk metadata refresh when background tasks are disabled

* Move load preview image call on refresh metadata

* Update bookmark modified time on metadata refresh

* Rename function to align with convention

* Add tests for refresh task

* Add tests for bookmarks service refresh metadata

* Add tests for bookmarks api disable cache on check

* Remove bulk refresh metadata when background tasks disabled

* Refactor refresh metadata task

* Remove unnecessary call

* Fix testing mock name

* Abstract clearing metadata cache

* Add test to check if load page is called twice when cache disabled

* Remove refresh button for new bookmarks

* Remove strict disable cache is true check

* Refactor refresh metadata form logic into its own function

* move button and highlight changes

* polish and update tests

---------

Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
This commit is contained in:
Josh S
2025-03-22 06:34:10 -04:00
committed by GitHub
parent f1acb4f7c9
commit a23c357f2f
14 changed files with 300 additions and 10 deletions

View File

@@ -98,12 +98,13 @@ class BookmarkViewSet(
@action(methods=["get"], detail=False)
def check(self, request: HttpRequest):
url = request.GET.get("url")
ignore_cache = request.GET.get("ignore_cache", False) in ["true"]
bookmark = Bookmark.objects.filter(owner=request.user, url=url).first()
existing_bookmark_data = (
self.get_serializer(bookmark).data if bookmark else None
)
metadata = website_loader.load_website_metadata(url)
metadata = website_loader.load_website_metadata(url, ignore_cache=ignore_cache)
# Return tags that would be automatically applied to the bookmark
profile = request.user.profile

View File

@@ -197,6 +197,17 @@ def unshare_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
)
def refresh_bookmarks_metadata(bookmark_ids: [Union[int, str]], current_user: User):
sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
owned_bookmarks = Bookmark.objects.filter(
owner=current_user, id__in=sanitized_bookmark_ids
)
for bookmark in owned_bookmarks:
tasks.refresh_metadata(bookmark)
tasks.load_preview_image(current_user, bookmark)
def _merge_bookmark_data(from_bookmark: Bookmark, to_bookmark: Bookmark):
to_bookmark.title = from_bookmark.title
to_bookmark.description = from_bookmark.description

View File

@@ -6,6 +6,7 @@ import waybackpy
from django.conf import settings
from django.contrib.auth.models import User
from django.db.models import Q
from django.utils import timezone
from huey import crontab
from huey.contrib.djhuey import HUEY as huey
from huey.exceptions import TaskLockedException
@@ -13,7 +14,7 @@ from waybackpy.exceptions import WaybackError, TooManyRequestsError
from bookmarks.models import Bookmark, BookmarkAsset, UserProfile
from bookmarks.services import assets, favicon_loader, preview_image_loader
from bookmarks.services.website_loader import DEFAULT_USER_AGENT
from bookmarks.services.website_loader import DEFAULT_USER_AGENT, load_website_metadata
logger = logging.getLogger(__name__)
@@ -225,6 +226,31 @@ def _schedule_bookmarks_without_previews_task(user_id: int):
logging.exception(exc)
def refresh_metadata(bookmark: Bookmark):
if not settings.LD_DISABLE_BACKGROUND_TASKS:
_refresh_metadata_task(bookmark.id)
@task()
def _refresh_metadata_task(bookmark_id: int):
try:
bookmark = Bookmark.objects.get(id=bookmark_id)
except Bookmark.DoesNotExist:
return
logger.info(f"Refresh metadata for bookmark. url={bookmark.url}")
metadata = load_website_metadata(bookmark.url)
if metadata.title:
bookmark.title = metadata.title
if metadata.description:
bookmark.description = metadata.description
bookmark.date_modified = timezone.now()
bookmark.save()
logger.info(f"Successfully refreshed metadata for bookmark. url={bookmark.url}")
def is_html_snapshot_feature_active() -> bool:
return settings.LD_ENABLE_SNAPSHOTS and not settings.LD_DISABLE_BACKGROUND_TASKS

View File

@@ -27,10 +27,20 @@ class WebsiteMetadata:
}
def load_website_metadata(url: str, ignore_cache: bool = False):
if ignore_cache:
return _load_website_metadata(url)
return _load_website_metadata_cached(url)
# Caching metadata avoids scraping again when saving bookmarks, in case the
# metadata was already scraped to show preview values in the bookmark form
@lru_cache(maxsize=10)
def load_website_metadata(url: str):
def _load_website_metadata_cached(url: str):
return _load_website_metadata(url)
def _load_website_metadata(url: str):
title = None
description = None
preview_image = None

View File

@@ -15,14 +15,23 @@
visibility: hidden;
}
& .form-group .clear-button {
display: none;
& .form-group .suffix-button {
padding: 0;
border: none;
height: auto;
font-size: var(--font-size-sm);
}
& .form-group .clear-button,
& .form-group #refresh-button {
display: none;
}
& .form-group input.modified,
& .form-group textarea.modified {
background: var(--primary-color-shade);
}
& .form-input-hint.bookmark-exists {
display: none;
color: var(--warning-color);

View File

@@ -22,6 +22,7 @@
<option value="bulk_share">Share</option>
<option value="bulk_unshare">Unshare</option>
{% endif %}
<option value="bulk_refresh">Refresh from website</option>
</select>
<div class="tag-autocomplete d-none" ld-tag-autocomplete>
<input name="bulk_tag_string" class="form-input input-sm" placeholder="Tag names..." variant="small">

View File

@@ -33,9 +33,12 @@
<div class="form-group">
<div class="d-flex justify-between align-baseline">
<label for="{{ form.title.id_for_label }}" class="form-label">Title</label>
<button ld-clear-button data-for="{{ form.title.id_for_label }}" class="btn btn-link clear-button"
type="button">Clear
</button>
<div class="flex">
<button id="refresh-button" class="btn btn-link suffix-button" type="button">Refresh from website</button>
<button ld-clear-button data-for="{{ form.title.id_for_label }}" class="ml-2 btn btn-link suffix-button clear-button"
type="button">Clear
</button>
</div>
</div>
{{ form.title|add_class:"form-input"|attr:"autocomplete:off" }}
{{ form.title.errors }}
@@ -43,7 +46,8 @@
<div class="form-group">
<div class="d-flex justify-between align-baseline">
<label for="{{ form.description.id_for_label }}" class="form-label">Description</label>
<button ld-clear-button data-for="{{ form.description.id_for_label }}" class="btn btn-link clear-button"
<button ld-clear-button data-for="{{ form.description.id_for_label }}"
class="btn btn-link suffix-button clear-button"
type="button">Clear
</button>
</div>
@@ -111,6 +115,7 @@
const notesInput = document.getElementById('{{ form.notes.id_for_label }}');
const unreadCheckbox = document.getElementById('{{ form.unread.id_for_label }}');
const sharedCheckbox = document.getElementById('{{ form.shared.id_for_label }}');
const refreshButton = document.getElementById('refresh-button');
const bookmarkExistsHint = document.querySelector('.form-input-hint.bookmark-exists');
const editedBookmarkId = {{ form.instance.id|default:0 }};
let isTitleModified = !!titleInput.value;
@@ -154,6 +159,7 @@
// Display hint if URL is already bookmarked
const existingBookmark = data.bookmark;
bookmarkExistsHint.style['display'] = existingBookmark ? 'block' : 'none';
refreshButton.style['display'] = existingBookmark ? 'inline-block' : 'none';
// Prefill form with existing bookmark data
if (existingBookmark) {
@@ -193,6 +199,36 @@
});
}
function refreshMetadata() {
if (!urlInput.value) {
return;
}
toggleLoadingIcon(urlInput, true);
const websiteUrl = encodeURIComponent(urlInput.value);
const requestUrl = `{% url 'linkding:api-root' %}bookmarks/check?url=${websiteUrl}&ignore_cache=true`;
fetch(requestUrl)
.then(response => response.json())
.then(data => {
const metadata = data.metadata;
const existingBookmark = data.bookmark;
toggleLoadingIcon(urlInput, false);
if (metadata.title && metadata.title !== existingBookmark?.title) {
titleInput.value = metadata.title;
titleInput.classList.add("modified");
}
if (metadata.description && metadata.description !== existingBookmark?.description) {
descriptionInput.value = metadata.description;
descriptionInput.classList.add("modified");
}
});
}
refreshButton.addEventListener('click', refreshMetadata);
// Fetch website metadata when page loads and when URL changes, unless we are editing an existing bookmark
if (!editedBookmarkId) {
checkUrl();
@@ -203,6 +239,8 @@
descriptionInput.addEventListener('input', () => {
isDescriptionModified = true;
});
} else {
refreshButton.style['display'] = 'inline-block';
}
})();
</script>

View File

@@ -278,6 +278,7 @@ class BookmarkArchivedViewTestCase(
<option value="bulk_untag">Remove tags</option>
<option value="bulk_read">Mark as read</option>
<option value="bulk_unread">Mark as unread</option>
<option value="bulk_refresh">Refresh from website</option>
</select>
""",
html,
@@ -303,6 +304,7 @@ class BookmarkArchivedViewTestCase(
<option value="bulk_unread">Mark as unread</option>
<option value="bulk_share">Share</option>
<option value="bulk_unshare">Unshare</option>
<option value="bulk_refresh">Refresh from website</option>
</select>
""",
html,

View File

@@ -259,6 +259,7 @@ class BookmarkIndexViewTestCase(
<option value="bulk_untag">Remove tags</option>
<option value="bulk_read">Mark as read</option>
<option value="bulk_unread">Mark as unread</option>
<option value="bulk_refresh">Refresh from website</option>
</select>
""",
html,
@@ -284,6 +285,7 @@ class BookmarkIndexViewTestCase(
<option value="bulk_unread">Mark as unread</option>
<option value="bulk_share">Share</option>
<option value="bulk_unshare">Unshare</option>
<option value="bulk_refresh">Refresh from website</option>
</select>
""",
html,

View File

@@ -1047,6 +1047,43 @@ class BookmarksApiTestCase(LinkdingApiTestCase, BookmarkFactoryMixin):
self.assertCountEqual(auto_tags, ["tag1", "tag2"])
def test_check_ignore_cache(self):
self.authenticate()
with patch.object(
website_loader, "load_website_metadata"
) as mock_load_website_metadata:
expected_metadata = WebsiteMetadata(
"https://example.com",
"Scraped metadata",
"Scraped description",
"https://example.com/preview.png",
)
mock_load_website_metadata.return_value = expected_metadata
# Does not ignore cache by default
url = reverse("linkding:bookmark-check")
check_url = urllib.parse.quote_plus("https://example.com")
self.get(
f"{url}?url={check_url}",
expected_status_code=status.HTTP_200_OK,
)
mock_load_website_metadata.assert_called_once_with(
"https://example.com", ignore_cache=False
)
mock_load_website_metadata.reset_mock()
# Ignores cache based on query param
self.get(
f"{url}?url={check_url}&ignore_cache=true",
expected_status_code=status.HTTP_200_OK,
)
mock_load_website_metadata.assert_called_once_with(
"https://example.com", ignore_cache=True
)
def test_can_only_access_own_bookmarks(self):
self.authenticate()
self.setup_bookmark()

View File

@@ -21,6 +21,7 @@ from bookmarks.services.bookmarks import (
share_bookmarks,
unshare_bookmarks,
enhance_with_website_metadata,
refresh_bookmarks_metadata,
)
from bookmarks.tests.helpers import BookmarkFactoryMixin
@@ -30,6 +31,21 @@ class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin):
def setUp(self) -> None:
self.get_or_create_test_user()
self.mock_schedule_refresh_metadata_patcher = patch(
"bookmarks.services.bookmarks.tasks.refresh_metadata"
)
self.mock_schedule_refresh_metadata = (
self.mock_schedule_refresh_metadata_patcher.start()
)
self.mock_load_preview_image_patcher = patch(
"bookmarks.services.bookmarks.tasks.load_preview_image"
)
self.mock_load_preview_image = self.mock_load_preview_image_patcher.start()
def tearDown(self):
self.mock_schedule_refresh_metadata_patcher.stop()
self.mock_load_preview_image_patcher.stop()
def test_create_should_not_update_website_metadata(self):
with patch.object(
website_loader, "load_website_metadata"
@@ -891,3 +907,70 @@ class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin):
self.assertEqual("", bookmark.title)
self.assertEqual("", bookmark.description)
def test_refresh_bookmarks_metadata(self):
bookmark1 = self.setup_bookmark()
bookmark2 = self.setup_bookmark()
bookmark3 = self.setup_bookmark()
refresh_bookmarks_metadata(
[bookmark1.id, bookmark2.id, bookmark3.id], self.get_or_create_test_user()
)
self.assertEqual(self.mock_schedule_refresh_metadata.call_count, 3)
self.assertEqual(self.mock_load_preview_image.call_count, 3)
def test_refresh_bookmarks_metadata_should_only_refresh_specified_bookmarks(self):
bookmark1 = self.setup_bookmark()
bookmark2 = self.setup_bookmark()
bookmark3 = self.setup_bookmark()
refresh_bookmarks_metadata(
[bookmark1.id, bookmark3.id], self.get_or_create_test_user()
)
self.assertEqual(self.mock_schedule_refresh_metadata.call_count, 2)
self.assertEqual(self.mock_load_preview_image.call_count, 2)
for call_args in self.mock_schedule_refresh_metadata.call_args_list:
args, kwargs = call_args
self.assertNotIn(bookmark2.id, args)
for call_args in self.mock_load_preview_image.call_args_list:
args, kwargs = call_args
self.assertNotIn(bookmark2.id, args)
def test_refresh_bookmarks_metadata_should_only_refresh_user_owned_bookmarks(self):
other_user = self.setup_user()
bookmark1 = self.setup_bookmark()
bookmark2 = self.setup_bookmark()
inaccessible_bookmark = self.setup_bookmark(user=other_user)
refresh_bookmarks_metadata(
[bookmark1.id, bookmark2.id, inaccessible_bookmark.id],
self.get_or_create_test_user(),
)
self.assertEqual(self.mock_schedule_refresh_metadata.call_count, 2)
self.assertEqual(self.mock_load_preview_image.call_count, 2)
for call_args in self.mock_schedule_refresh_metadata.call_args_list:
args, kwargs = call_args
self.assertNotIn(inaccessible_bookmark.id, args)
for call_args in self.mock_load_preview_image.call_args_list:
args, kwargs = call_args
self.assertNotIn(inaccessible_bookmark.id, args)
def test_refresh_bookmarks_metadata_should_accept_mix_of_int_and_string_ids(self):
bookmark1 = self.setup_bookmark()
bookmark2 = self.setup_bookmark()
bookmark3 = self.setup_bookmark()
refresh_bookmarks_metadata(
[str(bookmark1.id), str(bookmark2.id), bookmark3.id],
self.get_or_create_test_user(),
)
self.assertEqual(self.mock_schedule_refresh_metadata.call_count, 3)
self.assertEqual(self.mock_load_preview_image.call_count, 3)

View File

@@ -8,6 +8,7 @@ from waybackpy.exceptions import WaybackError
from bookmarks.models import BookmarkAsset, UserProfile
from bookmarks.services import tasks
from bookmarks.services.website_loader import WebsiteMetadata
from bookmarks.tests.helpers import BookmarkFactoryMixin
@@ -615,3 +616,52 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.assertEqual(count, 3)
self.assertEqual(BookmarkAsset.objects.count(), count)
@override_settings(LD_DISABLE_BACKGROUND_TASKS=True)
def test_refresh_metadata_task_not_called_when_background_tasks_disabled(self):
bookmark = self.setup_bookmark()
with mock.patch(
"bookmarks.services.tasks._refresh_metadata_task"
) as mock_refresh_metadata_task:
tasks.refresh_metadata(bookmark)
mock_refresh_metadata_task.assert_not_called()
@override_settings(LD_DISABLE_BACKGROUND_TASKS=False)
def test_refresh_metadata_task_called_when_background_tasks_enabled(self):
bookmark = self.setup_bookmark()
with mock.patch(
"bookmarks.services.tasks._refresh_metadata_task"
) as mock_refresh_metadata_task:
tasks.refresh_metadata(bookmark)
mock_refresh_metadata_task.assert_called_once()
def test_refresh_metadata_task_should_handle_missing_bookmark(self):
with mock.patch(
"bookmarks.services.website_loader.load_website_metadata"
) as mock_load_website_metadata:
tasks._refresh_metadata_task(123)
mock_load_website_metadata.assert_not_called()
def test_refresh_metadata_updates_title_description(self):
bookmark = self.setup_bookmark(
title="Initial title",
description="Initial description",
)
mock_website_metadata = WebsiteMetadata(
url=bookmark.url,
title="New title",
description="New description",
preview_image=None,
)
with mock.patch(
"bookmarks.services.tasks.load_website_metadata"
) as mock_load_website_metadata:
mock_load_website_metadata.return_value = mock_website_metadata
tasks.refresh_metadata(bookmark)
bookmark.refresh_from_db()
self.assertEqual(bookmark.title, "New title")
self.assertEqual(bookmark.description, "New description")

View File

@@ -27,7 +27,7 @@ class MockStreamingResponse:
class WebsiteLoaderTestCase(TestCase):
def setUp(self):
# clear cached metadata before test run
website_loader.load_website_metadata.cache_clear()
website_loader._load_website_metadata_cached.cache_clear()
def render_html_document(
self, title, description="", og_description="", og_image=""
@@ -183,3 +183,20 @@ class WebsiteLoaderTestCase(TestCase):
metadata = website_loader.load_website_metadata("https://example.com")
self.assertEqual("test title", metadata.title)
self.assertEqual("test description", metadata.description)
def test_website_metadata_ignore_cache(self):
expected_html = '<html><head><title>Test Title</title><meta name="description" content="Test Description"><meta property="og:image" content="/images/test.jpg"></head></html>'
with mock.patch.object(
website_loader, "load_page", return_value=expected_html
) as mock_load_page:
website_loader.load_website_metadata("https://example.com")
mock_load_page.assert_called_once()
website_loader.load_website_metadata("https://example.com")
mock_load_page.assert_called_once()
website_loader.load_website_metadata(
"https://example.com", ignore_cache=True
)
self.assertEqual(mock_load_page.call_count, 2)

View File

@@ -30,6 +30,7 @@ from bookmarks.services.bookmarks import (
mark_bookmarks_as_unread,
share_bookmarks,
unshare_bookmarks,
refresh_bookmarks_metadata,
)
from bookmarks.type_defs import HttpRequest
from bookmarks.utils import get_safe_return_url
@@ -348,6 +349,8 @@ def handle_action(request: HttpRequest, query: QuerySet[Bookmark] = None):
return share_bookmarks(bookmark_ids, request.user)
if "bulk_unshare" == bulk_action:
return unshare_bookmarks(bookmark_ids, request.user)
if "bulk_refresh" == bulk_action:
return refresh_bookmarks_metadata(bookmark_ids, request.user)
@login_required