From 95529eccd4060145cf13885bbf4de41919f07316 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sascha=20I=C3=9Fbr=C3=BCcker?= Date: Sat, 11 Oct 2025 10:45:23 +0200 Subject: [PATCH] Check for dupes by exact URL if normalized URL is missing (#1204) --- bookmarks/api/routes.py | 11 ++---- bookmarks/forms.py | 6 +--- bookmarks/models.py | 13 ++++++- bookmarks/services/bookmarks.py | 6 ++-- bookmarks/tests/test_bookmarks_service.py | 41 +++++++++++++++++++++++ 5 files changed, 58 insertions(+), 19 deletions(-) diff --git a/bookmarks/api/routes.py b/bookmarks/api/routes.py index 649d581..ee44f9c 100644 --- a/bookmarks/api/routes.py +++ b/bookmarks/api/routes.py @@ -27,7 +27,6 @@ from bookmarks.models import ( BookmarkBundle, ) from bookmarks.services import assets, bookmarks, bundles, auto_tagging, website_loader -from bookmarks.utils import normalize_url from bookmarks.type_defs import HttpRequest from bookmarks.views import access @@ -108,10 +107,7 @@ class BookmarkViewSet( def check(self, request: HttpRequest): url = request.GET.get("url") ignore_cache = request.GET.get("ignore_cache", False) in ["true"] - normalized_url = normalize_url(url) - bookmark = Bookmark.objects.filter( - owner=request.user, url_normalized=normalized_url - ).first() + bookmark = Bookmark.query_existing(request.user, url).first() existing_bookmark_data = ( self.get_serializer(bookmark).data if bookmark else None ) @@ -155,10 +151,7 @@ class BookmarkViewSet( status=status.HTTP_400_BAD_REQUEST, ) - normalized_url = normalize_url(url) - bookmark = Bookmark.objects.filter( - owner=request.user, url_normalized=normalized_url - ).first() + bookmark = Bookmark.query_existing(request.user, url).first() if not bookmark: bookmark = Bookmark(url=url) diff --git a/bookmarks/forms.py b/bookmarks/forms.py index 87b546b..3062db0 100644 --- a/bookmarks/forms.py +++ b/bookmarks/forms.py @@ -11,7 +11,6 @@ from bookmarks.models import ( ) from bookmarks.services.bookmarks import create_bookmark, update_bookmark from bookmarks.type_defs import HttpRequest -from bookmarks.utils import normalize_url from bookmarks.validators import BookmarkURLValidator @@ -94,11 +93,8 @@ class BookmarkForm(forms.ModelForm): # raise a validation error in that case. url = self.cleaned_data["url"] if self.instance.pk: - normalized_url = normalize_url(url) is_duplicate = ( - Bookmark.objects.filter( - owner=self.instance.owner, url_normalized=normalized_url - ) + Bookmark.query_existing(self.instance.owner, url) .exclude(pk=self.instance.pk) .exists() ) diff --git a/bookmarks/models.py b/bookmarks/models.py index cb86d74..3311d6e 100644 --- a/bookmarks/models.py +++ b/bookmarks/models.py @@ -1,14 +1,15 @@ -import binascii import hashlib import logging import os from typing import List +import binascii from django import forms from django.conf import settings from django.contrib.auth.models import User from django.core.validators import MinValueValidator from django.db import models +from django.db.models import Q from django.db.models.signals import post_save, post_delete from django.dispatch import receiver from django.http import QueryDict @@ -103,6 +104,16 @@ class Bookmark(models.Model): def __str__(self): return self.resolved_title + " (" + self.url[:30] + "...)" + @staticmethod + def query_existing(owner: User, url: str) -> models.QuerySet: + # Find existing bookmark by normalized URL, or fall back to exact URL if + # normalized URL was not generated for whatever reason + normalized_url = normalize_url(url) + q = Q(owner=owner) & ( + Q(url_normalized=normalized_url) | Q(url_normalized="", url=url) + ) + return Bookmark.objects.filter(q) + @receiver(post_delete, sender=Bookmark) def bookmark_deleted(sender, instance, **kwargs): diff --git a/bookmarks/services/bookmarks.py b/bookmarks/services/bookmarks.py index 923ba87..180c8ff 100644 --- a/bookmarks/services/bookmarks.py +++ b/bookmarks/services/bookmarks.py @@ -4,7 +4,6 @@ from typing import Union from django.utils import timezone from bookmarks.models import Bookmark, User, parse_tag_string -from bookmarks.utils import normalize_url from bookmarks.services import auto_tagging from bookmarks.services import tasks from bookmarks.services import website_loader @@ -20,9 +19,8 @@ def create_bookmark( disable_html_snapshot: bool = False, ): # If URL is already bookmarked, then update it - normalized_url = normalize_url(bookmark.url) - existing_bookmark: Bookmark = Bookmark.objects.filter( - owner=current_user, url_normalized=normalized_url + existing_bookmark: Bookmark = Bookmark.query_existing( + current_user, bookmark.url ).first() if existing_bookmark is not None: diff --git a/bookmarks/tests/test_bookmarks_service.py b/bookmarks/tests/test_bookmarks_service.py index a930580..47c95c3 100644 --- a/bookmarks/tests/test_bookmarks_service.py +++ b/bookmarks/tests/test_bookmarks_service.py @@ -114,6 +114,47 @@ class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin): self.assertEqual(updated_bookmark.id, original_bookmark.id) self.assertEqual(updated_bookmark.title, bookmark_data.title) + def test_create_should_update_existing_bookmark_when_normalized_url_is_empty( + self, + ): + # Test behavior when url_normalized is empty for whatever reason + # In this case should at least match the URL directly + original_bookmark = self.setup_bookmark(url="https://example.com") + Bookmark.objects.update(url_normalized="") + bookmark_data = Bookmark( + url="https://example.com", + title="Updated Title", + description="Updated description", + ) + updated_bookmark = create_bookmark( + bookmark_data, "", self.get_or_create_test_user() + ) + + self.assertEqual(Bookmark.objects.count(), 1) + self.assertEqual(updated_bookmark.id, original_bookmark.id) + self.assertEqual(updated_bookmark.title, bookmark_data.title) + + def test_create_should_update_first_existing_bookmark_for_multiple_duplicates( + self, + ): + first_dupe = self.setup_bookmark(url="https://example.com") + second_dupe = self.setup_bookmark(url="https://example.com/") + + bookmark_data = Bookmark( + url="https://example.com", + title="Updated Title", + description="Updated description", + ) + create_bookmark(bookmark_data, "", self.get_or_create_test_user()) + + self.assertEqual(Bookmark.objects.count(), 2) + + first_dupe.refresh_from_db() + self.assertEqual(first_dupe.title, bookmark_data.title) + + second_dupe.refresh_from_db() + self.assertNotEqual(second_dupe.title, bookmark_data.title) + def test_create_should_populate_url_normalized_field(self): bookmark_data = Bookmark( url="https://EXAMPLE.COM/path/?z=1&a=2",