Check for dupes by exact URL if normalized URL is missing (#1204)

This commit is contained in:
Sascha Ißbrücker
2025-10-11 10:45:23 +02:00
committed by GitHub
parent a6b36750da
commit 95529eccd4
5 changed files with 58 additions and 19 deletions

View File

@@ -27,7 +27,6 @@ from bookmarks.models import (
BookmarkBundle, BookmarkBundle,
) )
from bookmarks.services import assets, bookmarks, bundles, auto_tagging, website_loader from bookmarks.services import assets, bookmarks, bundles, auto_tagging, website_loader
from bookmarks.utils import normalize_url
from bookmarks.type_defs import HttpRequest from bookmarks.type_defs import HttpRequest
from bookmarks.views import access from bookmarks.views import access
@@ -108,10 +107,7 @@ class BookmarkViewSet(
def check(self, request: HttpRequest): def check(self, request: HttpRequest):
url = request.GET.get("url") url = request.GET.get("url")
ignore_cache = request.GET.get("ignore_cache", False) in ["true"] ignore_cache = request.GET.get("ignore_cache", False) in ["true"]
normalized_url = normalize_url(url) bookmark = Bookmark.query_existing(request.user, url).first()
bookmark = Bookmark.objects.filter(
owner=request.user, url_normalized=normalized_url
).first()
existing_bookmark_data = ( existing_bookmark_data = (
self.get_serializer(bookmark).data if bookmark else None self.get_serializer(bookmark).data if bookmark else None
) )
@@ -155,10 +151,7 @@ class BookmarkViewSet(
status=status.HTTP_400_BAD_REQUEST, status=status.HTTP_400_BAD_REQUEST,
) )
normalized_url = normalize_url(url) bookmark = Bookmark.query_existing(request.user, url).first()
bookmark = Bookmark.objects.filter(
owner=request.user, url_normalized=normalized_url
).first()
if not bookmark: if not bookmark:
bookmark = Bookmark(url=url) bookmark = Bookmark(url=url)

View File

@@ -11,7 +11,6 @@ from bookmarks.models import (
) )
from bookmarks.services.bookmarks import create_bookmark, update_bookmark from bookmarks.services.bookmarks import create_bookmark, update_bookmark
from bookmarks.type_defs import HttpRequest from bookmarks.type_defs import HttpRequest
from bookmarks.utils import normalize_url
from bookmarks.validators import BookmarkURLValidator from bookmarks.validators import BookmarkURLValidator
@@ -94,11 +93,8 @@ class BookmarkForm(forms.ModelForm):
# raise a validation error in that case. # raise a validation error in that case.
url = self.cleaned_data["url"] url = self.cleaned_data["url"]
if self.instance.pk: if self.instance.pk:
normalized_url = normalize_url(url)
is_duplicate = ( is_duplicate = (
Bookmark.objects.filter( Bookmark.query_existing(self.instance.owner, url)
owner=self.instance.owner, url_normalized=normalized_url
)
.exclude(pk=self.instance.pk) .exclude(pk=self.instance.pk)
.exists() .exists()
) )

View File

@@ -1,14 +1,15 @@
import binascii
import hashlib import hashlib
import logging import logging
import os import os
from typing import List from typing import List
import binascii
from django import forms from django import forms
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.core.validators import MinValueValidator from django.core.validators import MinValueValidator
from django.db import models from django.db import models
from django.db.models import Q
from django.db.models.signals import post_save, post_delete from django.db.models.signals import post_save, post_delete
from django.dispatch import receiver from django.dispatch import receiver
from django.http import QueryDict from django.http import QueryDict
@@ -103,6 +104,16 @@ class Bookmark(models.Model):
def __str__(self): def __str__(self):
return self.resolved_title + " (" + self.url[:30] + "...)" return self.resolved_title + " (" + self.url[:30] + "...)"
@staticmethod
def query_existing(owner: User, url: str) -> models.QuerySet:
# Find existing bookmark by normalized URL, or fall back to exact URL if
# normalized URL was not generated for whatever reason
normalized_url = normalize_url(url)
q = Q(owner=owner) & (
Q(url_normalized=normalized_url) | Q(url_normalized="", url=url)
)
return Bookmark.objects.filter(q)
@receiver(post_delete, sender=Bookmark) @receiver(post_delete, sender=Bookmark)
def bookmark_deleted(sender, instance, **kwargs): def bookmark_deleted(sender, instance, **kwargs):

View File

@@ -4,7 +4,6 @@ from typing import Union
from django.utils import timezone from django.utils import timezone
from bookmarks.models import Bookmark, User, parse_tag_string from bookmarks.models import Bookmark, User, parse_tag_string
from bookmarks.utils import normalize_url
from bookmarks.services import auto_tagging from bookmarks.services import auto_tagging
from bookmarks.services import tasks from bookmarks.services import tasks
from bookmarks.services import website_loader from bookmarks.services import website_loader
@@ -20,9 +19,8 @@ def create_bookmark(
disable_html_snapshot: bool = False, disable_html_snapshot: bool = False,
): ):
# If URL is already bookmarked, then update it # If URL is already bookmarked, then update it
normalized_url = normalize_url(bookmark.url) existing_bookmark: Bookmark = Bookmark.query_existing(
existing_bookmark: Bookmark = Bookmark.objects.filter( current_user, bookmark.url
owner=current_user, url_normalized=normalized_url
).first() ).first()
if existing_bookmark is not None: if existing_bookmark is not None:

View File

@@ -114,6 +114,47 @@ class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin):
self.assertEqual(updated_bookmark.id, original_bookmark.id) self.assertEqual(updated_bookmark.id, original_bookmark.id)
self.assertEqual(updated_bookmark.title, bookmark_data.title) self.assertEqual(updated_bookmark.title, bookmark_data.title)
def test_create_should_update_existing_bookmark_when_normalized_url_is_empty(
self,
):
# Test behavior when url_normalized is empty for whatever reason
# In this case should at least match the URL directly
original_bookmark = self.setup_bookmark(url="https://example.com")
Bookmark.objects.update(url_normalized="")
bookmark_data = Bookmark(
url="https://example.com",
title="Updated Title",
description="Updated description",
)
updated_bookmark = create_bookmark(
bookmark_data, "", self.get_or_create_test_user()
)
self.assertEqual(Bookmark.objects.count(), 1)
self.assertEqual(updated_bookmark.id, original_bookmark.id)
self.assertEqual(updated_bookmark.title, bookmark_data.title)
def test_create_should_update_first_existing_bookmark_for_multiple_duplicates(
self,
):
first_dupe = self.setup_bookmark(url="https://example.com")
second_dupe = self.setup_bookmark(url="https://example.com/")
bookmark_data = Bookmark(
url="https://example.com",
title="Updated Title",
description="Updated description",
)
create_bookmark(bookmark_data, "", self.get_or_create_test_user())
self.assertEqual(Bookmark.objects.count(), 2)
first_dupe.refresh_from_db()
self.assertEqual(first_dupe.title, bookmark_data.title)
second_dupe.refresh_from_db()
self.assertNotEqual(second_dupe.title, bookmark_data.title)
def test_create_should_populate_url_normalized_field(self): def test_create_should_populate_url_normalized_field(self):
bookmark_data = Bookmark( bookmark_data = Bookmark(
url="https://EXAMPLE.COM/path/?z=1&a=2", url="https://EXAMPLE.COM/path/?z=1&a=2",