Check for dupes by exact URL if normalized URL is missing (#1204)

This commit is contained in:
Sascha Ißbrücker
2025-10-11 10:45:23 +02:00
committed by GitHub
parent a6b36750da
commit 95529eccd4
5 changed files with 58 additions and 19 deletions

View File

@@ -27,7 +27,6 @@ from bookmarks.models import (
BookmarkBundle,
)
from bookmarks.services import assets, bookmarks, bundles, auto_tagging, website_loader
from bookmarks.utils import normalize_url
from bookmarks.type_defs import HttpRequest
from bookmarks.views import access
@@ -108,10 +107,7 @@ class BookmarkViewSet(
def check(self, request: HttpRequest):
url = request.GET.get("url")
ignore_cache = request.GET.get("ignore_cache", False) in ["true"]
normalized_url = normalize_url(url)
bookmark = Bookmark.objects.filter(
owner=request.user, url_normalized=normalized_url
).first()
bookmark = Bookmark.query_existing(request.user, url).first()
existing_bookmark_data = (
self.get_serializer(bookmark).data if bookmark else None
)
@@ -155,10 +151,7 @@ class BookmarkViewSet(
status=status.HTTP_400_BAD_REQUEST,
)
normalized_url = normalize_url(url)
bookmark = Bookmark.objects.filter(
owner=request.user, url_normalized=normalized_url
).first()
bookmark = Bookmark.query_existing(request.user, url).first()
if not bookmark:
bookmark = Bookmark(url=url)

View File

@@ -11,7 +11,6 @@ from bookmarks.models import (
)
from bookmarks.services.bookmarks import create_bookmark, update_bookmark
from bookmarks.type_defs import HttpRequest
from bookmarks.utils import normalize_url
from bookmarks.validators import BookmarkURLValidator
@@ -94,11 +93,8 @@ class BookmarkForm(forms.ModelForm):
# raise a validation error in that case.
url = self.cleaned_data["url"]
if self.instance.pk:
normalized_url = normalize_url(url)
is_duplicate = (
Bookmark.objects.filter(
owner=self.instance.owner, url_normalized=normalized_url
)
Bookmark.query_existing(self.instance.owner, url)
.exclude(pk=self.instance.pk)
.exists()
)

View File

@@ -1,14 +1,15 @@
import binascii
import hashlib
import logging
import os
from typing import List
import binascii
from django import forms
from django.conf import settings
from django.contrib.auth.models import User
from django.core.validators import MinValueValidator
from django.db import models
from django.db.models import Q
from django.db.models.signals import post_save, post_delete
from django.dispatch import receiver
from django.http import QueryDict
@@ -103,6 +104,16 @@ class Bookmark(models.Model):
def __str__(self):
return self.resolved_title + " (" + self.url[:30] + "...)"
@staticmethod
def query_existing(owner: User, url: str) -> models.QuerySet:
# Find existing bookmark by normalized URL, or fall back to exact URL if
# normalized URL was not generated for whatever reason
normalized_url = normalize_url(url)
q = Q(owner=owner) & (
Q(url_normalized=normalized_url) | Q(url_normalized="", url=url)
)
return Bookmark.objects.filter(q)
@receiver(post_delete, sender=Bookmark)
def bookmark_deleted(sender, instance, **kwargs):

View File

@@ -4,7 +4,6 @@ from typing import Union
from django.utils import timezone
from bookmarks.models import Bookmark, User, parse_tag_string
from bookmarks.utils import normalize_url
from bookmarks.services import auto_tagging
from bookmarks.services import tasks
from bookmarks.services import website_loader
@@ -20,9 +19,8 @@ def create_bookmark(
disable_html_snapshot: bool = False,
):
# If URL is already bookmarked, then update it
normalized_url = normalize_url(bookmark.url)
existing_bookmark: Bookmark = Bookmark.objects.filter(
owner=current_user, url_normalized=normalized_url
existing_bookmark: Bookmark = Bookmark.query_existing(
current_user, bookmark.url
).first()
if existing_bookmark is not None:

View File

@@ -114,6 +114,47 @@ class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin):
self.assertEqual(updated_bookmark.id, original_bookmark.id)
self.assertEqual(updated_bookmark.title, bookmark_data.title)
def test_create_should_update_existing_bookmark_when_normalized_url_is_empty(
self,
):
# Test behavior when url_normalized is empty for whatever reason
# In this case should at least match the URL directly
original_bookmark = self.setup_bookmark(url="https://example.com")
Bookmark.objects.update(url_normalized="")
bookmark_data = Bookmark(
url="https://example.com",
title="Updated Title",
description="Updated description",
)
updated_bookmark = create_bookmark(
bookmark_data, "", self.get_or_create_test_user()
)
self.assertEqual(Bookmark.objects.count(), 1)
self.assertEqual(updated_bookmark.id, original_bookmark.id)
self.assertEqual(updated_bookmark.title, bookmark_data.title)
def test_create_should_update_first_existing_bookmark_for_multiple_duplicates(
self,
):
first_dupe = self.setup_bookmark(url="https://example.com")
second_dupe = self.setup_bookmark(url="https://example.com/")
bookmark_data = Bookmark(
url="https://example.com",
title="Updated Title",
description="Updated description",
)
create_bookmark(bookmark_data, "", self.get_or_create_test_user())
self.assertEqual(Bookmark.objects.count(), 2)
first_dupe.refresh_from_db()
self.assertEqual(first_dupe.title, bookmark_data.title)
second_dupe.refresh_from_db()
self.assertNotEqual(second_dupe.title, bookmark_data.title)
def test_create_should_populate_url_normalized_field(self):
bookmark_data = Bookmark(
url="https://EXAMPLE.COM/path/?z=1&a=2",