diff --git a/bookmarks/api/routes.py b/bookmarks/api/routes.py
index 562d007..649d581 100644
--- a/bookmarks/api/routes.py
+++ b/bookmarks/api/routes.py
@@ -27,6 +27,7 @@ from bookmarks.models import (
BookmarkBundle,
)
from bookmarks.services import assets, bookmarks, bundles, auto_tagging, website_loader
+from bookmarks.utils import normalize_url
from bookmarks.type_defs import HttpRequest
from bookmarks.views import access
@@ -107,7 +108,10 @@ class BookmarkViewSet(
def check(self, request: HttpRequest):
url = request.GET.get("url")
ignore_cache = request.GET.get("ignore_cache", False) in ["true"]
- bookmark = Bookmark.objects.filter(owner=request.user, url=url).first()
+ normalized_url = normalize_url(url)
+ bookmark = Bookmark.objects.filter(
+ owner=request.user, url_normalized=normalized_url
+ ).first()
existing_bookmark_data = (
self.get_serializer(bookmark).data if bookmark else None
)
@@ -151,7 +155,10 @@ class BookmarkViewSet(
status=status.HTTP_400_BAD_REQUEST,
)
- bookmark = Bookmark.objects.filter(owner=request.user, url=url).first()
+ normalized_url = normalize_url(url)
+ bookmark = Bookmark.objects.filter(
+ owner=request.user, url_normalized=normalized_url
+ ).first()
if not bookmark:
bookmark = Bookmark(url=url)
diff --git a/bookmarks/forms.py b/bookmarks/forms.py
index 5aa3efc..c571d0f 100644
--- a/bookmarks/forms.py
+++ b/bookmarks/forms.py
@@ -5,6 +5,7 @@ from bookmarks.models import Bookmark, build_tag_string
from bookmarks.validators import BookmarkURLValidator
from bookmarks.type_defs import HttpRequest
from bookmarks.services.bookmarks import create_bookmark, update_bookmark
+from bookmarks.utils import normalize_url
class CustomErrorList(ErrorList):
@@ -85,8 +86,11 @@ class BookmarkForm(forms.ModelForm):
# raise a validation error in that case.
url = self.cleaned_data["url"]
if self.instance.pk:
+ normalized_url = normalize_url(url)
is_duplicate = (
- Bookmark.objects.filter(owner=self.instance.owner, url=url)
+ Bookmark.objects.filter(
+ owner=self.instance.owner, url_normalized=normalized_url
+ )
.exclude(pk=self.instance.pk)
.exists()
)
diff --git a/bookmarks/migrations/0046_add_url_normalized_field.py b/bookmarks/migrations/0046_add_url_normalized_field.py
new file mode 100644
index 0000000..2b689ad
--- /dev/null
+++ b/bookmarks/migrations/0046_add_url_normalized_field.py
@@ -0,0 +1,18 @@
+# Generated by Django 5.2.3 on 2025-08-22 08:26
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("bookmarks", "0045_userprofile_hide_bundles_bookmarkbundle"),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name="bookmark",
+ name="url_normalized",
+ field=models.CharField(blank=True, db_index=True, max_length=2048),
+ ),
+ ]
diff --git a/bookmarks/migrations/0047_populate_url_normalized_field.py b/bookmarks/migrations/0047_populate_url_normalized_field.py
new file mode 100644
index 0000000..bb4fae2
--- /dev/null
+++ b/bookmarks/migrations/0047_populate_url_normalized_field.py
@@ -0,0 +1,38 @@
+# Generated by Django 5.2.3 on 2025-08-22 08:28
+
+from django.db import migrations, transaction
+from bookmarks.utils import normalize_url
+
+
+def populate_url_normalized(apps, schema_editor):
+ Bookmark = apps.get_model("bookmarks", "Bookmark")
+
+ batch_size = 500
+ with transaction.atomic():
+ qs = Bookmark.objects.all()
+ for start in range(0, qs.count(), batch_size):
+ batch = list(qs[start : start + batch_size])
+ for bookmark in batch:
+ bookmark.url_normalized = normalize_url(bookmark.url)
+ Bookmark.objects.bulk_update(
+ batch, ["url_normalized"], batch_size=batch_size
+ )
+
+
+def reverse_populate_url_normalized(apps, schema_editor):
+ Bookmark = apps.get_model("bookmarks", "Bookmark")
+ Bookmark.objects.all().update(url_normalized="")
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("bookmarks", "0046_add_url_normalized_field"),
+ ]
+
+ operations = [
+ migrations.RunPython(
+ populate_url_normalized,
+ reverse_populate_url_normalized,
+ ),
+ ]
diff --git a/bookmarks/models.py b/bookmarks/models.py
index fe7efce..2e860d7 100644
--- a/bookmarks/models.py
+++ b/bookmarks/models.py
@@ -14,7 +14,7 @@ from django.db.models.signals import post_save, post_delete
from django.dispatch import receiver
from django.http import QueryDict
-from bookmarks.utils import unique
+from bookmarks.utils import unique, normalize_url
from bookmarks.validators import BookmarkURLValidator
logger = logging.getLogger(__name__)
@@ -54,6 +54,7 @@ def build_tag_string(tag_names: List[str], delimiter: str = ","):
class Bookmark(models.Model):
url = models.CharField(max_length=2048, validators=[BookmarkURLValidator()])
+ url_normalized = models.CharField(max_length=2048, blank=True, db_index=True)
title = models.CharField(max_length=512, blank=True)
description = models.TextField(blank=True)
notes = models.TextField(blank=True)
@@ -96,6 +97,10 @@ class Bookmark(models.Model):
names = [tag.name for tag in self.tags.all()]
return sorted(names)
+ def save(self, *args, **kwargs):
+ self.url_normalized = normalize_url(self.url)
+ super().save(*args, **kwargs)
+
def __str__(self):
return self.resolved_title + " (" + self.url[:30] + "...)"
diff --git a/bookmarks/services/bookmarks.py b/bookmarks/services/bookmarks.py
index 82b1fb2..923ba87 100644
--- a/bookmarks/services/bookmarks.py
+++ b/bookmarks/services/bookmarks.py
@@ -4,6 +4,7 @@ from typing import Union
from django.utils import timezone
from bookmarks.models import Bookmark, User, parse_tag_string
+from bookmarks.utils import normalize_url
from bookmarks.services import auto_tagging
from bookmarks.services import tasks
from bookmarks.services import website_loader
@@ -19,8 +20,9 @@ def create_bookmark(
disable_html_snapshot: bool = False,
):
# If URL is already bookmarked, then update it
+ normalized_url = normalize_url(bookmark.url)
existing_bookmark: Bookmark = Bookmark.objects.filter(
- owner=current_user, url=bookmark.url
+ owner=current_user, url_normalized=normalized_url
).first()
if existing_bookmark is not None:
diff --git a/bookmarks/tests/test_bookmark_edit_view.py b/bookmarks/tests/test_bookmark_edit_view.py
index bec1ee4..da7dc67 100644
--- a/bookmarks/tests/test_bookmark_edit_view.py
+++ b/bookmarks/tests/test_bookmark_edit_view.py
@@ -188,6 +188,25 @@ class BookmarkEditViewTestCase(TestCase, BookmarkFactoryMixin):
edited_bookmark.refresh_from_db()
self.assertNotEqual(edited_bookmark.url, existing_bookmark.url)
+ def test_should_prevent_duplicate_normalized_urls(self):
+ self.setup_bookmark(url="https://EXAMPLE.COM/path/?z=1&a=2")
+
+ edited_bookmark = self.setup_bookmark(url="http://different.com")
+
+ form_data = self.create_form_data({"url": "https://example.com/path?a=2&z=1"})
+ response = self.client.post(
+ reverse("linkding:bookmarks.edit", args=[edited_bookmark.id]), form_data
+ )
+
+ self.assertEqual(response.status_code, 422)
+ self.assertInHTML(
+ "
A bookmark with this URL already exists.",
+ response.content.decode(),
+ )
+
+ edited_bookmark.refresh_from_db()
+ self.assertEqual(edited_bookmark.url, "http://different.com")
+
def test_should_redirect_to_return_url(self):
bookmark = self.setup_bookmark()
form_data = self.create_form_data()
diff --git a/bookmarks/tests/test_bookmarks_api.py b/bookmarks/tests/test_bookmarks_api.py
index 5f22d03..ea77a0c 100644
--- a/bookmarks/tests/test_bookmarks_api.py
+++ b/bookmarks/tests/test_bookmarks_api.py
@@ -1047,6 +1047,29 @@ class BookmarksApiTestCase(LinkdingApiTestCase, BookmarkFactoryMixin):
self.assertEqual(expected_metadata.description, metadata["description"])
self.assertEqual(expected_metadata.preview_image, metadata["preview_image"])
+ def test_check_returns_bookmark_using_normalized_url(self):
+ self.authenticate()
+
+ # Create bookmark with one URL variant
+ bookmark = self.setup_bookmark(
+ url="https://EXAMPLE.COM/path/?z=1&a=2",
+ title="Example title",
+ description="Example description",
+ )
+
+ # Check with different URL variant that should normalize to the same URL
+ url = reverse("linkding:bookmark-check")
+ check_url = urllib.parse.quote_plus("https://example.com/path?a=2&z=1")
+ response = self.get(
+ f"{url}?url={check_url}", expected_status_code=status.HTTP_200_OK
+ )
+ bookmark_data = response.data["bookmark"]
+
+ # Should find the existing bookmark despite URL differences
+ self.assertIsNotNone(bookmark_data)
+ self.assertEqual(bookmark.id, bookmark_data["id"])
+ self.assertEqual(bookmark.title, bookmark_data["title"])
+
def test_check_returns_no_auto_tags_if_none_configured(self):
self.authenticate()
diff --git a/bookmarks/tests/test_bookmarks_service.py b/bookmarks/tests/test_bookmarks_service.py
index 42cb78d..a930580 100644
--- a/bookmarks/tests/test_bookmarks_service.py
+++ b/bookmarks/tests/test_bookmarks_service.py
@@ -95,6 +95,41 @@ class BookmarkServiceTestCase(TestCase, BookmarkFactoryMixin):
# Saving a duplicate bookmark should not modify archive flag - right?
self.assertFalse(updated_bookmark.is_archived)
+ def test_create_should_update_existing_bookmark_with_normalized_url(
+ self,
+ ):
+ original_bookmark = self.setup_bookmark(
+ url="https://EXAMPLE.com/path/?a=1&z=2", unread=False, shared=False
+ )
+ bookmark_data = Bookmark(
+ url="HTTPS://example.com/path?z=2&a=1",
+ title="Updated Title",
+ description="Updated description",
+ )
+ updated_bookmark = create_bookmark(
+ bookmark_data, "", self.get_or_create_test_user()
+ )
+
+ self.assertEqual(Bookmark.objects.count(), 1)
+ self.assertEqual(updated_bookmark.id, original_bookmark.id)
+ self.assertEqual(updated_bookmark.title, bookmark_data.title)
+
+ def test_create_should_populate_url_normalized_field(self):
+ bookmark_data = Bookmark(
+ url="https://EXAMPLE.COM/path/?z=1&a=2",
+ title="Test Title",
+ description="Test description",
+ )
+ created_bookmark = create_bookmark(
+ bookmark_data, "", self.get_or_create_test_user()
+ )
+
+ created_bookmark.refresh_from_db()
+ self.assertEqual(created_bookmark.url, "https://EXAMPLE.COM/path/?z=1&a=2")
+ self.assertEqual(
+ created_bookmark.url_normalized, "https://example.com/path?a=2&z=1"
+ )
+
def test_create_should_create_web_archive_snapshot(self):
with patch.object(
tasks, "create_web_archive_snapshot"
diff --git a/bookmarks/tests/test_utils.py b/bookmarks/tests/test_utils.py
index 136cf70..7bdeb38 100644
--- a/bookmarks/tests/test_utils.py
+++ b/bookmarks/tests/test_utils.py
@@ -8,6 +8,7 @@ from bookmarks.utils import (
humanize_absolute_date,
humanize_relative_date,
parse_timestamp,
+ normalize_url,
)
@@ -182,3 +183,181 @@ class UtilsTestCase(TestCase):
with self.assertRaises(ValueError):
self.verify_timestamp(now, 1000000000)
+
+ def test_normalize_url_trailing_slash_handling(self):
+ test_cases = [
+ ("https://example.com/", "https://example.com"),
+ (
+ "https://example.com/path/",
+ "https://example.com/path",
+ ),
+ ("https://example.com/path/to/page/", "https://example.com/path/to/page"),
+ (
+ "https://example.com/path",
+ "https://example.com/path",
+ ),
+ ]
+
+ for original, expected in test_cases:
+ with self.subTest(url=original):
+ result = normalize_url(original)
+ self.assertEqual(expected, result)
+
+ def test_normalize_url_query_parameters(self):
+ test_cases = [
+ ("https://example.com?z=1&a=2", "https://example.com?a=2&z=1"),
+ ("https://example.com?c=3&b=2&a=1", "https://example.com?a=1&b=2&c=3"),
+ ("https://example.com?param=value", "https://example.com?param=value"),
+ ("https://example.com?", "https://example.com"),
+ (
+ "https://example.com?empty=&filled=value",
+ "https://example.com?empty=&filled=value",
+ ),
+ ]
+
+ for original, expected in test_cases:
+ with self.subTest(url=original):
+ result = normalize_url(original)
+ self.assertEqual(expected, result)
+
+ def test_normalize_url_case_sensitivity(self):
+ test_cases = [
+ (
+ "https://EXAMPLE.com/Path/To/Page",
+ "https://example.com/Path/To/Page",
+ ),
+ ("https://EXAMPLE.COM/API/v1/Users", "https://example.com/API/v1/Users"),
+ (
+ "HTTPS://EXAMPLE.COM/path",
+ "https://example.com/path",
+ ),
+ ]
+
+ for original, expected in test_cases:
+ with self.subTest(url=original):
+ result = normalize_url(original)
+ self.assertEqual(expected, result)
+
+ def test_normalize_url_special_characters_and_encoding(self):
+ test_cases = [
+ (
+ "https://example.com/path%20with%20spaces",
+ "https://example.com/path%20with%20spaces",
+ ),
+ ("https://example.com/caf%C3%A9", "https://example.com/caf%C3%A9"),
+ (
+ "https://example.com/path?q=hello%20world",
+ "https://example.com/path?q=hello%20world",
+ ),
+ ("https://example.com/pàth", "https://example.com/pàth"),
+ ]
+
+ for original, expected in test_cases:
+ with self.subTest(url=original):
+ result = normalize_url(original)
+ self.assertEqual(expected, result)
+
+ def test_normalize_url_various_protocols(self):
+ test_cases = [
+ ("FTP://example.com", "ftp://example.com"),
+ ("HTTP://EXAMPLE.COM", "http://example.com"),
+ ("https://example.com", "https://example.com"),
+ ("file:///path/to/file", "file:///path/to/file"),
+ ]
+
+ for original, expected in test_cases:
+ with self.subTest(url=original):
+ result = normalize_url(original)
+ self.assertEqual(expected, result)
+
+ def test_normalize_url_port_handling(self):
+ test_cases = [
+ ("https://example.com:8080", "https://example.com:8080"),
+ ("https://EXAMPLE.COM:8080", "https://example.com:8080"),
+ ("http://example.com:80", "http://example.com:80"),
+ ("https://example.com:443", "https://example.com:443"),
+ ]
+
+ for original, expected in test_cases:
+ with self.subTest(url=original):
+ result = normalize_url(original)
+ self.assertEqual(expected, result)
+
+ def test_normalize_url_authentication_handling(self):
+ test_cases = [
+ ("https://user:pass@EXAMPLE.COM", "https://user:pass@example.com"),
+ ("https://user@EXAMPLE.COM", "https://user@example.com"),
+ ("ftp://admin:secret@EXAMPLE.COM", "ftp://admin:secret@example.com"),
+ ]
+
+ for original, expected in test_cases:
+ with self.subTest(url=original):
+ result = normalize_url(original)
+ self.assertEqual(expected, result)
+
+ def test_normalize_url_fragment_handling(self):
+ test_cases = [
+ ("https://example.com#", "https://example.com"),
+ ("https://example.com#section", "https://example.com#section"),
+ ("https://EXAMPLE.COM/path#Section", "https://example.com/path#Section"),
+ ("https://EXAMPLE.COM/path/#Section", "https://example.com/path#Section"),
+ ("https://example.com?a=1#fragment", "https://example.com?a=1#fragment"),
+ (
+ "https://example.com?z=2&a=1#fragment",
+ "https://example.com?a=1&z=2#fragment",
+ ),
+ ]
+
+ for original, expected in test_cases:
+ with self.subTest(url=original):
+ result = normalize_url(original)
+ self.assertEqual(expected, result)
+
+ def test_normalize_url_edge_cases(self):
+ test_cases = [
+ ("", ""),
+ (" ", ""),
+ (" https://example.com ", "https://example.com"),
+ ("not-a-url", "not-a-url"),
+ ("://invalid", "://invalid"),
+ ]
+
+ for original, expected in test_cases:
+ with self.subTest(url=original):
+ result = normalize_url(original)
+ self.assertEqual(expected, result)
+
+ def test_normalize_url_internationalized_domain_names(self):
+ test_cases = [
+ (
+ "https://xn--fsq.xn--0zwm56d",
+ "https://xn--fsq.xn--0zwm56d",
+ ),
+ ("https://测试.中国", "https://测试.中国"),
+ ]
+
+ for original, expected in test_cases:
+ with self.subTest(url=original):
+ result = normalize_url(original)
+ self.assertEqual(expected.lower() if expected else expected, result)
+
+ def test_normalize_url_complex_query_parameters(self):
+ test_cases = [
+ (
+ "https://example.com?z=1&a=2&z=3&b=4",
+ "https://example.com?a=2&b=4&z=1&z=3", # Multiple values for same key
+ ),
+ (
+ "https://example.com?param=value1¶m=value2",
+ "https://example.com?param=value1¶m=value2",
+ ),
+ (
+ "https://example.com?special=%21%40%23%24%25",
+ "https://example.com?special=%21%40%23%24%25",
+ ),
+ ]
+
+ for original, expected in test_cases:
+ with self.subTest(url=original):
+ result = normalize_url(original)
+ self.assertEqual(expected, result)
diff --git a/bookmarks/utils.py b/bookmarks/utils.py
index 27db7d4..da814fa 100644
--- a/bookmarks/utils.py
+++ b/bookmarks/utils.py
@@ -139,3 +139,49 @@ def generate_username(email, claims):
else:
username = email
return unicodedata.normalize("NFKC", username)[:150]
+
+
+def normalize_url(url: str) -> str:
+ if not url or not isinstance(url, str):
+ return ""
+
+ url = url.strip()
+ if not url:
+ return ""
+
+ try:
+ parsed = urllib.parse.urlparse(url)
+
+ # Normalize the scheme to lowercase
+ scheme = parsed.scheme.lower()
+
+ # Normalize the netloc (domain) to lowercase
+ netloc = parsed.hostname.lower() if parsed.hostname else ""
+ if parsed.port:
+ netloc += f":{parsed.port}"
+ if parsed.username:
+ auth = parsed.username
+ if parsed.password:
+ auth += f":{parsed.password}"
+ netloc = f"{auth}@{netloc}"
+
+ # Remove trailing slashes from all paths
+ path = parsed.path.rstrip("/") if parsed.path else ""
+
+ # Sort query parameters alphabetically
+ query = ""
+ if parsed.query:
+ query_params = urllib.parse.parse_qsl(parsed.query, keep_blank_values=True)
+ query_params.sort(key=lambda x: (x[0], x[1]))
+ query = urllib.parse.urlencode(query_params, quote_via=urllib.parse.quote)
+
+ # Keep fragment as-is
+ fragment = parsed.fragment
+
+ # Reconstruct the normalized URL
+ return urllib.parse.urlunparse(
+ (scheme, netloc, path, parsed.params, query, fragment)
+ )
+
+ except (ValueError, AttributeError):
+ return url