Automatically add tags to bookmarks based on URL pattern (#736)

* [WIP] DSL

* upd

* upd

* upd

* upd

* upd

* upd

* upd

* upd

* upd

* upd

* upd

* dsl2

* full feature

* upd

* upd

* upd

* upd

* rename to auto_tagging_rules

* update migration after rebase

* add REST API tests

* improve settings view

---------

Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
This commit is contained in:
Viacheslav Slinko
2024-05-17 10:39:46 +03:00
committed by GitHub
parent e03f536925
commit fa5f78cf71
9 changed files with 369 additions and 0 deletions

View File

@@ -0,0 +1,70 @@
from urllib.parse import urlparse, parse_qs
import re
import idna
def get_tags(script: str, url: str):
    """Return the set of tags whose auto-tagging rules match ``url``.

    ``script`` contains one rule per line in the form
    ``<url pattern> <tag> [<tag> ...]``. Everything after a ``#`` on a line
    is treated as a comment. A pattern may start with ``http://`` or
    ``https://`` (which is ignored) and consists of a domain, optionally
    followed by a path prefix and a query string.

    Both the script and the URL are lower-cased before matching, so matching
    is case-insensitive and the returned tags are lower-case.

    Lines with fewer than two whitespace-separated parts and rules without a
    domain are skipped. A URL without a host name matches no rule.
    """
    parsed_url = urlparse(url.lower())
    # Match against the bare host name: ``netloc`` would also carry any
    # ``user:password@`` prefix and ``:port`` suffix, which are not part of
    # the domain and must not take part in domain matching.
    hostname = parsed_url.hostname
    result = set()

    if not hostname:
        # Every rule is anchored on a domain, so nothing can match.
        return result

    for line in script.lower().split("\n"):
        # Drop a trailing comment, if any.
        line = line.partition("#")[0]
        parts = line.split()
        if len(parts) < 2:
            # Blank line, comment-only line, or a pattern without tags.
            continue

        pattern = re.sub("^https?://", "", parts[0])

        # Split "<domain>[/<path>[?<query>]]" into its components.
        domain_pattern, slash, remainder = pattern.partition("/")
        path_pattern = slash + remainder if slash else None
        qs_pattern = None
        if path_pattern and "?" in path_pattern:
            path_pattern, _, qs_pattern = path_pattern.partition("?")

        if not domain_pattern:
            # A rule such as "/path tag" has no domain to match against.
            continue

        if not _domains_matches(domain_pattern, hostname):
            continue
        if path_pattern and not _path_matches(path_pattern, parsed_url.path):
            continue
        if qs_pattern and not _qs_matches(qs_pattern, parsed_url.query):
            continue

        result.update(parts[1:])

    return result
def _path_matches(expected_path: str, actual_path: str) -> bool:
return actual_path.startswith(expected_path)
def _domains_matches(expected_domain: str, actual_domain: str) -> bool:
    """Return whether ``actual_domain`` is ``expected_domain`` or a subdomain of it.

    Both names are IDNA-encoded before they are compared, so Unicode and
    punycode spellings of the same domain compare equal. A name that cannot
    be encoded (empty, or containing characters that are not valid in a
    domain) never matches.
    """
    try:
        expected = idna.encode(expected_domain)
        actual = idna.encode(actual_domain)
    except idna.IDNAError:
        # An invalid rule or host must not abort processing of other rules.
        return False

    # Compare on label boundaries: a bare suffix test would let the rule
    # "example.com" match the unrelated host "notexample.com".
    return actual == expected or actual.endswith(b"." + expected)
def _qs_matches(expected_qs: str, actual_qs: str) -> bool:
expected_qs = parse_qs(expected_qs, keep_blank_values=True)
actual_qs = parse_qs(actual_qs, keep_blank_values=True)
for key in expected_qs:
if key not in actual_qs:
return False
for value in expected_qs[key]:
if value != "" and value not in actual_qs[key]:
return False
return True

View File

@@ -10,6 +10,7 @@ from django.utils import timezone
from bookmarks.models import Bookmark, BookmarkAsset, parse_tag_string
from bookmarks.services import tasks
from bookmarks.services import website_loader
from bookmarks.services import auto_tagging
from bookmarks.services.tags import get_or_create_tags
logger = logging.getLogger(__name__)
@@ -242,6 +243,15 @@ def _update_website_metadata(bookmark: Bookmark):
def _update_bookmark_tags(bookmark: Bookmark, tag_string: str, user: User):
    """Set the bookmark's tags from ``tag_string`` plus any matching auto tags.

    The tag names parsed from ``tag_string`` come first. If the user's
    profile defines auto-tagging rules, the tags those rules yield for the
    bookmark URL are appended unless already present. The resulting names are
    resolved through ``get_or_create_tags`` and assigned to the bookmark,
    replacing its previous tags.
    """
    tag_names = parse_tag_string(tag_string)

    if user.profile.auto_tagging_rules:
        try:
            auto_tag_names = auto_tagging.get_tags(
                user.profile.auto_tagging_rules, bookmark.url
            )
        except ValueError:
            # Auto-tagging is a convenience on top of the user's own tags: a
            # URL or rule that cannot be parsed must not prevent the bookmark
            # from being saved. Malformed URLs and domains surface as
            # ValueError (IDNA errors derive from UnicodeError).
            logger.exception("Failed to auto-tag bookmark. url=%s", bookmark.url)
            auto_tag_names = ()

        for auto_tag_name in auto_tag_names:
            if auto_tag_name not in tag_names:
                tag_names.append(auto_tag_name)

    tags = get_or_create_tags(tag_names, user)
    bookmark.tags.set(tags)