mirror of
https://github.com/sissbruecker/linkding.git
synced 2025-08-13 21:49:26 +02:00
Automatically add tags to bookmarks based on URL pattern (#736)
* [WIP] DSL * upd * upd * upd * upd * upd * upd * upd * upd * upd * upd * upd * dsl2 * full feature * upd * upd * upd * upd * rename to auto_tagging_rules * update migration after rebase * add REST API tests * improve settings view --------- Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@gmail.com>
This commit is contained in:

committed by
GitHub

parent
e03f536925
commit
fa5f78cf71
70
bookmarks/services/auto_tagging.py
Normal file
70
bookmarks/services/auto_tagging.py
Normal file
@@ -0,0 +1,70 @@
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
import re
|
||||
import idna
|
||||
|
||||
|
||||
def get_tags(script: str, url: str):
    """Collect the tags of every auto-tagging rule in ``script`` that matches ``url``.

    ``script`` holds one rule per line in the form
    ``<url-pattern> <tag> [<tag> ...]``. Everything from a ``#`` to the end
    of a line is treated as a comment, and lines with fewer than two
    whitespace-separated parts are ignored. A pattern may start with
    ``http://`` or ``https://`` (which is dropped) and consists of a domain,
    optionally followed by a ``/path`` and a ``?query``.

    Both the script and the URL are lower-cased before matching, so matching
    is case-insensitive and the returned tag names are lower-case.

    Returns a set of tag names. The set is empty when no rule matches or
    when ``url`` has no host component.
    """
    parsed_url = urlparse(url.lower())
    result = set()

    # Use ``hostname`` rather than ``netloc``: ``netloc`` still carries any
    # ``user:password@`` prefix and ``:port`` suffix, which are not part of
    # the domain and must not take part in domain matching.
    hostname = parsed_url.hostname
    if not hostname:
        # Without a host (e.g. ``mailto:`` URLs) no domain pattern can match.
        return result

    for line in script.lower().split("\n"):
        # Drop a trailing comment, if any.
        line = line.partition("#")[0]

        parts = line.split()
        if len(parts) < 2:
            continue

        pattern = re.sub(r"^https?://", "", parts[0])

        # Split "<domain>[/<path>[?<query>]]". The path keeps its leading
        # "/"; a "?" is only treated as a query separator after a path.
        domain_pattern, slash, remainder = pattern.partition("/")
        path_pattern, _, qs_pattern = (slash + remainder).partition("?")

        if not _domains_matches(domain_pattern, hostname):
            continue

        if path_pattern and not _path_matches(path_pattern, parsed_url.path):
            continue

        if qs_pattern and not _qs_matches(qs_pattern, parsed_url.query):
            continue

        result.update(parts[1:])

    return result
|
||||
|
||||
|
||||
def _path_matches(expected_path: str, actual_path: str) -> bool:
|
||||
return actual_path.startswith(expected_path)
|
||||
|
||||
|
||||
def _domains_matches(expected_domain: str, actual_domain: str) -> bool:
    """Return True when ``actual_domain`` is ``expected_domain`` or a subdomain of it.

    Both names are passed through ``idna.encode`` first, so they are compared
    in the same encoded form.

    The match is anchored on a label boundary: the pattern ``example.com``
    matches ``example.com`` and ``sub.example.com`` but not
    ``notexample.com``. A name that ``idna.encode`` rejects never matches.
    """
    try:
        expected = idna.encode(expected_domain)
        actual = idna.encode(actual_domain)
    except idna.IDNAError:
        # A malformed pattern or host should simply not match instead of
        # aborting the whole tagging run.
        return False

    # A bare ``endswith`` would also accept unrelated domains that merely
    # share a textual suffix, so require an exact match or a "." separator.
    return actual == expected or actual.endswith(b"." + expected)
|
||||
|
||||
|
||||
def _qs_matches(expected_qs: str, actual_qs: str) -> bool:
|
||||
expected_qs = parse_qs(expected_qs, keep_blank_values=True)
|
||||
actual_qs = parse_qs(actual_qs, keep_blank_values=True)
|
||||
|
||||
for key in expected_qs:
|
||||
if key not in actual_qs:
|
||||
return False
|
||||
for value in expected_qs[key]:
|
||||
if value != "" and value not in actual_qs[key]:
|
||||
return False
|
||||
|
||||
return True
|
@@ -10,6 +10,7 @@ from django.utils import timezone
|
||||
from bookmarks.models import Bookmark, BookmarkAsset, parse_tag_string
|
||||
from bookmarks.services import tasks
|
||||
from bookmarks.services import website_loader
|
||||
from bookmarks.services import auto_tagging
|
||||
from bookmarks.services.tags import get_or_create_tags
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -242,6 +243,15 @@ def _update_website_metadata(bookmark: Bookmark):
|
||||
|
||||
def _update_bookmark_tags(bookmark: Bookmark, tag_string: str, user: User):
    """Set the bookmark's tags from ``tag_string`` plus any auto-tagging matches.

    The names parsed from ``tag_string`` come first. When the user's profile
    has auto-tagging rules, the tags those rules produce for ``bookmark.url``
    are appended unless already in the list. The resulting names are resolved
    through ``get_or_create_tags`` and assigned to the bookmark.
    """
    tag_names = parse_tag_string(tag_string)

    rules = user.profile.auto_tagging_rules
    if rules:
        for candidate in auto_tagging.get_tags(rules, bookmark.url):
            if candidate in tag_names:
                continue
            tag_names.append(candidate)

    bookmark.tags.set(get_or_create_tags(tag_names, user))
|
||||
|
||||
|
Reference in New Issue
Block a user