From 051bd39256450b91ac2f3a70111e0f145782f5b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sascha=20I=C3=9Fbr=C3=BCcker?= Date: Sun, 5 Oct 2025 12:51:08 +0200 Subject: [PATCH] Add new search engine that supports logical expressions (and, or, not) (#1198) * parser implementation * add support for quoted strings * add support for tags * ignore empty tags * implicit and * prepare query conversion by disabling tests * convert query logic * fix nested combined tag searches * simplify query logic * Add special keyword support to parser * Add special keyword support to query builder * Handle invalid queries in query builder * Notify user about invalid queries * Add helper to strip tags from search query * Make tag cloud show all tags from search query * Use new method for extracting tags * Add query for getting tags from search query * Get selected tags through specific context * Properly remove selected tags from complex queries * cleanup * Clarify bundle search terms * Add documentation draft * Improve adding tags to search query * Add option to switch back to the old search --- .../0049_userprofile_legacy_search.py | 18 + bookmarks/migrations/0050_new_search_toast.py | 34 + bookmarks/models.py | 3 +- bookmarks/queries.py | 200 ++- bookmarks/services/search_query_parser.py | 575 ++++++++ bookmarks/styles/theme/empty.css | 2 +- .../templates/bookmarks/bookmark_list.html | 14 +- .../templates/bookmarks/details/form.html | 4 +- .../templates/bookmarks/empty_bookmarks.html | 22 +- bookmarks/templates/bookmarks/tag_cloud.html | 6 +- bookmarks/templates/bundles/form.html | 4 +- bookmarks/templates/settings/general.html | 12 + bookmarks/templatetags/shared.py | 52 - .../tests/test_bookmarks_list_template.py | 49 + bookmarks/tests/test_pagination_tag.py | 8 +- bookmarks/tests/test_queries.py | 323 ++++- bookmarks/tests/test_search_query_parser.py | 1277 +++++++++++++++++ bookmarks/tests/test_settings_general_view.py | 3 + bookmarks/tests/test_tag_cloud_template.py | 78 + 
bookmarks/views/contexts.py | 199 ++- docs/astro.config.mjs | 1 + docs/src/content/docs/search.md | 74 + 22 files changed, 2831 insertions(+), 127 deletions(-) create mode 100644 bookmarks/migrations/0049_userprofile_legacy_search.py create mode 100644 bookmarks/migrations/0050_new_search_toast.py create mode 100644 bookmarks/services/search_query_parser.py create mode 100644 bookmarks/tests/test_search_query_parser.py create mode 100644 docs/src/content/docs/search.md diff --git a/bookmarks/migrations/0049_userprofile_legacy_search.py b/bookmarks/migrations/0049_userprofile_legacy_search.py new file mode 100644 index 0000000..84b0bb1 --- /dev/null +++ b/bookmarks/migrations/0049_userprofile_legacy_search.py @@ -0,0 +1,18 @@ +# Generated by Django 5.2.5 on 2025-10-05 09:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookmarks", "0048_userprofile_default_mark_shared"), + ] + + operations = [ + migrations.AddField( + model_name="userprofile", + name="legacy_search", + field=models.BooleanField(default=False), + ), + ] diff --git a/bookmarks/migrations/0050_new_search_toast.py b/bookmarks/migrations/0050_new_search_toast.py new file mode 100644 index 0000000..623e6c5 --- /dev/null +++ b/bookmarks/migrations/0050_new_search_toast.py @@ -0,0 +1,34 @@ +# Generated by Django 5.2.5 on 2025-10-05 10:01 + +from django.contrib.auth import get_user_model +from django.db import migrations + +from bookmarks.models import Toast + +User = get_user_model() + + +def forwards(apps, schema_editor): + + for user in User.objects.all(): + toast = Toast( + key="new_search_toast", + message="This version replaces the search engine with a new implementation that supports logical operators (and, or, not). 
If you run into any issues with the new search, you can switch back to the old one by enabling legacy search in the settings.", + owner=user, + ) + toast.save() + + +def reverse(apps, schema_editor): + Toast.objects.filter(key="new_search_toast").delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookmarks", "0049_userprofile_legacy_search"), + ] + + operations = [ + migrations.RunPython(forwards, reverse), + ] diff --git a/bookmarks/models.py b/bookmarks/models.py index af6c3cd..cb86d74 100644 --- a/bookmarks/models.py +++ b/bookmarks/models.py @@ -2,7 +2,6 @@ import binascii import hashlib import logging import os -from functools import cached_property from typing import List from django import forms @@ -486,6 +485,7 @@ class UserProfile(models.Model): sticky_pagination = models.BooleanField(default=False, null=False) collapse_side_panel = models.BooleanField(default=False, null=False) hide_bundles = models.BooleanField(default=False, null=False) + legacy_search = models.BooleanField(default=False, null=False) def save(self, *args, **kwargs): if self.custom_css: @@ -528,6 +528,7 @@ class UserProfileForm(forms.ModelForm): "sticky_pagination", "collapse_side_panel", "hide_bundles", + "legacy_search", ] diff --git a/bookmarks/queries.py b/bookmarks/queries.py index 5337b0b..e99c294 100644 --- a/bookmarks/queries.py +++ b/bookmarks/queries.py @@ -15,6 +15,18 @@ from bookmarks.models import ( UserProfile, parse_tag_string, ) +from bookmarks.services.search_query_parser import ( + parse_search_query, + SearchExpression, + TermExpression, + TagExpression, + SpecialKeywordExpression, + AndExpression, + OrExpression, + NotExpression, + SearchQueryParseError, + extract_tag_names_from_query, +) from bookmarks.utils import unique @@ -45,6 +57,122 @@ def query_shared_bookmarks( return _base_bookmarks_query(user, profile, search).filter(conditions) +def _convert_ast_to_q_object(ast_node: SearchExpression, profile: UserProfile) -> Q: + if 
isinstance(ast_node, TermExpression): + # Search across title, description, notes, URL + conditions = ( + Q(title__icontains=ast_node.term) + | Q(description__icontains=ast_node.term) + | Q(notes__icontains=ast_node.term) + | Q(url__icontains=ast_node.term) + ) + + # In lax mode, also search in tag names + if profile.tag_search == UserProfile.TAG_SEARCH_LAX: + conditions = conditions | Exists( + Bookmark.objects.filter( + id=OuterRef("id"), tags__name__iexact=ast_node.term + ) + ) + + return conditions + + elif isinstance(ast_node, TagExpression): + # Use Exists() to avoid reusing the same join when combining multiple tag expressions with and + return Q( + Exists( + Bookmark.objects.filter( + id=OuterRef("id"), tags__name__iexact=ast_node.tag + ) + ) + ) + + elif isinstance(ast_node, SpecialKeywordExpression): + # Handle special keywords + if ast_node.keyword.lower() == "unread": + return Q(unread=True) + elif ast_node.keyword.lower() == "untagged": + return Q(tags=None) + else: + # Unknown keyword, return empty Q object (matches all) + return Q() + + elif isinstance(ast_node, AndExpression): + # Combine left and right with AND + left_q = _convert_ast_to_q_object(ast_node.left, profile) + right_q = _convert_ast_to_q_object(ast_node.right, profile) + return left_q & right_q + + elif isinstance(ast_node, OrExpression): + # Combine left and right with OR + left_q = _convert_ast_to_q_object(ast_node.left, profile) + right_q = _convert_ast_to_q_object(ast_node.right, profile) + return left_q | right_q + + elif isinstance(ast_node, NotExpression): + # Negate the operand + operand_q = _convert_ast_to_q_object(ast_node.operand, profile) + return ~operand_q + + else: + # Fallback for unknown node types + return Q() + + +def _filter_search_query( + query_set: QuerySet, query_string: str, profile: UserProfile +) -> QuerySet: + """New search filtering logic using logical expressions.""" + + try: + ast = parse_search_query(query_string) + if ast: + search_query = 
_convert_ast_to_q_object(ast, profile) + query_set = query_set.filter(search_query) + except SearchQueryParseError: + # If the query cannot be parsed, return zero results + return query_set.none() + + return query_set + + +def _filter_search_query_legacy( + query_set: QuerySet, query_string: str, profile: UserProfile +) -> QuerySet: + """Legacy search filtering logic where everything is just combined with AND.""" + + # Split query into search terms and tags + query = parse_query_string(query_string) + + # Filter for search terms and tags + for term in query["search_terms"]: + conditions = ( + Q(title__icontains=term) + | Q(description__icontains=term) + | Q(notes__icontains=term) + | Q(url__icontains=term) + ) + + if profile.tag_search == UserProfile.TAG_SEARCH_LAX: + conditions = conditions | Exists( + Bookmark.objects.filter(id=OuterRef("id"), tags__name__iexact=term) + ) + + query_set = query_set.filter(conditions) + + for tag_name in query["tag_names"]: + query_set = query_set.filter(tags__name__iexact=tag_name) + + # Untagged bookmarks + if query["untagged"]: + query_set = query_set.filter(tags=None) + # Legacy unread bookmarks filter from query + if query["unread"]: + query_set = query_set.filter(unread=True) + + return query_set + + def _filter_bundle(query_set: QuerySet, bundle: BookmarkBundle) -> QuerySet: # Search terms search_terms = parse_query_string(bundle.search)["search_terms"] @@ -113,34 +241,11 @@ def _base_bookmarks_query( # If the date format is invalid, ignore the filter pass - # Split query into search terms and tags - query = parse_query_string(search.q) - - # Filter for search terms and tags - for term in query["search_terms"]: - conditions = ( - Q(title__icontains=term) - | Q(description__icontains=term) - | Q(notes__icontains=term) - | Q(url__icontains=term) - ) - - if profile.tag_search == UserProfile.TAG_SEARCH_LAX: - conditions = conditions | Exists( - Bookmark.objects.filter(id=OuterRef("id"), tags__name__iexact=term) - ) - - query_set 
= query_set.filter(conditions) - - for tag_name in query["tag_names"]: - query_set = query_set.filter(tags__name__iexact=tag_name) - - # Untagged bookmarks - if query["untagged"]: - query_set = query_set.filter(tags=None) - # Legacy unread bookmarks filter from query - if query["unread"]: - query_set = query_set.filter(unread=True) + # Filter by search query + if profile.legacy_search: + query_set = _filter_search_query_legacy(query_set, search.q, profile) + else: + query_set = _filter_search_query(query_set, search.q, profile) # Unread filter from bookmark search if search.unread == BookmarkSearch.FILTER_UNREAD_YES: @@ -241,6 +346,45 @@ def get_user_tags(user: User): return Tag.objects.filter(owner=user).all() +def get_tags_for_query(user: User, profile: UserProfile, query: str) -> QuerySet: + tag_names = extract_tag_names_from_query(query, profile) + + if not tag_names: + return Tag.objects.none() + + tag_conditions = Q() + for tag_name in tag_names: + tag_conditions |= Q(name__iexact=tag_name) + + return Tag.objects.filter(owner=user).filter(tag_conditions).distinct() + + +def get_shared_tags_for_query( + user: Optional[User], profile: UserProfile, query: str, public_only: bool +) -> QuerySet: + tag_names = extract_tag_names_from_query(query, profile) + + if not tag_names: + return Tag.objects.none() + + # Build conditions similar to query_shared_bookmarks + conditions = Q(bookmark__shared=True) & Q( + bookmark__owner__profile__enable_sharing=True + ) + if public_only: + conditions = conditions & Q( + bookmark__owner__profile__enable_public_sharing=True + ) + if user is not None: + conditions = conditions & Q(bookmark__owner=user) + + tag_conditions = Q() + for tag_name in tag_names: + tag_conditions |= Q(name__iexact=tag_name) + + return Tag.objects.filter(conditions).filter(tag_conditions).distinct() + + def parse_query_string(query_string): # Sanitize query params if not query_string: diff --git a/bookmarks/services/search_query_parser.py 
b/bookmarks/services/search_query_parser.py new file mode 100644 index 0000000..25415f4 --- /dev/null +++ b/bookmarks/services/search_query_parser.py @@ -0,0 +1,575 @@ +from dataclasses import dataclass +from enum import Enum +from typing import List, Optional + +from bookmarks.models import UserProfile + + +class TokenType(Enum): + TERM = "TERM" + TAG = "TAG" + SPECIAL_KEYWORD = "SPECIAL_KEYWORD" + AND = "AND" + OR = "OR" + NOT = "NOT" + LPAREN = "LPAREN" + RPAREN = "RPAREN" + EOF = "EOF" + + +@dataclass +class Token: + type: TokenType + value: str + position: int + + +class SearchQueryTokenizer: + def __init__(self, query: str): + self.query = query.strip() + self.position = 0 + self.current_char = self.query[0] if self.query else None + + def advance(self): + """Move to the next character in the query.""" + self.position += 1 + if self.position >= len(self.query): + self.current_char = None + else: + self.current_char = self.query[self.position] + + def skip_whitespace(self): + """Skip whitespace characters.""" + while self.current_char and self.current_char.isspace(): + self.advance() + + def read_term(self) -> str: + """Read a search term (sequence of non-whitespace, non-special characters).""" + term = "" + + while ( + self.current_char + and not self.current_char.isspace() + and self.current_char not in "()\"'#!" 
+ ): + term += self.current_char + self.advance() + + return term + + def read_quoted_string(self, quote_char: str) -> str: + """Read a quoted string, handling escaped quotes.""" + content = "" + self.advance() # skip opening quote + + while self.current_char and self.current_char != quote_char: + if self.current_char == "\\": + # Handle escaped characters + self.advance() + if self.current_char: + if self.current_char == "n": + content += "\n" + elif self.current_char == "t": + content += "\t" + elif self.current_char == "r": + content += "\r" + elif self.current_char == "\\": + content += "\\" + elif self.current_char == quote_char: + content += quote_char + else: + # For any other escaped character, just include it as-is + content += self.current_char + self.advance() + else: + content += self.current_char + self.advance() + + if self.current_char == quote_char: + self.advance() # skip closing quote + else: + # Unclosed quote - we could raise an error here, but let's be lenient + # and treat it as if the quote was closed at the end + pass + + return content + + def read_tag(self) -> str: + """Read a tag (starts with # and continues until whitespace or special chars).""" + tag = "" + self.advance() # skip the # character + + while ( + self.current_char + and not self.current_char.isspace() + and self.current_char not in "()\"'" + ): + tag += self.current_char + self.advance() + + return tag + + def read_special_keyword(self) -> str: + """Read a special keyword (starts with ! and continues until whitespace or special chars).""" + keyword = "" + self.advance() # skip the ! 
character + + while ( + self.current_char + and not self.current_char.isspace() + and self.current_char not in "()\"'" + ): + keyword += self.current_char + self.advance() + + return keyword + + def tokenize(self) -> List[Token]: + """Convert the query string into a list of tokens.""" + tokens = [] + + while self.current_char: + self.skip_whitespace() + + if not self.current_char: + break + + start_pos = self.position + + if self.current_char == "(": + tokens.append(Token(TokenType.LPAREN, "(", start_pos)) + self.advance() + elif self.current_char == ")": + tokens.append(Token(TokenType.RPAREN, ")", start_pos)) + self.advance() + elif self.current_char in "\"'": + # Read a quoted string - always treated as a term + quote_char = self.current_char + term = self.read_quoted_string(quote_char) + tokens.append(Token(TokenType.TERM, term, start_pos)) + elif self.current_char == "#": + # Read a tag + tag = self.read_tag() + # Only add the tag token if it has content + if tag: + tokens.append(Token(TokenType.TAG, tag, start_pos)) + elif self.current_char == "!": + # Read a special keyword + keyword = self.read_special_keyword() + # Only add the keyword token if it has content + if keyword: + tokens.append(Token(TokenType.SPECIAL_KEYWORD, keyword, start_pos)) + else: + # Read a term and check if it's an operator + term = self.read_term() + term_lower = term.lower() + + if term_lower == "and": + tokens.append(Token(TokenType.AND, term, start_pos)) + elif term_lower == "or": + tokens.append(Token(TokenType.OR, term, start_pos)) + elif term_lower == "not": + tokens.append(Token(TokenType.NOT, term, start_pos)) + else: + tokens.append(Token(TokenType.TERM, term, start_pos)) + + tokens.append(Token(TokenType.EOF, "", len(self.query))) + return tokens + + +class SearchExpression: + pass + + +@dataclass +class TermExpression(SearchExpression): + term: str + + +@dataclass +class TagExpression(SearchExpression): + tag: str + + +@dataclass +class 
SpecialKeywordExpression(SearchExpression): + keyword: str + + +@dataclass +class AndExpression(SearchExpression): + left: SearchExpression + right: SearchExpression + + +@dataclass +class OrExpression(SearchExpression): + left: SearchExpression + right: SearchExpression + + +@dataclass +class NotExpression(SearchExpression): + operand: SearchExpression + + +class SearchQueryParseError(Exception): + def __init__(self, message: str, position: int): + self.message = message + self.position = position + super().__init__(f"{message} at position {position}") + + +class SearchQueryParser: + def __init__(self, tokens: List[Token]): + self.tokens = tokens + self.position = 0 + self.current_token = tokens[0] if tokens else Token(TokenType.EOF, "", 0) + + def advance(self): + """Move to the next token.""" + if self.position < len(self.tokens) - 1: + self.position += 1 + self.current_token = self.tokens[self.position] + + def consume(self, expected_type: TokenType) -> Token: + """Consume a token of the expected type or raise an error.""" + if self.current_token.type == expected_type: + token = self.current_token + self.advance() + return token + else: + raise SearchQueryParseError( + f"Expected {expected_type.value}, got {self.current_token.type.value}", + self.current_token.position, + ) + + def parse(self) -> Optional[SearchExpression]: + """Parse the tokens into an AST.""" + if not self.tokens or ( + len(self.tokens) == 1 and self.tokens[0].type == TokenType.EOF + ): + return None + + expr = self.parse_or_expression() + + if self.current_token.type != TokenType.EOF: + raise SearchQueryParseError( + f"Unexpected token {self.current_token.type.value}", + self.current_token.position, + ) + + return expr + + def parse_or_expression(self) -> SearchExpression: + """Parse OR expressions (lowest precedence).""" + left = self.parse_and_expression() + + while self.current_token.type == TokenType.OR: + self.advance() # consume OR + right = self.parse_and_expression() + left = 
OrExpression(left, right) + + return left + + def parse_and_expression(self) -> SearchExpression: + """Parse AND expressions (medium precedence), including implicit AND.""" + left = self.parse_not_expression() + + while self.current_token.type == TokenType.AND or self.current_token.type in [ + TokenType.TERM, + TokenType.TAG, + TokenType.SPECIAL_KEYWORD, + TokenType.LPAREN, + TokenType.NOT, + ]: + + if self.current_token.type == TokenType.AND: + self.advance() # consume explicit AND + # else: implicit AND (don't advance token) + + right = self.parse_not_expression() + left = AndExpression(left, right) + + return left + + def parse_not_expression(self) -> SearchExpression: + """Parse NOT expressions (high precedence).""" + if self.current_token.type == TokenType.NOT: + self.advance() # consume NOT + operand = self.parse_not_expression() # right associative + return NotExpression(operand) + + return self.parse_primary_expression() + + def parse_primary_expression(self) -> SearchExpression: + """Parse primary expressions (terms, tags, special keywords, and parenthesized expressions).""" + if self.current_token.type == TokenType.TERM: + term = self.current_token.value + self.advance() + return TermExpression(term) + elif self.current_token.type == TokenType.TAG: + tag = self.current_token.value + self.advance() + return TagExpression(tag) + elif self.current_token.type == TokenType.SPECIAL_KEYWORD: + keyword = self.current_token.value + self.advance() + return SpecialKeywordExpression(keyword) + elif self.current_token.type == TokenType.LPAREN: + self.advance() # consume ( + expr = self.parse_or_expression() + self.consume(TokenType.RPAREN) # consume ) + return expr + else: + raise SearchQueryParseError( + f"Unexpected token {self.current_token.type.value}", + self.current_token.position, + ) + + +def parse_search_query(query: str) -> Optional[SearchExpression]: + if not query or not query.strip(): + return None + + tokenizer = SearchQueryTokenizer(query) + tokens = 
tokenizer.tokenize() + parser = SearchQueryParser(tokens) + return parser.parse() + + +def _needs_parentheses(expr: SearchExpression, parent_type: type) -> bool: + if isinstance(expr, OrExpression) and parent_type == AndExpression: + return True + # AndExpression or OrExpression needs parentheses when inside NotExpression + if isinstance(expr, (AndExpression, OrExpression)) and parent_type == NotExpression: + return True + return False + + +def _is_simple_expression(expr: SearchExpression) -> bool: + """Check if an expression is simple (term, tag, or keyword).""" + return isinstance(expr, (TermExpression, TagExpression, SpecialKeywordExpression)) + + +def _expression_to_string(expr: SearchExpression, parent_type: type = None) -> str: + if isinstance(expr, TermExpression): + # Quote terms if they contain spaces or special characters + if " " in expr.term or any(c in expr.term for c in ["(", ")", '"', "'"]): + # Escape any quotes in the term + escaped = expr.term.replace("\\", "\\\\").replace('"', '\\"') + return f'"{escaped}"' + return expr.term + + elif isinstance(expr, TagExpression): + return f"#{expr.tag}" + + elif isinstance(expr, SpecialKeywordExpression): + return f"!{expr.keyword}" + + elif isinstance(expr, NotExpression): + # Don't pass parent type to children + operand_str = _expression_to_string(expr.operand, None) + # Add parentheses if the operand is a binary operation + if isinstance(expr.operand, (AndExpression, OrExpression)): + return f"not ({operand_str})" + return f"not {operand_str}" + + elif isinstance(expr, AndExpression): + # Don't pass parent type to children - they'll add their own parens only if needed + left_str = _expression_to_string(expr.left, None) + right_str = _expression_to_string(expr.right, None) + + # Add parentheses to children if needed for precedence + if _needs_parentheses(expr.left, AndExpression): + left_str = f"({left_str})" + if _needs_parentheses(expr.right, AndExpression): + right_str = f"({right_str})" + + result = 
f"{left_str} {right_str}" + + # Add outer parentheses if needed based on parent context + if parent_type and _needs_parentheses(expr, parent_type): + result = f"({result})" + + return result + + elif isinstance(expr, OrExpression): + # Don't pass parent type to children + left_str = _expression_to_string(expr.left, None) + right_str = _expression_to_string(expr.right, None) + + # OrExpression children don't need parentheses unless they're also OR (handled by recursion) + result = f"{left_str} or {right_str}" + + # Add outer parentheses if needed based on parent context + if parent_type and _needs_parentheses(expr, parent_type): + result = f"({result})" + + return result + + else: + raise ValueError(f"Unknown expression type: {type(expr)}") + + +def expression_to_string(expr: Optional[SearchExpression]) -> str: + if expr is None: + return "" + return _expression_to_string(expr) + + +def _strip_tag_from_expression( + expr: Optional[SearchExpression], tag_name: str, enable_lax_search: bool = False +) -> Optional[SearchExpression]: + if expr is None: + return None + + if isinstance(expr, TagExpression): + # Remove this tag if it matches + if expr.tag.lower() == tag_name.lower(): + return None + return expr + + elif isinstance(expr, TermExpression): + # In lax search mode, also remove terms that match the tag name + if enable_lax_search and expr.term.lower() == tag_name.lower(): + return None + return expr + + elif isinstance(expr, SpecialKeywordExpression): + # Keep special keywords as-is + return expr + + elif isinstance(expr, NotExpression): + # Recursively filter the operand + filtered_operand = _strip_tag_from_expression( + expr.operand, tag_name, enable_lax_search + ) + if filtered_operand is None: + # If the operand is removed, the whole NOT expression should be removed + return None + return NotExpression(filtered_operand) + + elif isinstance(expr, AndExpression): + # Recursively filter both sides + left = _strip_tag_from_expression(expr.left, tag_name, 
enable_lax_search) + right = _strip_tag_from_expression(expr.right, tag_name, enable_lax_search) + + # If both sides are removed, remove the AND expression + if left is None and right is None: + return None + # If one side is removed, return the other side + elif left is None: + return right + elif right is None: + return left + else: + return AndExpression(left, right) + + elif isinstance(expr, OrExpression): + # Recursively filter both sides + left = _strip_tag_from_expression(expr.left, tag_name, enable_lax_search) + right = _strip_tag_from_expression(expr.right, tag_name, enable_lax_search) + + # If both sides are removed, remove the OR expression + if left is None and right is None: + return None + # If one side is removed, return the other side + elif left is None: + return right + elif right is None: + return left + else: + return OrExpression(left, right) + + else: + # Unknown expression type, return as-is + return expr + + +def strip_tag_from_query( + query: str, tag_name: str, user_profile: UserProfile | None = None +) -> str: + try: + ast = parse_search_query(query) + except SearchQueryParseError: + return query + + if ast is None: + return "" + + # Determine if lax search is enabled + enable_lax_search = False + if user_profile is not None: + enable_lax_search = user_profile.tag_search == UserProfile.TAG_SEARCH_LAX + + # Strip the tag from the AST + filtered_ast = _strip_tag_from_expression(ast, tag_name, enable_lax_search) + + # Convert back to a query string + return expression_to_string(filtered_ast) + + +def _extract_tag_names_from_expression( + expr: Optional[SearchExpression], enable_lax_search: bool = False +) -> List[str]: + if expr is None: + return [] + + if isinstance(expr, TagExpression): + return [expr.tag] + + elif isinstance(expr, TermExpression): + # In lax search mode, terms are also considered tags + if enable_lax_search: + return [expr.term] + return [] + + elif isinstance(expr, SpecialKeywordExpression): + # Special keywords are not 
tags + return [] + + elif isinstance(expr, NotExpression): + # Recursively extract from the operand + return _extract_tag_names_from_expression(expr.operand, enable_lax_search) + + elif isinstance(expr, (AndExpression, OrExpression)): + # Recursively extract from both sides and combine + left_tags = _extract_tag_names_from_expression(expr.left, enable_lax_search) + right_tags = _extract_tag_names_from_expression(expr.right, enable_lax_search) + return left_tags + right_tags + + else: + # Unknown expression type + return [] + + +def extract_tag_names_from_query( + query: str, user_profile: UserProfile | None = None +) -> List[str]: + try: + ast = parse_search_query(query) + except SearchQueryParseError: + return [] + + if ast is None: + return [] + + # Determine if lax search is enabled + enable_lax_search = False + if user_profile is not None: + enable_lax_search = user_profile.tag_search == UserProfile.TAG_SEARCH_LAX + + # Extract tag names from the AST + tag_names = _extract_tag_names_from_expression(ast, enable_lax_search) + + # Deduplicate (case-insensitive) and sort + seen = set() + unique_tags = [] + for tag in tag_names: + tag_lower = tag.lower() + if tag_lower not in seen: + seen.add(tag_lower) + unique_tags.append(tag_lower) + + return sorted(unique_tags) diff --git a/bookmarks/styles/theme/empty.css b/bookmarks/styles/theme/empty.css index 3ebb2bd..580d5d1 100644 --- a/bookmarks/styles/theme/empty.css +++ b/bookmarks/styles/theme/empty.css @@ -4,7 +4,7 @@ border-radius: var(--border-radius); color: var(--secondary-text-color); text-align: center; - padding: var(--unit-16) var(--unit-8); + padding: var(--unit-8) var(--unit-8); .empty-icon { margin-bottom: var(--layout-spacing-lg); diff --git a/bookmarks/templates/bookmarks/bookmark_list.html b/bookmarks/templates/bookmarks/bookmark_list.html index 61beed5..7af15d4 100644 --- a/bookmarks/templates/bookmarks/bookmark_list.html +++ b/bookmarks/templates/bookmarks/bookmark_list.html @@ -36,14 +36,14 @@ {% 
endif %} {% if bookmark_list.description_display == 'inline' %}
- {% if bookmark_item.tag_names %} + {% if bookmark_item.tags %} - {% for tag_name in bookmark_item.tag_names %} - {{ tag_name|hash_tag }} + {% for tag in bookmark_item.tags %} + #{{ tag.name }} {% endfor %} {% endif %} - {% if bookmark_item.tag_names and bookmark_item.description %} | {% endif %} + {% if bookmark_item.tags and bookmark_item.description %} | {% endif %} {% if bookmark_item.description %} {{ bookmark_item.description }} {% endif %} @@ -52,10 +52,10 @@ {% if bookmark_item.description %}
{{ bookmark_item.description }}
{% endif %} - {% if bookmark_item.tag_names %} + {% if bookmark_item.tags %}
- {% for tag_name in bookmark_item.tag_names %} - {{ tag_name|hash_tag }} + {% for tag in bookmark_item.tags %} + #{{ tag.name }} {% endfor %}
{% endif %} diff --git a/bookmarks/templates/bookmarks/details/form.html b/bookmarks/templates/bookmarks/details/form.html index a2231bb..b6f29a0 100644 --- a/bookmarks/templates/bookmarks/details/form.html +++ b/bookmarks/templates/bookmarks/details/form.html @@ -84,8 +84,8 @@

Tags

- {% for tag_name in details.bookmark.tag_names %} - {{ tag_name|hash_tag }} + {% for tag in details.tags %} + #{{ tag.name }} {% endfor %}
diff --git a/bookmarks/templates/bookmarks/empty_bookmarks.html b/bookmarks/templates/bookmarks/empty_bookmarks.html index 02626db..19dece6 100644 --- a/bookmarks/templates/bookmarks/empty_bookmarks.html +++ b/bookmarks/templates/bookmarks/empty_bookmarks.html @@ -1,9 +1,17 @@
-

You have no bookmarks yet

-

- You can get started by adding bookmarks, - importing your existing bookmarks or configuring the - browser extension or the bookmarklet. -

+ {% if not bookmark_list.query_is_valid %} +

Invalid search query

+

+ The search query you entered is not valid. Common reasons are unclosed parentheses or a logical operator (AND, OR, + NOT) without operands. The error message from the parser is: "{{ bookmark_list.query_error_message }}". +

+ {% else %} +

You have no bookmarks yet

+

+ You can get started by adding bookmarks, + importing your existing bookmarks or configuring the + browser extension or the bookmarklet. +

+ {% endif %}
diff --git a/bookmarks/templates/bookmarks/tag_cloud.html b/bookmarks/templates/bookmarks/tag_cloud.html index 52a8ad2..cbf414a 100644 --- a/bookmarks/templates/bookmarks/tag_cloud.html +++ b/bookmarks/templates/bookmarks/tag_cloud.html @@ -4,7 +4,7 @@ {% if tag_cloud.has_selected_tags %}

{% for tag in tag_cloud.selected_tags %} - -{{ tag.name }} @@ -17,14 +17,14 @@ {% for tag in group.tags %} {# Highlight first char of first tag in group #} {% if forloop.counter == 1 %} - {{ tag.name|first_char }}{{ tag.name|remaining_chars:1 }} {% else %} {# Render remaining tags normally #} - {{ tag.name }} diff --git a/bookmarks/templates/bundles/form.html b/bookmarks/templates/bundles/form.html index 014a1ee..a41f64c 100644 --- a/bookmarks/templates/bundles/form.html +++ b/bookmarks/templates/bundles/form.html @@ -11,7 +11,7 @@

- + {{ form.search|add_class:"form-input"|attr:"autocomplete:off"|attr:"placeholder: " }} {% if form.search.errors %}
@@ -19,7 +19,7 @@
{% endif %}
- Search terms to match bookmarks in this bundle. + All of these search terms must be present in a bookmark to match.
diff --git a/bookmarks/templates/settings/general.html b/bookmarks/templates/settings/general.html index a79a2db..6fe0eac 100644 --- a/bookmarks/templates/settings/general.html +++ b/bookmarks/templates/settings/general.html @@ -158,6 +158,18 @@ result will also include bookmarks where a search term matches otherwise. +
+ +
+ Since version 1.44.0, linkding has a new search engine that supports logical expressions (and, or, not). + If you run into any issues with the new search, you can enable this option to temporarily switch back to the old search. + Please report any issues you encounter with the new search on GitHub so they can be addressed. + This option will be removed in a future version. +
+
{{ form.tag_grouping|add_class:"form-select width-25 width-sm-100" }} diff --git a/bookmarks/templatetags/shared.py b/bookmarks/templatetags/shared.py index b067232..c8a56d4 100644 --- a/bookmarks/templatetags/shared.py +++ b/bookmarks/templatetags/shared.py @@ -23,53 +23,6 @@ def update_query_string(context, **kwargs): return query.urlencode() -@register.simple_tag(takes_context=True) -def add_tag_to_query(context, tag_name: str): - params = context.request.GET.copy() - - # Append to or create query string - query_string = params.get("q", "") - query_string = (query_string + " #" + tag_name).strip() - params.setlist("q", [query_string]) - - # Remove details ID and page number - params.pop("details", None) - params.pop("page", None) - - return params.urlencode() - - -@register.simple_tag(takes_context=True) -def remove_tag_from_query(context, tag_name: str): - params = context.request.GET.copy() - if params.__contains__("q"): - # Split query string into parts - query_string = params.__getitem__("q") - query_parts = query_string.split() - # Remove tag with hash - tag_name_with_hash = "#" + tag_name - query_parts = [ - part - for part in query_parts - if str.lower(part) != str.lower(tag_name_with_hash) - ] - # When using lax tag search, also remove tag without hash - profile = context.request.user_profile - if profile.tag_search == UserProfile.TAG_SEARCH_LAX: - query_parts = [ - part for part in query_parts if str.lower(part) != str.lower(tag_name) - ] - # Rebuild query string - query_string = " ".join(query_parts) - params.__setitem__("q", query_string) - - # Remove details ID and page number - params.pop("details", None) - params.pop("page", None) - - return params.urlencode() - - @register.simple_tag(takes_context=True) def replace_query_param(context, **kwargs): query = context.request.GET.copy() @@ -82,11 +35,6 @@ def replace_query_param(context, **kwargs): return query.urlencode() -@register.filter(name="hash_tag") -def hash_tag(tag_name): - return "#" + 
tag_name - - @register.filter(name="first_char") def first_char(text): return text[0] diff --git a/bookmarks/tests/test_bookmarks_list_template.py b/bookmarks/tests/test_bookmarks_list_template.py index a95bd87..758ef72 100644 --- a/bookmarks/tests/test_bookmarks_list_template.py +++ b/bookmarks/tests/test_bookmarks_list_template.py @@ -476,6 +476,27 @@ class BookmarkListTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin): self.assertEqual(tag_links[1].text, "#tag2") self.assertEqual(tag_links[2].text, "#tag3") + def test_bookmark_tag_query_string(self): + # appends tag to existing query string + bookmark = self.setup_bookmark(title="term1 term2") + tag1 = self.setup_tag(name="tag1") + bookmark.tags.add(tag1) + + html = self.render_template(url="/bookmarks?q=term1 and term2") + soup = self.make_soup(html) + tags = soup.select_one(".tags") + tag_links = tags.find_all("a") + self.assertEqual(len(tag_links), 1) + self.assertEqual(tag_links[0]["href"], "?q=term1+and+term2+%23tag1") + + # wraps or expression in parentheses + html = self.render_template(url="/bookmarks?q=term1 or term2") + soup = self.make_soup(html) + tags = soup.select_one(".tags") + tag_links = tags.find_all("a") + self.assertEqual(len(tag_links), 1) + self.assertEqual(tag_links[0]["href"], "?q=%28term1+or+term2%29+%23tag1") + def test_should_render_web_archive_link_with_absolute_date_setting(self): bookmark = self.setup_date_format_test( UserProfile.BOOKMARK_DATE_DISPLAY_ABSOLUTE, @@ -1017,6 +1038,34 @@ class BookmarkListTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin): '

You have no bookmarks yet

', html ) + def test_empty_state_with_valid_query_no_results(self): + self.setup_bookmark(title="Test Bookmark") + html = self.render_template(url="/bookmarks?q=nonexistent") + + self.assertInHTML( + '

You have no bookmarks yet

', html + ) + + def test_empty_state_with_invalid_query(self): + self.setup_bookmark() + html = self.render_template(url="/bookmarks?q=(test") + + self.assertInHTML('

Invalid search query

', html) + self.assertIn("Expected RPAREN", html) + + def test_empty_state_with_legacy_search(self): + profile = self.get_or_create_test_user().profile + profile.legacy_search = True + profile.save() + + self.setup_bookmark() + html = self.render_template(url="/bookmarks?q=(test") + + # With legacy search, search queries are not validated + self.assertInHTML( + '

You have no bookmarks yet

', html + ) + def test_pagination_is_not_sticky_by_default(self): self.setup_bookmark() html = self.render_template() diff --git a/bookmarks/tests/test_pagination_tag.py b/bookmarks/tests/test_pagination_tag.py index 17e4925..1200240 100644 --- a/bookmarks/tests/test_pagination_tag.py +++ b/bookmarks/tests/test_pagination_tag.py @@ -192,10 +192,6 @@ class PaginationTagTest(TestCase, BookmarkFactoryMixin): 100, 10, 2, url="/test?details=1&page=2" ) self.assertPrevLink(rendered_template, 1, href="/test?page=1") - self.assertPageLink( - rendered_template, 1, False, href="/test?page=1" - ) - self.assertPageLink( - rendered_template, 2, True, href="/test?page=2" - ) + self.assertPageLink(rendered_template, 1, False, href="/test?page=1") + self.assertPageLink(rendered_template, 2, True, href="/test?page=2") self.assertNextLink(rendered_template, 3, href="/test?page=3") diff --git a/bookmarks/tests/test_queries.py b/bookmarks/tests/test_queries.py index 7516aa7..b3bb889 100644 --- a/bookmarks/tests/test_queries.py +++ b/bookmarks/tests/test_queries.py @@ -11,7 +11,7 @@ from bookmarks.tests.helpers import BookmarkFactoryMixin, random_sentence from bookmarks.utils import unique -class QueriesTestCase(TestCase, BookmarkFactoryMixin): +class QueriesBasicTestCase(TestCase, BookmarkFactoryMixin): def setUp(self): self.profile = self.get_or_create_test_user().profile @@ -1551,3 +1551,324 @@ class QueriesTestCase(TestCase, BookmarkFactoryMixin): None, self.profile, BookmarkSearch(q="", bundle=bundle), False ) self.assertQueryResult(query, [matching_bookmarks]) + + +# Legacy search should be covered by basic test suite which was effectively the +# full test suite before advanced search was introduced. 
+class QueriesLegacySearchTestCase(QueriesBasicTestCase): + def setUp(self): + super().setUp() + self.profile.legacy_search = True + self.profile.save() + + +class QueriesAdvancedSearchTestCase(TestCase, BookmarkFactoryMixin): + + def setUp(self): + self.user = self.get_or_create_test_user() + self.profile = self.user.profile + + self.python_bookmark = self.setup_bookmark( + title="Python Tutorial", + tags=[self.setup_tag(name="python"), self.setup_tag(name="tutorial")], + ) + self.java_bookmark = self.setup_bookmark( + title="Java Guide", + tags=[self.setup_tag(name="java"), self.setup_tag(name="programming")], + ) + self.deprecated_python_bookmark = self.setup_bookmark( + title="Old Python Guide", + tags=[self.setup_tag(name="python"), self.setup_tag(name="deprecated")], + ) + self.javascript_tutorial = self.setup_bookmark( + title="JavaScript Basics", + tags=[self.setup_tag(name="javascript"), self.setup_tag(name="tutorial")], + ) + self.web_development = self.setup_bookmark( + title="Web Development with React", + description="Modern web development", + tags=[self.setup_tag(name="react"), self.setup_tag(name="web")], + ) + + def test_explicit_and_operator(self): + search = BookmarkSearch(q="python AND tutorial") + query = queries.query_bookmarks(self.user, self.profile, search) + self.assertCountEqual(list(query), [self.python_bookmark]) + + def test_or_operator(self): + search = BookmarkSearch(q="#python OR #java") + query = queries.query_bookmarks(self.user, self.profile, search) + self.assertCountEqual( + list(query), + [self.python_bookmark, self.java_bookmark, self.deprecated_python_bookmark], + ) + + def test_not_operator(self): + search = BookmarkSearch(q="#python AND NOT #deprecated") + query = queries.query_bookmarks(self.user, self.profile, search) + self.assertCountEqual(list(query), [self.python_bookmark]) + + def test_implicit_and_between_terms(self): + search = BookmarkSearch(q="web development") + query = queries.query_bookmarks(self.user, 
self.profile, search) + self.assertCountEqual(list(query), [self.web_development]) + + search = BookmarkSearch(q="python tutorial") + query = queries.query_bookmarks(self.user, self.profile, search) + self.assertCountEqual(list(query), [self.python_bookmark]) + + def test_implicit_and_between_tags(self): + search = BookmarkSearch(q="#python #tutorial") + query = queries.query_bookmarks(self.user, self.profile, search) + self.assertCountEqual(list(query), [self.python_bookmark]) + + def test_nested_and_expression(self): + search = BookmarkSearch(q="nonexistingterm OR (#python AND #tutorial)") + query = queries.query_bookmarks(self.user, self.profile, search) + self.assertCountEqual(list(query), [self.python_bookmark]) + + search = BookmarkSearch( + q="(#javascript AND #tutorial) OR (#python AND #tutorial)" + ) + query = queries.query_bookmarks(self.user, self.profile, search) + self.assertCountEqual( + list(query), [self.javascript_tutorial, self.python_bookmark] + ) + + def test_mixed_terms_and_tags_with_operators(self): + # Set lax mode to allow term matching against tags + self.profile.tag_search = self.profile.TAG_SEARCH_LAX + self.profile.save() + + search = BookmarkSearch(q="(tutorial OR guide) AND #python") + query = queries.query_bookmarks(self.user, self.profile, search) + self.assertCountEqual( + list(query), [self.python_bookmark, self.deprecated_python_bookmark] + ) + + def test_parentheses(self): + # Set lax mode to allow term matching against tags + self.profile.tag_search = self.profile.TAG_SEARCH_LAX + self.profile.save() + + # Without parentheses + search = BookmarkSearch(q="python AND tutorial OR javascript AND tutorial") + query = queries.query_bookmarks(self.user, self.profile, search) + self.assertCountEqual( + list(query), [self.python_bookmark, self.javascript_tutorial] + ) + + # With parentheses + search = BookmarkSearch(q="(python OR javascript) AND tutorial") + query = queries.query_bookmarks(self.user, self.profile, search) + 
self.assertCountEqual( + list(query), [self.python_bookmark, self.javascript_tutorial] + ) + + def test_complex_query_with_all_operators(self): + # Set lax mode to allow term matching against tags + self.profile.tag_search = self.profile.TAG_SEARCH_LAX + self.profile.save() + + search = BookmarkSearch( + q="(#python OR #javascript) AND tutorial AND NOT #deprecated" + ) + query = queries.query_bookmarks(self.user, self.profile, search) + self.assertCountEqual( + list(query), [self.python_bookmark, self.javascript_tutorial] + ) + + def test_quoted_strings_with_operators(self): + # Set lax mode to allow term matching against tags + self.profile.tag_search = self.profile.TAG_SEARCH_LAX + self.profile.save() + + search = BookmarkSearch(q='"Web Development" OR tutorial') + query = queries.query_bookmarks(self.user, self.profile, search) + self.assertCountEqual( + list(query), + [self.web_development, self.python_bookmark, self.javascript_tutorial], + ) + + def test_implicit_and_with_quoted_strings(self): + search = BookmarkSearch(q='"Web Development" react') + query = queries.query_bookmarks(self.user, self.profile, search) + self.assertCountEqual(list(query), [self.web_development]) + + def test_empty_query(self): + # empty query returns all bookmarks + search = BookmarkSearch(q="") + query = queries.query_bookmarks(self.user, self.profile, search) + expected = [ + self.python_bookmark, + self.java_bookmark, + self.deprecated_python_bookmark, + self.javascript_tutorial, + self.web_development, + ] + self.assertCountEqual(list(query), expected) + + def test_unparseable_query_returns_no_results(self): + # Use a query that causes a parse error (unclosed parenthesis) + search = BookmarkSearch(q="(python AND tutorial") + query = queries.query_bookmarks(self.user, self.profile, search) + self.assertCountEqual(list(query), []) + + +class GetTagsForQueryTestCase(TestCase, BookmarkFactoryMixin): + def setUp(self): + self.user = self.get_or_create_test_user() + self.profile = 
self.user.profile + + def test_returns_tags_matching_query(self): + python_tag = self.setup_tag(name="python") + django_tag = self.setup_tag(name="django") + self.setup_tag(name="unused") + + result = queries.get_tags_for_query( + self.user, self.profile, "#python and #django" + ) + self.assertCountEqual(list(result), [python_tag, django_tag]) + + def test_case_insensitive_matching(self): + python_tag = self.setup_tag(name="Python") + + result = queries.get_tags_for_query(self.user, self.profile, "#python") + self.assertCountEqual(list(result), [python_tag]) + + # having two tags with the same name returns both for now + other_python_tag = self.setup_tag(name="python") + + result = queries.get_tags_for_query(self.user, self.profile, "#python") + self.assertCountEqual(list(result), [python_tag, other_python_tag]) + + def test_lax_mode_includes_terms(self): + python_tag = self.setup_tag(name="python") + django_tag = self.setup_tag(name="django") + + self.profile.tag_search = UserProfile.TAG_SEARCH_LAX + self.profile.save() + + result = queries.get_tags_for_query( + self.user, self.profile, "#python and django" + ) + self.assertCountEqual(list(result), [python_tag, django_tag]) + + def test_strict_mode_excludes_terms(self): + python_tag = self.setup_tag(name="python") + self.setup_tag(name="django") + + result = queries.get_tags_for_query( + self.user, self.profile, "#python and django" + ) + self.assertCountEqual(list(result), [python_tag]) + + def test_only_returns_user_tags(self): + python_tag = self.setup_tag(name="python") + + other_user = self.setup_user() + other_python = self.setup_tag(name="python", user=other_user) + other_django = self.setup_tag(name="django", user=other_user) + + result = queries.get_tags_for_query( + self.user, self.profile, "#python and #django" + ) + self.assertCountEqual(list(result), [python_tag]) + self.assertNotIn(other_python, list(result)) + self.assertNotIn(other_django, list(result)) + + def 
test_empty_query_returns_no_tags(self): + self.setup_tag(name="python") + + result = queries.get_tags_for_query(self.user, self.profile, "") + self.assertCountEqual(list(result), []) + + def test_query_with_no_tags_returns_empty(self): + self.setup_tag(name="python") + + result = queries.get_tags_for_query(self.user, self.profile, "!unread") + self.assertCountEqual(list(result), []) + + def test_nonexistent_tag_returns_empty(self): + self.setup_tag(name="python") + + result = queries.get_tags_for_query(self.user, self.profile, "#ruby") + self.assertCountEqual(list(result), []) + + +class GetSharedTagsForQueryTestCase(TestCase, BookmarkFactoryMixin): + def setUp(self): + self.user = self.get_or_create_test_user() + self.profile = self.user.profile + self.profile.enable_sharing = True + self.profile.save() + + def test_returns_tags_from_shared_bookmarks(self): + python_tag = self.setup_tag(name="python") + self.setup_tag(name="django") + self.setup_bookmark(shared=True, tags=[python_tag]) + + result = queries.get_shared_tags_for_query( + None, self.profile, "#python and #django", public_only=False + ) + self.assertCountEqual(list(result), [python_tag]) + + def test_excludes_tags_from_non_shared_bookmarks(self): + python_tag = self.setup_tag(name="python") + self.setup_tag(name="django") + self.setup_bookmark(shared=False, tags=[python_tag]) + + result = queries.get_shared_tags_for_query( + None, self.profile, "#python and #django", public_only=False + ) + self.assertCountEqual(list(result), []) + + def test_respects_sharing_enabled_setting(self): + self.profile.enable_sharing = False + self.profile.save() + + python_tag = self.setup_tag(name="python") + self.setup_tag(name="django") + self.setup_bookmark(shared=True, tags=[python_tag]) + + result = queries.get_shared_tags_for_query( + None, self.profile, "#python and #django", public_only=False + ) + self.assertCountEqual(list(result), []) + + def test_public_only_flag(self): + # public sharing disabled + python_tag 
= self.setup_tag(name="python") + self.setup_tag(name="django") + self.setup_bookmark(shared=True, tags=[python_tag]) + + result = queries.get_shared_tags_for_query( + None, self.profile, "#python and #django", public_only=True + ) + self.assertCountEqual(list(result), []) + + # public sharing enabled + self.profile.enable_public_sharing = True + self.profile.save() + + result = queries.get_shared_tags_for_query( + None, self.profile, "#python and #django", public_only=True + ) + self.assertCountEqual(list(result), [python_tag]) + + def test_filters_by_user(self): + python_tag = self.setup_tag(name="python") + self.setup_tag(name="django") + self.setup_bookmark(shared=True, tags=[python_tag]) + + other_user = self.setup_user() + other_user.profile.enable_sharing = True + other_user.profile.save() + other_tag = self.setup_tag(name="python", user=other_user) + self.setup_bookmark(shared=True, tags=[other_tag], user=other_user) + + result = queries.get_shared_tags_for_query( + self.user, self.profile, "#python and #django", public_only=False + ) + self.assertCountEqual(list(result), [python_tag]) + self.assertNotIn(other_tag, list(result)) diff --git a/bookmarks/tests/test_search_query_parser.py b/bookmarks/tests/test_search_query_parser.py new file mode 100644 index 0000000..9041899 --- /dev/null +++ b/bookmarks/tests/test_search_query_parser.py @@ -0,0 +1,1277 @@ +from django.test import TestCase + +from bookmarks.services.search_query_parser import ( + SearchQueryTokenizer, + TokenType, + SearchExpression, + TermExpression, + TagExpression, + SpecialKeywordExpression, + AndExpression, + OrExpression, + NotExpression, + SearchQueryParseError, + parse_search_query, + expression_to_string, + strip_tag_from_query, + extract_tag_names_from_query, +) +from bookmarks.models import UserProfile + + +def _term(term: str) -> TermExpression: + return TermExpression(term) + + +def _tag(tag: str) -> TagExpression: + return TagExpression(tag) + + +def _and(left: SearchExpression, 
right: SearchExpression) -> AndExpression: + return AndExpression(left, right) + + +def _or(left: SearchExpression, right: SearchExpression) -> OrExpression: + return OrExpression(left, right) + + +def _not(operand: SearchExpression) -> NotExpression: + return NotExpression(operand) + + +def _keyword(keyword: str) -> SpecialKeywordExpression: + return SpecialKeywordExpression(keyword) + + +class SearchQueryTokenizerTest(TestCase): + def test_empty_query(self): + tokenizer = SearchQueryTokenizer("") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 1) + self.assertEqual(tokens[0].type, TokenType.EOF) + + def test_whitespace_only_query(self): + tokenizer = SearchQueryTokenizer(" ") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 1) + self.assertEqual(tokens[0].type, TokenType.EOF) + + def test_single_term(self): + tokenizer = SearchQueryTokenizer("programming") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 2) + self.assertEqual(tokens[0].type, TokenType.TERM) + self.assertEqual(tokens[0].value, "programming") + self.assertEqual(tokens[1].type, TokenType.EOF) + + def test_multiple_terms(self): + tokenizer = SearchQueryTokenizer("programming books streaming") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 4) + self.assertEqual(tokens[0].type, TokenType.TERM) + self.assertEqual(tokens[0].value, "programming") + self.assertEqual(tokens[1].type, TokenType.TERM) + self.assertEqual(tokens[1].value, "books") + self.assertEqual(tokens[2].type, TokenType.TERM) + self.assertEqual(tokens[2].value, "streaming") + self.assertEqual(tokens[3].type, TokenType.EOF) + + def test_hyphenated_term(self): + tokenizer = SearchQueryTokenizer("client-side") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 2) + self.assertEqual(tokens[0].type, TokenType.TERM) + self.assertEqual(tokens[0].value, "client-side") + self.assertEqual(tokens[1].type, TokenType.EOF) + + def test_and_operator(self): + tokenizer = 
SearchQueryTokenizer("programming and books") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 4) + self.assertEqual(tokens[0].type, TokenType.TERM) + self.assertEqual(tokens[0].value, "programming") + self.assertEqual(tokens[1].type, TokenType.AND) + self.assertEqual(tokens[1].value, "and") + self.assertEqual(tokens[2].type, TokenType.TERM) + self.assertEqual(tokens[2].value, "books") + self.assertEqual(tokens[3].type, TokenType.EOF) + + def test_or_operator(self): + tokenizer = SearchQueryTokenizer("programming or books") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 4) + self.assertEqual(tokens[0].type, TokenType.TERM) + self.assertEqual(tokens[0].value, "programming") + self.assertEqual(tokens[1].type, TokenType.OR) + self.assertEqual(tokens[1].value, "or") + self.assertEqual(tokens[2].type, TokenType.TERM) + self.assertEqual(tokens[2].value, "books") + self.assertEqual(tokens[3].type, TokenType.EOF) + + def test_not_operator(self): + tokenizer = SearchQueryTokenizer("programming not books") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 4) + self.assertEqual(tokens[0].type, TokenType.TERM) + self.assertEqual(tokens[0].value, "programming") + self.assertEqual(tokens[1].type, TokenType.NOT) + self.assertEqual(tokens[1].value, "not") + self.assertEqual(tokens[2].type, TokenType.TERM) + self.assertEqual(tokens[2].value, "books") + self.assertEqual(tokens[3].type, TokenType.EOF) + + def test_case_insensitive_operators(self): + tokenizer = SearchQueryTokenizer( + "programming AND books OR streaming NOT videos" + ) + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 8) + self.assertEqual(tokens[1].type, TokenType.AND) + self.assertEqual(tokens[3].type, TokenType.OR) + self.assertEqual(tokens[5].type, TokenType.NOT) + + def test_parentheses(self): + tokenizer = SearchQueryTokenizer("(programming or books) and streaming") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 8) + 
self.assertEqual(tokens[0].type, TokenType.LPAREN) + self.assertEqual(tokens[1].type, TokenType.TERM) + self.assertEqual(tokens[1].value, "programming") + self.assertEqual(tokens[2].type, TokenType.OR) + self.assertEqual(tokens[3].type, TokenType.TERM) + self.assertEqual(tokens[3].value, "books") + self.assertEqual(tokens[4].type, TokenType.RPAREN) + self.assertEqual(tokens[5].type, TokenType.AND) + self.assertEqual(tokens[6].type, TokenType.TERM) + self.assertEqual(tokens[6].value, "streaming") + self.assertEqual(tokens[7].type, TokenType.EOF) + + def test_operator_as_part_of_term(self): + # Terms containing operator words should be treated as terms + tokenizer = SearchQueryTokenizer("android notarization") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 3) + self.assertEqual(tokens[0].type, TokenType.TERM) + self.assertEqual(tokens[0].value, "android") + self.assertEqual(tokens[1].type, TokenType.TERM) + self.assertEqual(tokens[1].value, "notarization") + self.assertEqual(tokens[2].type, TokenType.EOF) + + def test_extra_whitespace(self): + tokenizer = SearchQueryTokenizer(" programming and books ") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 4) + self.assertEqual(tokens[0].type, TokenType.TERM) + self.assertEqual(tokens[0].value, "programming") + self.assertEqual(tokens[1].type, TokenType.AND) + self.assertEqual(tokens[2].type, TokenType.TERM) + self.assertEqual(tokens[2].value, "books") + self.assertEqual(tokens[3].type, TokenType.EOF) + + def test_quoted_strings(self): + # Double quotes + tokenizer = SearchQueryTokenizer('"good and bad"') + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 2) + self.assertEqual(tokens[0].type, TokenType.TERM) + self.assertEqual(tokens[0].value, "good and bad") + self.assertEqual(tokens[1].type, TokenType.EOF) + + # Single quotes + tokenizer = SearchQueryTokenizer("'hello world'") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 2) + self.assertEqual(tokens[0].type, 
TokenType.TERM) + self.assertEqual(tokens[0].value, "hello world") + self.assertEqual(tokens[1].type, TokenType.EOF) + + def test_quoted_strings_with_operators(self): + tokenizer = SearchQueryTokenizer('"good and bad" or programming') + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 4) + self.assertEqual(tokens[0].type, TokenType.TERM) + self.assertEqual(tokens[0].value, "good and bad") + self.assertEqual(tokens[1].type, TokenType.OR) + self.assertEqual(tokens[2].type, TokenType.TERM) + self.assertEqual(tokens[2].value, "programming") + self.assertEqual(tokens[3].type, TokenType.EOF) + + def test_escaped_quotes(self): + # Escaped double quote within double quotes + tokenizer = SearchQueryTokenizer('"say \\"hello\\""') + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 2) + self.assertEqual(tokens[0].type, TokenType.TERM) + self.assertEqual(tokens[0].value, 'say "hello"') + self.assertEqual(tokens[1].type, TokenType.EOF) + + # Escaped single quote within single quotes + tokenizer = SearchQueryTokenizer("'don\\'t worry'") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 2) + self.assertEqual(tokens[0].type, TokenType.TERM) + self.assertEqual(tokens[0].value, "don't worry") + self.assertEqual(tokens[1].type, TokenType.EOF) + + def test_unclosed_quotes(self): + # Unclosed quote should be handled gracefully + tokenizer = SearchQueryTokenizer('"unclosed quote') + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 2) + self.assertEqual(tokens[0].type, TokenType.TERM) + self.assertEqual(tokens[0].value, "unclosed quote") + self.assertEqual(tokens[1].type, TokenType.EOF) + + def test_tags(self): + # Basic tag + tokenizer = SearchQueryTokenizer("#python") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 2) + self.assertEqual(tokens[0].type, TokenType.TAG) + self.assertEqual(tokens[0].value, "python") + self.assertEqual(tokens[1].type, TokenType.EOF) + + # Tag with hyphens + tokenizer = 
SearchQueryTokenizer("#machine-learning") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 2) + self.assertEqual(tokens[0].type, TokenType.TAG) + self.assertEqual(tokens[0].value, "machine-learning") + self.assertEqual(tokens[1].type, TokenType.EOF) + + def test_tags_with_operators(self): + tokenizer = SearchQueryTokenizer("#python and #django") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 4) + self.assertEqual(tokens[0].type, TokenType.TAG) + self.assertEqual(tokens[0].value, "python") + self.assertEqual(tokens[1].type, TokenType.AND) + self.assertEqual(tokens[2].type, TokenType.TAG) + self.assertEqual(tokens[2].value, "django") + self.assertEqual(tokens[3].type, TokenType.EOF) + + def test_tags_mixed_with_terms(self): + tokenizer = SearchQueryTokenizer("programming and #python and web") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 6) + self.assertEqual(tokens[0].type, TokenType.TERM) + self.assertEqual(tokens[0].value, "programming") + self.assertEqual(tokens[1].type, TokenType.AND) + self.assertEqual(tokens[2].type, TokenType.TAG) + self.assertEqual(tokens[2].value, "python") + self.assertEqual(tokens[3].type, TokenType.AND) + self.assertEqual(tokens[4].type, TokenType.TERM) + self.assertEqual(tokens[4].value, "web") + self.assertEqual(tokens[5].type, TokenType.EOF) + + def test_empty_tag(self): + # Tag with just # should be ignored (no token created) + tokenizer = SearchQueryTokenizer("# ") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 1) + self.assertEqual(tokens[0].type, TokenType.EOF) + + # Empty tag at end of string + tokenizer = SearchQueryTokenizer("#") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 1) + self.assertEqual(tokens[0].type, TokenType.EOF) + + # Empty tag mixed with other terms + tokenizer = SearchQueryTokenizer("python # and django") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 4) + self.assertEqual(tokens[0].type, TokenType.TERM) + 
self.assertEqual(tokens[0].value, "python") + self.assertEqual(tokens[1].type, TokenType.AND) + self.assertEqual(tokens[2].type, TokenType.TERM) + self.assertEqual(tokens[2].value, "django") + self.assertEqual(tokens[3].type, TokenType.EOF) + + def test_special_keywords(self): + tokenizer = SearchQueryTokenizer("!unread") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 2) + self.assertEqual(tokens[0].type, TokenType.SPECIAL_KEYWORD) + self.assertEqual(tokens[0].value, "unread") + self.assertEqual(tokens[1].type, TokenType.EOF) + + tokenizer = SearchQueryTokenizer("!untagged") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 2) + self.assertEqual(tokens[0].type, TokenType.SPECIAL_KEYWORD) + self.assertEqual(tokens[0].value, "untagged") + self.assertEqual(tokens[1].type, TokenType.EOF) + + def test_special_keywords_with_operators(self): + tokenizer = SearchQueryTokenizer("!unread and !untagged") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 4) + self.assertEqual(tokens[0].type, TokenType.SPECIAL_KEYWORD) + self.assertEqual(tokens[0].value, "unread") + self.assertEqual(tokens[1].type, TokenType.AND) + self.assertEqual(tokens[2].type, TokenType.SPECIAL_KEYWORD) + self.assertEqual(tokens[2].value, "untagged") + self.assertEqual(tokens[3].type, TokenType.EOF) + + def test_special_keywords_mixed_with_terms_and_tags(self): + tokenizer = SearchQueryTokenizer("!unread and #python and tutorial") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 6) + self.assertEqual(tokens[0].type, TokenType.SPECIAL_KEYWORD) + self.assertEqual(tokens[0].value, "unread") + self.assertEqual(tokens[1].type, TokenType.AND) + self.assertEqual(tokens[2].type, TokenType.TAG) + self.assertEqual(tokens[2].value, "python") + self.assertEqual(tokens[3].type, TokenType.AND) + self.assertEqual(tokens[4].type, TokenType.TERM) + self.assertEqual(tokens[4].value, "tutorial") + self.assertEqual(tokens[5].type, TokenType.EOF) + + def 
test_empty_special_keyword(self): + # Special keyword with just ! should be ignored (no token created) + tokenizer = SearchQueryTokenizer("! ") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 1) + self.assertEqual(tokens[0].type, TokenType.EOF) + + # Empty special keyword at end of string + tokenizer = SearchQueryTokenizer("!") + tokens = tokenizer.tokenize() + self.assertEqual(len(tokens), 1) + self.assertEqual(tokens[0].type, TokenType.EOF) + + +class SearchQueryParserTest(TestCase): + """Test cases for the search query parser.""" + + def test_empty_query(self): + result = parse_search_query("") + self.assertIsNone(result) + + def test_whitespace_only_query(self): + result = parse_search_query(" ") + self.assertIsNone(result) + + def test_single_term(self): + result = parse_search_query("programming") + expected = _term("programming") + self.assertEqual(result, expected) + + def test_and_expression(self): + result = parse_search_query("programming and books") + expected = _and(_term("programming"), _term("books")) + self.assertEqual(result, expected) + + def test_or_expression(self): + result = parse_search_query("programming or books") + expected = _or(_term("programming"), _term("books")) + self.assertEqual(result, expected) + + def test_not_expression(self): + result = parse_search_query("not programming") + expected = _not(_term("programming")) + self.assertEqual(result, expected) + + def test_operator_precedence_and_over_or(self): + # "a or b and c" should parse as "a or (b and c)" + result = parse_search_query("programming or books and streaming") + expected = _or(_term("programming"), _and(_term("books"), _term("streaming"))) + self.assertEqual(result, expected) + + def test_operator_precedence_not_over_and(self): + # "not a and b" should parse as "(not a) and b" + result = parse_search_query("not programming and books") + expected = _and(_not(_term("programming")), _term("books")) + self.assertEqual(result, expected) + + def 
test_multiple_and_operators(self): + # "a and b and c" should parse as "(a and b) and c" (left associative) + result = parse_search_query("programming and books and streaming") + expected = _and(_and(_term("programming"), _term("books")), _term("streaming")) + self.assertEqual(result, expected) + + def test_multiple_or_operators(self): + # "a or b or c" should parse as "(a or b) or c" (left associative) + result = parse_search_query("programming or books or streaming") + expected = _or(_or(_term("programming"), _term("books")), _term("streaming")) + self.assertEqual(result, expected) + + def test_multiple_not_operators(self): + result = parse_search_query("not not programming") + expected = _not(_not(_term("programming"))) + self.assertEqual(result, expected) + + def test_parentheses_basic(self): + result = parse_search_query("(programming)") + expected = _term("programming") + self.assertEqual(result, expected) + + def test_parentheses_change_precedence(self): + # "(a or b) and c" should parse as "(a or b) and c" + result = parse_search_query("(programming or books) and streaming") + expected = _and(_or(_term("programming"), _term("books")), _term("streaming")) + self.assertEqual(result, expected) + + def test_nested_parentheses(self): + result = parse_search_query("((programming))") + expected = _term("programming") + self.assertEqual(result, expected) + + def test_complex_expression(self): + result = parse_search_query( + "programming and (books or streaming) and not client-side" + ) + # Should be parsed as "(programming and (books or streaming)) and (not client-side)" + expected = _and( + _and(_term("programming"), _or(_term("books"), _term("streaming"))), + _not(_term("client-side")), + ) + self.assertEqual(result, expected) + + def test_hyphenated_terms(self): + result = parse_search_query("client-side") + expected = _term("client-side") + self.assertEqual(result, expected) + + def test_case_insensitive_operators(self): + result = 
parse_search_query("programming AND books OR streaming") + expected = _or(_and(_term("programming"), _term("books")), _term("streaming")) + self.assertEqual(result, expected) + + # Test implicit AND with NOT + result = parse_search_query("programming AND books OR streaming NOT videos") + expected = _or( + _and(_term("programming"), _term("books")), + _and(_term("streaming"), _not(_term("videos"))), + ) + self.assertEqual(result, expected) + + def test_case_insensitive_operators_with_explicit_operators(self): + result = parse_search_query("programming AND books OR streaming AND NOT videos") + # Should parse as: (programming AND books) OR (streaming AND (NOT videos)) + expected = _or( + _and(_term("programming"), _term("books")), + _and(_term("streaming"), _not(_term("videos"))), + ) + self.assertEqual(result, expected) + + def test_single_character_terms(self): + result = parse_search_query("a and b") + expected = _and(_term("a"), _term("b")) + self.assertEqual(result, expected) + + def test_numeric_terms(self): + result = parse_search_query("123 and 456") + expected = _and(_term("123"), _term("456")) + self.assertEqual(result, expected) + + def test_special_characters_in_terms(self): + result = parse_search_query("test@example.com and file.txt") + expected = _and(_term("test@example.com"), _term("file.txt")) + self.assertEqual(result, expected) + + def test_url_terms(self): + result = parse_search_query("https://example.com/foo/bar") + expected = _term("https://example.com/foo/bar") + self.assertEqual(result, expected) + + def test_url_with_operators(self): + result = parse_search_query("https://github.com or https://gitlab.com") + expected = _or(_term("https://github.com"), _term("https://gitlab.com")) + self.assertEqual(result, expected) + + def test_quoted_strings(self): + # Basic quoted string + result = parse_search_query('"good and bad"') + expected = _term("good and bad") + self.assertEqual(result, expected) + + # Single quotes + result = 
parse_search_query("'hello world'") + expected = _term("hello world") + self.assertEqual(result, expected) + + def test_quoted_strings_with_operators(self): + # Quoted string with OR + result = parse_search_query('"good and bad" or programming') + expected = _or(_term("good and bad"), _term("programming")) + self.assertEqual(result, expected) + + # Quoted string with AND + result = parse_search_query('documentation and "API reference"') + expected = _and(_term("documentation"), _term("API reference")) + self.assertEqual(result, expected) + + # Quoted string with NOT + result = parse_search_query('programming and not "bad practices"') + expected = _and(_term("programming"), _not(_term("bad practices"))) + self.assertEqual(result, expected) + + def test_multiple_quoted_strings(self): + result = parse_search_query('"hello world" and "goodbye moon"') + expected = _and(_term("hello world"), _term("goodbye moon")) + self.assertEqual(result, expected) + + def test_quoted_strings_with_parentheses(self): + result = parse_search_query('("good morning" or "good evening") and coffee') + expected = _and( + _or(_term("good morning"), _term("good evening")), _term("coffee") + ) + self.assertEqual(result, expected) + + def test_escaped_quotes_in_terms(self): + result = parse_search_query('"say \\"hello\\""') + expected = _term('say "hello"') + self.assertEqual(result, expected) + + def test_tags(self): + # Basic tag + result = parse_search_query("#python") + expected = _tag("python") + self.assertEqual(result, expected) + + # Tag with hyphens + result = parse_search_query("#machine-learning") + expected = _tag("machine-learning") + self.assertEqual(result, expected) + + def test_tags_with_operators(self): + # Tag with AND + result = parse_search_query("#python and #django") + expected = _and(_tag("python"), _tag("django")) + self.assertEqual(result, expected) + + # Tag with OR + result = parse_search_query("#frontend or #backend") + expected = _or(_tag("frontend"), 
_tag("backend")) + self.assertEqual(result, expected) + + # Tag with NOT + result = parse_search_query("not #deprecated") + expected = _not(_tag("deprecated")) + self.assertEqual(result, expected) + + def test_tags_mixed_with_terms(self): + result = parse_search_query("programming and #python and tutorial") + expected = _and(_and(_term("programming"), _tag("python")), _term("tutorial")) + self.assertEqual(result, expected) + + def test_tags_with_quoted_strings(self): + result = parse_search_query('"machine learning" and #python') + expected = _and(_term("machine learning"), _tag("python")) + self.assertEqual(result, expected) + + def test_tags_with_parentheses(self): + result = parse_search_query("(#frontend or #backend) and javascript") + expected = _and(_or(_tag("frontend"), _tag("backend")), _term("javascript")) + self.assertEqual(result, expected) + + def test_empty_tags_ignored(self): + # Test single empty tag + result = parse_search_query("#") + expected = None # Empty query + self.assertEqual(result, expected) + + # Test query that's just an empty tag and whitespace + result = parse_search_query("# ") + expected = None # Empty query + self.assertEqual(result, expected) + + def test_special_keywords(self): + result = parse_search_query("!unread") + expected = _keyword("unread") + self.assertEqual(result, expected) + + result = parse_search_query("!untagged") + expected = _keyword("untagged") + self.assertEqual(result, expected) + + def test_special_keywords_with_operators(self): + # Special keyword with AND + result = parse_search_query("!unread and !untagged") + expected = _and(_keyword("unread"), _keyword("untagged")) + self.assertEqual(result, expected) + + # Special keyword with OR + result = parse_search_query("!unread or !untagged") + expected = _or(_keyword("unread"), _keyword("untagged")) + self.assertEqual(result, expected) + + # Special keyword with NOT + result = parse_search_query("not !unread") + expected = _not(_keyword("unread")) + 
self.assertEqual(result, expected) + + def test_special_keywords_mixed_with_terms_and_tags(self): + result = parse_search_query("!unread and #python and tutorial") + expected = _and(_and(_keyword("unread"), _tag("python")), _term("tutorial")) + self.assertEqual(result, expected) + + def test_special_keywords_with_quoted_strings(self): + result = parse_search_query('"machine learning" and !unread') + expected = _and(_term("machine learning"), _keyword("unread")) + self.assertEqual(result, expected) + + def test_special_keywords_with_parentheses(self): + result = parse_search_query("(!unread or !untagged) and javascript") + expected = _and( + _or(_keyword("unread"), _keyword("untagged")), _term("javascript") + ) + self.assertEqual(result, expected) + + def test_special_keywords_within_quoted_string(self): + result = parse_search_query("'!unread and !untagged'") + expected = _term("!unread and !untagged") + self.assertEqual(result, expected) + + def test_implicit_and_basic(self): + # Basic implicit AND between terms + result = parse_search_query("programming book") + expected = _and(_term("programming"), _term("book")) + self.assertEqual(result, expected) + + # Three terms with implicit AND + result = parse_search_query("python machine learning") + expected = _and(_and(_term("python"), _term("machine")), _term("learning")) + self.assertEqual(result, expected) + + def test_implicit_and_with_tags(self): + # Implicit AND between term and tag + result = parse_search_query("tutorial #python") + expected = _and(_term("tutorial"), _tag("python")) + self.assertEqual(result, expected) + + # Implicit AND between tag and term + result = parse_search_query("#javascript tutorial") + expected = _and(_tag("javascript"), _term("tutorial")) + self.assertEqual(result, expected) + + # Multiple tags with implicit AND + result = parse_search_query("#python #django #tutorial") + expected = _and(_and(_tag("python"), _tag("django")), _tag("tutorial")) + self.assertEqual(result, expected) + + 
def test_implicit_and_with_quoted_strings(self): + # Implicit AND with quoted strings + result = parse_search_query('"machine learning" tutorial') + expected = _and(_term("machine learning"), _term("tutorial")) + self.assertEqual(result, expected) + + # Mixed types with implicit AND + result = parse_search_query('"deep learning" #python tutorial') + expected = _and(_and(_term("deep learning"), _tag("python")), _term("tutorial")) + self.assertEqual(result, expected) + + def test_implicit_and_with_explicit_operators(self): + # Mixed implicit and explicit AND + result = parse_search_query("python tutorial and django") + expected = _and(_and(_term("python"), _term("tutorial")), _term("django")) + self.assertEqual(result, expected) + + # Implicit AND with OR + result = parse_search_query("python tutorial or java guide") + expected = _or( + _and(_term("python"), _term("tutorial")), + _and(_term("java"), _term("guide")), + ) + self.assertEqual(result, expected) + + def test_implicit_and_with_not(self): + # NOT with implicit AND + result = parse_search_query("not deprecated tutorial") + expected = _and(_not(_term("deprecated")), _term("tutorial")) + self.assertEqual(result, expected) + + # Implicit AND with NOT at end + result = parse_search_query("python tutorial not deprecated") + expected = _and( + _and(_term("python"), _term("tutorial")), _not(_term("deprecated")) + ) + self.assertEqual(result, expected) + + def test_implicit_and_with_parentheses(self): + # Parentheses with implicit AND + result = parse_search_query("(python tutorial) or java") + expected = _or(_and(_term("python"), _term("tutorial")), _term("java")) + self.assertEqual(result, expected) + + # Complex parentheses with implicit AND + result = parse_search_query( + "(machine learning #python) and (web development #javascript)" + ) + expected = _and( + _and(_and(_term("machine"), _term("learning")), _tag("python")), + _and(_and(_term("web"), _term("development")), _tag("javascript")), + ) + 
self.assertEqual(result, expected) + + def test_complex_precedence_with_implicit_and(self): + result = parse_search_query("python tutorial or javascript guide") + expected = _or( + _and(_term("python"), _term("tutorial")), + _and(_term("javascript"), _term("guide")), + ) + self.assertEqual(result, expected) + + result = parse_search_query( + "machine learning and (python or r) tutorial #beginner" + ) + expected = _and( + _and( + _and( + _and(_term("machine"), _term("learning")), + _or(_term("python"), _term("r")), + ), + _term("tutorial"), + ), + _tag("beginner"), + ) + self.assertEqual(result, expected) + + def test_operator_words_as_substrings(self): + # Terms that contain operator words as substrings should be treated as terms + result = parse_search_query("android and notification") + expected = _and(_term("android"), _term("notification")) + self.assertEqual(result, expected) + + def test_complex_queries(self): + test_cases = [ + ( + "(programming or software) and not client-side and (javascript or python)", + _and( + _and( + _or(_term("programming"), _term("software")), + _not(_term("client-side")), + ), + _or(_term("javascript"), _term("python")), + ), + ), + ( + "(machine-learning or ai) and python and not deprecated", + _and( + _and( + _or(_term("machine-learning"), _term("ai")), + _term("python"), + ), + _not(_term("deprecated")), + ), + ), + ( + "frontend and (react or vue or angular) and not jquery", + _and( + _and( + _term("frontend"), + _or( + _or(_term("react"), _term("vue")), + _term("angular"), + ), + ), + _not(_term("jquery")), + ), + ), + ( + '"machine learning" and (python or r) and not "deep learning"', + _and( + _and( + _term("machine learning"), + _or(_term("python"), _term("r")), + ), + _not(_term("deep learning")), + ), + ), + ( + "(#python or #javascript) and tutorial and not #deprecated", + _and( + _and( + _or(_tag("python"), _tag("javascript")), + _term("tutorial"), + ), + _not(_tag("deprecated")), + ), + ), + ( + "machine learning 
tutorial #python beginner", + _and( + _and( + _and( + _and(_term("machine"), _term("learning")), _term("tutorial") + ), + _tag("python"), + ), + _term("beginner"), + ), + ), + ] + + for query, expected_ast in test_cases: + with self.subTest(query=query): + result = parse_search_query(query) + self.assertEqual(result, expected_ast, f"Failed for query: {query}") + + +class SearchQueryParserErrorTest(TestCase): + def test_unmatched_left_parenthesis(self): + with self.assertRaises(SearchQueryParseError) as cm: + parse_search_query("(programming and books") + self.assertIn("Expected RPAREN", str(cm.exception)) + + def test_unmatched_right_parenthesis(self): + with self.assertRaises(SearchQueryParseError) as cm: + parse_search_query("programming and books)") + self.assertIn("Unexpected token", str(cm.exception)) + + def test_empty_parentheses(self): + with self.assertRaises(SearchQueryParseError) as cm: + parse_search_query("()") + self.assertIn("Unexpected token RPAREN", str(cm.exception)) + + def test_operator_without_operand(self): + with self.assertRaises(SearchQueryParseError) as cm: + parse_search_query("and") + self.assertIn("Unexpected token AND", str(cm.exception)) + + def test_trailing_operator(self): + with self.assertRaises(SearchQueryParseError) as cm: + parse_search_query("programming and") + self.assertIn("Unexpected token EOF", str(cm.exception)) + + def test_consecutive_operators(self): + with self.assertRaises(SearchQueryParseError) as cm: + parse_search_query("programming and or books") + self.assertIn("Unexpected token OR", str(cm.exception)) + + def test_not_without_operand(self): + with self.assertRaises(SearchQueryParseError) as cm: + parse_search_query("not") + self.assertIn("Unexpected token EOF", str(cm.exception)) + + +class ExpressionToStringTest(TestCase): + def test_simple_term(self): + expr = _term("python") + self.assertEqual(expression_to_string(expr), "python") + + def test_simple_tag(self): + expr = _tag("python") + 
self.assertEqual(expression_to_string(expr), "#python") + + def test_simple_keyword(self): + expr = _keyword("unread") + self.assertEqual(expression_to_string(expr), "!unread") + + def test_term_with_spaces(self): + expr = _term("machine learning") + self.assertEqual(expression_to_string(expr), '"machine learning"') + + def test_term_with_quotes(self): + expr = _term('say "hello"') + self.assertEqual(expression_to_string(expr), '"say \\"hello\\""') + + def test_and_expression_implicit(self): + expr = _and(_term("python"), _term("tutorial")) + self.assertEqual(expression_to_string(expr), "python tutorial") + + def test_and_expression_with_tags(self): + expr = _and(_tag("python"), _tag("django")) + self.assertEqual(expression_to_string(expr), "#python #django") + + def test_and_expression_complex(self): + expr = _and(_or(_term("python"), _term("ruby")), _term("tutorial")) + self.assertEqual(expression_to_string(expr), "(python or ruby) tutorial") + + def test_or_expression(self): + expr = _or(_term("python"), _term("ruby")) + self.assertEqual(expression_to_string(expr), "python or ruby") + + def test_or_expression_with_and(self): + expr = _or(_and(_term("python"), _term("tutorial")), _term("ruby")) + self.assertEqual(expression_to_string(expr), "python tutorial or ruby") + + def test_not_expression(self): + expr = _not(_term("deprecated")) + self.assertEqual(expression_to_string(expr), "not deprecated") + + def test_not_with_tag(self): + expr = _not(_tag("deprecated")) + self.assertEqual(expression_to_string(expr), "not #deprecated") + + def test_not_with_and(self): + expr = _not(_and(_term("python"), _term("deprecated"))) + self.assertEqual(expression_to_string(expr), "not (python deprecated)") + + def test_complex_nested_expression(self): + expr = _and( + _or(_term("python"), _term("ruby")), + _or(_term("tutorial"), _term("guide")), + ) + result = expression_to_string(expr) + self.assertEqual(result, "(python or ruby) (tutorial or guide)") + + def 
test_implicit_and_chain(self): + expr = _and(_and(_term("machine"), _term("learning")), _term("tutorial")) + self.assertEqual(expression_to_string(expr), "machine learning tutorial") + + def test_none_expression(self): + self.assertEqual(expression_to_string(None), "") + + def test_round_trip(self): + test_cases = [ + "#python", + "python tutorial", + "#python #django", + "python or ruby", + "not deprecated", + "(python or ruby) and tutorial", + "tutorial and (python or ruby)", + "(python or ruby) tutorial", + "tutorial (python or ruby)", + ] + + for query in test_cases: + with self.subTest(query=query): + ast = parse_search_query(query) + result = expression_to_string(ast) + ast2 = parse_search_query(result) + self.assertEqual(ast, ast2) + + +class StripTagFromQueryTest(TestCase): + def test_single_tag(self): + result = strip_tag_from_query("#books", "books") + self.assertEqual(result, "") + + def test_tag_with_and(self): + result = strip_tag_from_query("#history and #books", "books") + self.assertEqual(result, "#history") + + def test_tag_with_and_not(self): + result = strip_tag_from_query("#history and not #books", "books") + self.assertEqual(result, "#history") + + def test_implicit_and_with_term_and_tags(self): + result = strip_tag_from_query("roman #history #books", "books") + self.assertEqual(result, "roman #history") + + def test_tag_in_or_expression(self): + result = strip_tag_from_query("roman and (#history or #books)", "books") + self.assertEqual(result, "roman #history") + + def test_complex_or_with_and(self): + result = strip_tag_from_query( + "(roman and #books) or (greek and #books)", "books" + ) + self.assertEqual(result, "roman or greek") + + def test_case_insensitive(self): + result = strip_tag_from_query("#Books and #History", "books") + self.assertEqual(result, "#History") + + def test_tag_not_present(self): + result = strip_tag_from_query("#history and #science", "books") + self.assertEqual(result, "#history #science") + + def 
test_multiple_same_tags(self): + result = strip_tag_from_query("#books or #books", "books") + self.assertEqual(result, "") + + def test_nested_parentheses(self): + result = strip_tag_from_query("((#books and tutorial) or guide)", "books") + self.assertEqual(result, "tutorial or guide") + + def test_not_expression_with_tag(self): + result = strip_tag_from_query("tutorial and not #books", "books") + self.assertEqual(result, "tutorial") + + def test_only_not_tag(self): + result = strip_tag_from_query("not #books", "books") + self.assertEqual(result, "") + + def test_complex_query(self): + result = strip_tag_from_query( + "(#python or #ruby) and tutorial and not #books", "books" + ) + self.assertEqual(result, "(#python or #ruby) tutorial") + + def test_empty_query(self): + result = strip_tag_from_query("", "books") + self.assertEqual(result, "") + + def test_whitespace_only(self): + result = strip_tag_from_query(" ", "books") + self.assertEqual(result, "") + + def test_special_keywords_preserved(self): + result = strip_tag_from_query("!unread and #books", "books") + self.assertEqual(result, "!unread") + + def test_quoted_terms_preserved(self): + result = strip_tag_from_query('"machine learning" and #books', "books") + self.assertEqual(result, '"machine learning"') + + def test_all_tags_in_and_chain(self): + result = strip_tag_from_query("#books and #books and #books", "books") + self.assertEqual(result, "") + + def test_tag_similar_name(self): + # Should not remove #book when removing #books + result = strip_tag_from_query("#book and #books", "books") + self.assertEqual(result, "#book") + + def test_invalid_query_returns_original(self): + # If query is malformed, should return original + result = strip_tag_from_query("(unclosed paren", "books") + self.assertEqual(result, "(unclosed paren") + + def test_implicit_and_in_output(self): + result = strip_tag_from_query("python tutorial #books #django", "books") + self.assertEqual(result, "python tutorial #django") + + def 
test_nested_or_simplify_parenthesis(self): + result = strip_tag_from_query( + "(#books or tutorial) and (#books or guide)", "books" + ) + self.assertEqual(result, "tutorial guide") + + def test_nested_or_preserve_parenthesis(self): + result = strip_tag_from_query( + "(#books or tutorial or guide) and (#books or help or lesson)", "books" + ) + self.assertEqual(result, "(tutorial or guide) (help or lesson)") + + def test_left_side_removed(self): + result = strip_tag_from_query("#books and python", "books") + self.assertEqual(result, "python") + + def test_right_side_removed(self): + result = strip_tag_from_query("python and #books", "books") + self.assertEqual(result, "python") + + +class StripTagFromQueryLaxSearchTest(TestCase): + def setUp(self): + self.lax_profile = type( + "UserProfile", (), {"tag_search": UserProfile.TAG_SEARCH_LAX} + )() + self.strict_profile = type( + "UserProfile", (), {"tag_search": UserProfile.TAG_SEARCH_STRICT} + )() + + def test_lax_search_removes_matching_term(self): + result = strip_tag_from_query("books", "books", self.lax_profile) + self.assertEqual(result, "") + + def test_lax_search_removes_term_case_insensitive(self): + result = strip_tag_from_query("Books", "books", self.lax_profile) + self.assertEqual(result, "") + + result = strip_tag_from_query("BOOKS", "books", self.lax_profile) + self.assertEqual(result, "") + + def test_lax_search_multiple_terms(self): + result = strip_tag_from_query("books and history", "books", self.lax_profile) + self.assertEqual(result, "history") + + def test_lax_search_preserves_non_matching_terms(self): + result = strip_tag_from_query("history and science", "books", self.lax_profile) + self.assertEqual(result, "history science") + + def test_lax_search_removes_both_tag_and_term(self): + result = strip_tag_from_query("books #books", "books", self.lax_profile) + self.assertEqual(result, "") + + def test_lax_search_mixed_tag_and_term(self): + result = strip_tag_from_query( + "books and #history and 
#books", "books", self.lax_profile + ) + self.assertEqual(result, "#history") + + def test_lax_search_term_in_or_expression(self): + result = strip_tag_from_query( + "(books or history) and guide", "books", self.lax_profile + ) + self.assertEqual(result, "history guide") + + def test_lax_search_term_in_not_expression(self): + result = strip_tag_from_query( + "history and not books", "books", self.lax_profile + ) + self.assertEqual(result, "history") + + def test_lax_search_only_not_term(self): + result = strip_tag_from_query("not books", "books", self.lax_profile) + self.assertEqual(result, "") + + def test_lax_search_complex_query(self): + result = strip_tag_from_query( + "(books or #books) and (history or guide)", "books", self.lax_profile + ) + self.assertEqual(result, "history or guide") + + def test_lax_search_quoted_term_with_same_name(self): + result = strip_tag_from_query('"books" and history', "books", self.lax_profile) + self.assertEqual(result, "history") + + def test_lax_search_partial_match_not_removed(self): + result = strip_tag_from_query("bookshelf", "books", self.lax_profile) + self.assertEqual(result, "bookshelf") + + def test_lax_search_multiple_occurrences(self): + result = strip_tag_from_query( + "books or books or history", "books", self.lax_profile + ) + self.assertEqual(result, "history") + + def test_lax_search_nested_expressions(self): + result = strip_tag_from_query( + "((books and tutorial) or guide) and history", "books", self.lax_profile + ) + self.assertEqual(result, "(tutorial or guide) history") + + def test_strict_search_preserves_terms(self): + result = strip_tag_from_query("books", "books", self.strict_profile) + self.assertEqual(result, "books") + + def test_strict_search_preserves_terms_with_tags(self): + result = strip_tag_from_query("books #books", "books", self.strict_profile) + self.assertEqual(result, "books") + + def test_no_profile_defaults_to_strict(self): + result = strip_tag_from_query("books #books", "books", None) + 
self.assertEqual(result, "books") + + +class ExtractTagNamesFromQueryTest(TestCase): + def test_empty_query(self): + result = extract_tag_names_from_query("") + self.assertEqual(result, []) + + def test_whitespace_query(self): + result = extract_tag_names_from_query(" ") + self.assertEqual(result, []) + + def test_single_tag(self): + result = extract_tag_names_from_query("#python") + self.assertEqual(result, ["python"]) + + def test_multiple_tags(self): + result = extract_tag_names_from_query("#python and #django") + self.assertEqual(result, ["django", "python"]) + + def test_tags_with_or(self): + result = extract_tag_names_from_query("#python or #ruby") + self.assertEqual(result, ["python", "ruby"]) + + def test_tags_with_not(self): + result = extract_tag_names_from_query("not #deprecated") + self.assertEqual(result, ["deprecated"]) + + def test_tags_in_complex_query(self): + result = extract_tag_names_from_query( + "(#python or #ruby) and #tutorial and not #deprecated" + ) + self.assertEqual(result, ["deprecated", "python", "ruby", "tutorial"]) + + def test_duplicate_tags(self): + result = extract_tag_names_from_query("#python and #python") + self.assertEqual(result, ["python"]) + + def test_case_insensitive_deduplication(self): + result = extract_tag_names_from_query("#Python and #PYTHON and #python") + self.assertEqual(result, ["python"]) + + def test_mixed_tags_and_terms(self): + result = extract_tag_names_from_query("tutorial #python guide #django") + self.assertEqual(result, ["django", "python"]) + + def test_only_terms_no_tags(self): + result = extract_tag_names_from_query("tutorial guide") + self.assertEqual(result, []) + + def test_special_keywords_not_extracted(self): + result = extract_tag_names_from_query("!unread and #python") + self.assertEqual(result, ["python"]) + + def test_tags_in_nested_parentheses(self): + result = extract_tag_names_from_query("((#python and #django) or #ruby)") + self.assertEqual(result, ["django", "python", "ruby"]) + + def 
test_invalid_query_returns_empty(self): + result = extract_tag_names_from_query("(unclosed paren") + self.assertEqual(result, []) + + def test_tags_with_hyphens(self): + result = extract_tag_names_from_query("#machine-learning and #deep-learning") + self.assertEqual(result, ["deep-learning", "machine-learning"]) + + +class ExtractTagNamesFromQueryLaxSearchTest(TestCase): + def setUp(self): + self.lax_profile = type( + "UserProfile", (), {"tag_search": UserProfile.TAG_SEARCH_LAX} + )() + self.strict_profile = type( + "UserProfile", (), {"tag_search": UserProfile.TAG_SEARCH_STRICT} + )() + + def test_lax_search_extracts_terms(self): + result = extract_tag_names_from_query("python and django", self.lax_profile) + self.assertEqual(result, ["django", "python"]) + + def test_lax_search_mixed_tags_and_terms(self): + result = extract_tag_names_from_query( + "tutorial #python guide #django", self.lax_profile + ) + self.assertEqual(result, ["django", "guide", "python", "tutorial"]) + + def test_lax_search_deduplicates_tags_and_terms(self): + result = extract_tag_names_from_query("python #python", self.lax_profile) + self.assertEqual(result, ["python"]) + + def test_lax_search_case_insensitive_dedup(self): + result = extract_tag_names_from_query("Python #python PYTHON", self.lax_profile) + self.assertEqual(result, ["python"]) + + def test_lax_search_terms_in_or_expression(self): + result = extract_tag_names_from_query( + "(python or ruby) and tutorial", self.lax_profile + ) + self.assertEqual(result, ["python", "ruby", "tutorial"]) + + def test_lax_search_terms_in_not_expression(self): + result = extract_tag_names_from_query( + "tutorial and not deprecated", self.lax_profile + ) + self.assertEqual(result, ["deprecated", "tutorial"]) + + def test_lax_search_quoted_terms(self): + result = extract_tag_names_from_query( + '"machine learning" and #python', self.lax_profile + ) + self.assertEqual(result, ["machine learning", "python"]) + + def test_lax_search_complex_query(self): + 
result = extract_tag_names_from_query( + "(python or #ruby) and tutorial and not #deprecated", self.lax_profile + ) + self.assertEqual(result, ["deprecated", "python", "ruby", "tutorial"]) + + def test_lax_search_special_keywords_not_extracted(self): + result = extract_tag_names_from_query( + "!unread and python and #django", self.lax_profile + ) + self.assertEqual(result, ["django", "python"]) + + def test_strict_search_ignores_terms(self): + result = extract_tag_names_from_query("python and django", self.strict_profile) + self.assertEqual(result, []) + + def test_strict_search_only_tags(self): + result = extract_tag_names_from_query( + "tutorial #python guide #django", self.strict_profile + ) + self.assertEqual(result, ["django", "python"]) + + def test_no_profile_defaults_to_strict(self): + result = extract_tag_names_from_query("python #django", None) + self.assertEqual(result, ["django"]) diff --git a/bookmarks/tests/test_settings_general_view.py b/bookmarks/tests/test_settings_general_view.py index c5036d5..bb8158d 100644 --- a/bookmarks/tests/test_settings_general_view.py +++ b/bookmarks/tests/test_settings_general_view.py @@ -49,6 +49,7 @@ class SettingsGeneralViewTestCase(TestCase, BookmarkFactoryMixin): "sticky_pagination": False, "collapse_side_panel": False, "hide_bundles": False, + "legacy_search": False, } return {**form_data, **overrides} @@ -122,6 +123,7 @@ class SettingsGeneralViewTestCase(TestCase, BookmarkFactoryMixin): "sticky_pagination": True, "collapse_side_panel": True, "hide_bundles": True, + "legacy_search": True, } response = self.client.post( reverse("linkding:settings.update"), form_data, follow=True @@ -206,6 +208,7 @@ class SettingsGeneralViewTestCase(TestCase, BookmarkFactoryMixin): self.user.profile.collapse_side_panel, form_data["collapse_side_panel"] ) self.assertEqual(self.user.profile.hide_bundles, form_data["hide_bundles"]) + self.assertEqual(self.user.profile.legacy_search, form_data["legacy_search"]) 
self.assertSuccessMessage(html, "Profile updated") diff --git a/bookmarks/tests/test_tag_cloud_template.py b/bookmarks/tests/test_tag_cloud_template.py index 5736cc6..9b401ec 100644 --- a/bookmarks/tests/test_tag_cloud_template.py +++ b/bookmarks/tests/test_tag_cloud_template.py @@ -234,6 +234,21 @@ class TagCloudTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin): rendered_template, ) + def test_tag_url_wraps_or_expression_in_parenthesis(self): + tag = self.setup_tag(name="tag1") + self.setup_bookmark(tags=[tag], title="term1") + + rendered_template = self.render_template(url="/test?q=term1 or term2") + + self.assertInHTML( + """ + + tag1 + + """, + rendered_template, + ) + def test_selected_tags(self): tags = [ self.setup_tag(name="tag1"), @@ -265,6 +280,63 @@ class TagCloudTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin): rendered_template, ) + def test_selected_tags_complex_queries(self): + tags = [ + self.setup_tag(name="tag1"), + self.setup_tag(name="tag2"), + ] + self.setup_bookmark(tags=tags) + + rendered_template = self.render_template(url="/test?q=%23tag1 or not %23tag2") + + self.assertNumSelectedTags(rendered_template, 2) + + self.assertInHTML( + """ + + -tag1 + + """, + rendered_template, + ) + + self.assertInHTML( + """ + + -tag2 + + """, + rendered_template, + ) + + rendered_template = self.render_template( + url="/test?q=%23tag1 and not (%23tag2 or term)" + ) + + self.assertNumSelectedTags(rendered_template, 2) + + self.assertInHTML( + """ + + -tag1 + + """, + rendered_template, + ) + + self.assertInHTML( + """ + + -tag2 + + """, + rendered_template, + ) + def test_selected_tags_with_lax_tag_search(self): profile = self.get_or_create_test_user().profile profile.tag_search = UserProfile.TAG_SEARCH_LAX @@ -410,6 +482,12 @@ class TagCloudTemplateTest(TestCase, BookmarkFactoryMixin, HtmlTestMixin): self.assertTagGroups(rendered_template, [["tag3", "tag4", "tag5"]]) + rendered_template = self.render_template( + url="/test?q=%23tag1 or 
(%23tag2 or not term)" + ) + + self.assertTagGroups(rendered_template, [["tag3", "tag4", "tag5"]]) + def test_with_anonymous_user(self): profile = self.get_or_create_test_user().profile profile.enable_sharing = True diff --git a/bookmarks/views/contexts.py b/bookmarks/views/contexts.py index 065b1fe..df2c30c 100644 --- a/bookmarks/views/contexts.py +++ b/bookmarks/views/contexts.py @@ -19,6 +19,12 @@ from bookmarks.models import ( UserProfile, Tag, ) +from bookmarks.services.search_query_parser import ( + parse_search_query, + strip_tag_from_query, + OrExpression, + SearchQueryParseError, +) from bookmarks.services.wayback import generate_fallback_webarchive_url from bookmarks.type_defs import HttpRequest from bookmarks.views import access @@ -37,6 +43,16 @@ class RequestContext: self.query_params = request.GET.copy() self.query_params.pop("details", None) + self.query_is_valid = True + self.query_error_message = None + self.search_expression = None + if not request.user_profile.legacy_search: + try: + self.search_expression = parse_search_query(request.GET.get("q")) + except SearchQueryParseError as e: + self.query_is_valid = False + self.query_error_message = e.message + def get_url(self, view_url: str, add: dict = None, remove: dict = None) -> str: query_params = self.query_params.copy() if add: @@ -131,6 +147,8 @@ class BookmarkItem: self.description = bookmark.resolved_description self.notes = bookmark.notes self.tag_names = bookmark.tag_names + self.tags = [AddTagItem(context, tag) for tag in bookmark.tags.all()] + self.tags.sort(key=lambda item: item.name) if bookmark.latest_snapshot_id: self.snapshot_url = reverse( "linkding:assets.view", args=[bookmark.latest_snapshot_id] @@ -186,6 +204,8 @@ class BookmarkListContext: self.request = request self.search = search + self.query_is_valid = request_context.query_is_valid + self.query_error_message = request_context.query_error_message query_set = request_context.get_bookmark_query_set(self.search) page_number = 
request.GET.get("page") @@ -257,58 +277,168 @@ class SharedBookmarkListContext(BookmarkListContext): request_context = SharedBookmarksContext +class AddTagItem: + def __init__(self, context: RequestContext, tag: Tag): + self.tag = tag + self.name = tag.name + + params = context.query_params.copy() + query_with_tag = params.get("q", "") + if isinstance(context.search_expression, OrExpression): + # If the current search expression is an OR expression, wrap in parentheses + query_with_tag = f"({query_with_tag})" + query_with_tag = f"{query_with_tag} #{tag.name}".strip() + + params["q"] = query_with_tag + params.pop("details", None) + params.pop("page", None) + + if context.request.user_profile.legacy_search: + self.query_string = self._generate_query_string_legacy(context, tag) + else: + self.query_string = self._generate_query_string(context, tag) + + @staticmethod + def _generate_query_string(context: RequestContext, tag: Tag) -> str: + params = context.query_params.copy() + query_with_tag = params.get("q", "") + if isinstance(context.search_expression, OrExpression): + # If the current search expression is an OR expression, wrap in parentheses + query_with_tag = f"({query_with_tag})" + query_with_tag = f"{query_with_tag} #{tag.name}".strip() + + params["q"] = query_with_tag + params.pop("details", None) + params.pop("page", None) + + return params.urlencode() + + @staticmethod + def _generate_query_string_legacy(context: RequestContext, tag: Tag) -> str: + params = context.query_params.copy() + query_with_tag = params.get("q", "") + query_with_tag = f"{query_with_tag} #{tag.name}".strip() + + params["q"] = query_with_tag + params.pop("details", None) + params.pop("page", None) + + return params.urlencode() + + +class RemoveTagItem: + def __init__(self, context: RequestContext, tag: Tag): + self.tag = tag + self.name = tag.name + + if context.request.user_profile.legacy_search: + self.query_string = self._generate_query_string_legacy(context, tag) + else: + 
self.query_string = self._generate_query_string(context, tag) + + @staticmethod + def _generate_query_string(context: RequestContext, tag: Tag) -> str: + params = context.query_params.copy() + query = params.get("q", "") + profile = context.request.user_profile + query_without_tag = strip_tag_from_query(query, tag.name, profile) + + params["q"] = query_without_tag + params.pop("details", None) + params.pop("page", None) + + return params.urlencode() + + @staticmethod + def _generate_query_string_legacy(context: RequestContext, tag: Tag) -> str: + params = context.request.GET.copy() + if params.__contains__("q"): + # Split query string into parts + query_string = params.__getitem__("q") + query_parts = query_string.split() + # Remove tag with hash + tag_name_with_hash = "#" + tag.name + query_parts = [ + part + for part in query_parts + if str.lower(part) != str.lower(tag_name_with_hash) + ] + # When using lax tag search, also remove tag without hash + profile = context.request.user_profile + if profile.tag_search == UserProfile.TAG_SEARCH_LAX: + query_parts = [ + part + for part in query_parts + if str.lower(part) != str.lower(tag.name) + ] + # Rebuild query string + query_string = " ".join(query_parts) + params.__setitem__("q", query_string) + + # Remove details ID and page number + params.pop("details", None) + params.pop("page", None) + + return params.urlencode() + + class TagGroup: - def __init__(self, char: str): + def __init__(self, context: RequestContext, char: str): + self.context = context self.tags = [] self.char = char def __repr__(self): return f"<{self.char} TagGroup>" + def add_tag(self, tag: Tag): + self.tags.append(AddTagItem(self.context, tag)) + @staticmethod - def create_tag_groups(mode: str, tags: Set[Tag]): + def create_tag_groups(context: RequestContext, mode: str, tags: Set[Tag]): if mode == UserProfile.TAG_GROUPING_ALPHABETICAL: - return TagGroup._create_tag_groups_alphabetical(tags) + return 
TagGroup._create_tag_groups_alphabetical(context, tags) elif mode == UserProfile.TAG_GROUPING_DISABLED: - return TagGroup._create_tag_groups_disabled(tags) + return TagGroup._create_tag_groups_disabled(context, tags) else: raise ValueError(f"{mode} is not a valid tag grouping mode") @staticmethod - def _create_tag_groups_alphabetical(tags: Set[Tag]): + def _create_tag_groups_alphabetical(context: RequestContext, tags: Set[Tag]): # Ensure groups, as well as tags within groups, are ordered alphabetically sorted_tags = sorted(tags, key=lambda x: str.lower(x.name)) group = None groups = [] + cjk_used = False - cjk_group = TagGroup("Ideographic") + cjk_group = TagGroup(context, "Ideographic") # Group tags that start with a different character than the previous one for tag in sorted_tags: tag_char = tag.name[0].lower() if CJK_RE.match(tag_char): cjk_used = True - cjk_group.tags.append(tag) + cjk_group.add_tag(tag) elif not group or group.char != tag_char: - group = TagGroup(tag_char) + group = TagGroup(context, tag_char) groups.append(group) - group.tags.append(tag) + group.add_tag(tag) else: - group.tags.append(tag) + group.add_tag(tag) if cjk_used: groups.append(cjk_group) return groups @staticmethod - def _create_tag_groups_disabled(tags: Set[Tag]): + def _create_tag_groups_disabled(context: RequestContext, tags: Set[Tag]): if len(tags) == 0: return [] sorted_tags = sorted(tags, key=lambda x: str.lower(x.name)) - group = TagGroup("Ungrouped") + group = TagGroup(context, "Ungrouped") for tag in sorted_tags: - group.tags.append(tag) + group.add_tag(tag) return [group] @@ -325,21 +455,30 @@ class TagCloudContext: query_set = request_context.get_tag_query_set(self.search) tags = list(query_set) - selected_tags = self.get_selected_tags(tags) + selected_tags = self.get_selected_tags() unique_tags = utils.unique(tags, key=lambda x: str.lower(x.name)) unique_selected_tags = utils.unique( selected_tags, key=lambda x: str.lower(x.name) ) has_selected_tags = 
len(unique_selected_tags) > 0 unselected_tags = set(unique_tags).symmetric_difference(unique_selected_tags) - groups = TagGroup.create_tag_groups(user_profile.tag_grouping, unselected_tags) + groups = TagGroup.create_tag_groups( + request_context, user_profile.tag_grouping, unselected_tags + ) + + selected_tag_items = [] + for tag in unique_selected_tags: + selected_tag_items.append(RemoveTagItem(request_context, tag)) self.tags = unique_tags self.groups = groups - self.selected_tags = unique_selected_tags + self.selected_tags = selected_tag_items self.has_selected_tags = has_selected_tags - def get_selected_tags(self, tags: List[Tag]): + def get_selected_tags(self): + raise NotImplementedError("Must be implemented by subclass") + + def get_selected_tags_legacy(self, tags: List[Tag]): parsed_query = queries.parse_query_string(self.search.q) tag_names = parsed_query["tag_names"] if self.request.user_profile.tag_search == UserProfile.TAG_SEARCH_LAX: @@ -352,14 +491,37 @@ class TagCloudContext: class ActiveTagCloudContext(TagCloudContext): request_context = ActiveBookmarksContext + def get_selected_tags(self): + return list( + queries.get_tags_for_query( + self.request.user, self.request.user_profile, self.search.q + ) + ) + class ArchivedTagCloudContext(TagCloudContext): request_context = ArchivedBookmarksContext + def get_selected_tags(self): + return list( + queries.get_tags_for_query( + self.request.user, self.request.user_profile, self.search.q + ) + ) + class SharedTagCloudContext(TagCloudContext): request_context = SharedBookmarksContext + def get_selected_tags(self): + user = User.objects.filter(username=self.search.user).first() + public_only = not self.request.user.is_authenticated + return list( + queries.get_shared_tags_for_query( + user, self.request.user_profile, self.search.q, public_only + ) + ) + class BookmarkAssetItem: def __init__(self, asset: BookmarkAsset): @@ -403,6 +565,9 @@ class BookmarkDetailsContext: self.close_url = request_context.index() 
self.bookmark = bookmark + self.tags = [AddTagItem(request_context, tag) for tag in bookmark.tags.all()] + self.tags.sort(key=lambda item: item.name) + self.profile = request.user_profile self.is_editable = bookmark.owner == user self.sharing_enabled = user_profile.enable_sharing diff --git a/docs/astro.config.mjs b/docs/astro.config.mjs index f4a1bd2..f6ac3b4 100644 --- a/docs/astro.config.mjs +++ b/docs/astro.config.mjs @@ -31,6 +31,7 @@ export default defineConfig({ label: "Guides", items: [ { label: "Backups", slug: "backups" }, + //{ label: "Bookmark Search", slug: "search" }, { label: "Archiving", slug: "archiving" }, { label: "Auto Tagging", slug: "auto-tagging" }, { label: "Keyboard Shortcuts", slug: "shortcuts" }, diff --git a/docs/src/content/docs/search.md b/docs/src/content/docs/search.md new file mode 100644 index 0000000..209d906 --- /dev/null +++ b/docs/src/content/docs/search.md @@ -0,0 +1,74 @@ +--- +title: Bookmark Search +--- + +linkding provides a comprehensive search function for finding bookmarks. This guide gives an overview of the search capabilities and provides some examples. + +## Search Expressions + +Every search query is made up of one or more expressions. An expression can be a single word, a phrase, a tag, or a combination of these using boolean operators.
The table below summarizes the different expression types: + +| Expression | Example | Description | +|--------------|------------------------------------|------------------------------------------------------------| +| Word | `history` | Search for a single word in title, description, notes, URL | +| Phrase | `"history of rome"` | Search for an exact phrase by enclosing it in quotes | +| Tag | `#book` | Search for a tag | +| AND operator | `#history and #book` | Both expressions must match | +| OR operator | `#book or #article` | Either expression must match | +| NOT operator | `not #article` | Expression must not match | +| Grouping | `#history and (#book or #article)` | Control evaluation order using parentheses | + +When combining multiple words, phrases or tags without an explicit operator, the `and` operator is assumed. For example: +``` +history rome #book +``` +is equivalent to: +``` +history and rome and #book +``` + +Some additional rules to keep in mind: +- Words, phrases, tags, and operators are all case-insensitive. +- Tags must be prefixed with a `#` symbol. If the *lax* tag search mode is enabled in the settings, the `#` prefix is optional. In that case searching for a word will return both bookmarks containing that word and bookmarks tagged with that word. +- An operator (`and`, `or`, `not`) cannot be used as a search term as such. To explicitly search for these words, use a phrase: `"beyond good and evil"`, `"good or bad"`, `"not found"`. + +## Examples + +Here are some example search queries and their meanings: + +``` +history rome #book +``` +Search bookmarks that contain both "history" and "rome", and are tagged with "book". + +``` +"history of rome" #book +``` +Search bookmarks that contain the exact phrase "history of rome" and are tagged with "book". + +``` +#article or #book +``` +Search bookmarks that are tagged with either "article" or "book".
+ +``` +rome (#article or #book) +``` +Search bookmarks that contain "rome" and are tagged with either "article" or "book". + +``` +history rome not #article +``` +Search bookmarks that contain both "history" and "rome", but are not tagged with "article". + +``` +history rome not (#article or #book) +``` +Search bookmarks that contain both "history" and "rome", but are not tagged with either "article" or "book". + +## Legacy Search + +A new search engine that supports the above expressions was introduced in linkding v1.44.0. +If you run into any issues with the new search, you can switch back to the old one by enabling legacy search in the settings. +Please report any issues you encounter with the new search on [GitHub](https://github.com/sissbruecker/linkding/issues) so they can be addressed. +This option will be removed in a future version.