Add new search engine that supports logical expressions (and, or, not) (#1198)

* parser implementation

* add support for quoted strings

* add support for tags

* ignore empty tags

* implicit and

* prepare query conversion by disabling tests

* convert query logic

* fix nested combined tag searches

* simplify query logic

* Add special keyword support to parser

* Add special keyword support to query builder

* Handle invalid queries in query builder

* Notify user about invalid queries

* Add helper to strip tags from search query

* Make tag cloud show all tags from search query

* Use new method for extracting tags

* Add query for getting tags from search query

* Get selected tags through specific context

* Properly remove selected tags from complex queries

* cleanup

* Clarify bundle search terms

* Add documentation draft

* Improve adding tags to search query

* Add option to switch back to the old search
This commit is contained in:
Sascha Ißbrücker
2025-10-05 12:51:08 +02:00
committed by GitHub
parent 229d3b511f
commit 051bd39256
22 changed files with 2831 additions and 127 deletions

View File

@@ -15,6 +15,18 @@ from bookmarks.models import (
UserProfile,
parse_tag_string,
)
from bookmarks.services.search_query_parser import (
parse_search_query,
SearchExpression,
TermExpression,
TagExpression,
SpecialKeywordExpression,
AndExpression,
OrExpression,
NotExpression,
SearchQueryParseError,
extract_tag_names_from_query,
)
from bookmarks.utils import unique
@@ -45,6 +57,122 @@ def query_shared_bookmarks(
return _base_bookmarks_query(user, profile, search).filter(conditions)
def _convert_ast_to_q_object(ast_node: SearchExpression, profile: UserProfile) -> Q:
if isinstance(ast_node, TermExpression):
# Search across title, description, notes, URL
conditions = (
Q(title__icontains=ast_node.term)
| Q(description__icontains=ast_node.term)
| Q(notes__icontains=ast_node.term)
| Q(url__icontains=ast_node.term)
)
# In lax mode, also search in tag names
if profile.tag_search == UserProfile.TAG_SEARCH_LAX:
conditions = conditions | Exists(
Bookmark.objects.filter(
id=OuterRef("id"), tags__name__iexact=ast_node.term
)
)
return conditions
elif isinstance(ast_node, TagExpression):
# Use Exists() to avoid reusing the same join when combining multiple tag expressions with and
return Q(
Exists(
Bookmark.objects.filter(
id=OuterRef("id"), tags__name__iexact=ast_node.tag
)
)
)
elif isinstance(ast_node, SpecialKeywordExpression):
# Handle special keywords
if ast_node.keyword.lower() == "unread":
return Q(unread=True)
elif ast_node.keyword.lower() == "untagged":
return Q(tags=None)
else:
# Unknown keyword, return empty Q object (matches all)
return Q()
elif isinstance(ast_node, AndExpression):
# Combine left and right with AND
left_q = _convert_ast_to_q_object(ast_node.left, profile)
right_q = _convert_ast_to_q_object(ast_node.right, profile)
return left_q & right_q
elif isinstance(ast_node, OrExpression):
# Combine left and right with OR
left_q = _convert_ast_to_q_object(ast_node.left, profile)
right_q = _convert_ast_to_q_object(ast_node.right, profile)
return left_q | right_q
elif isinstance(ast_node, NotExpression):
# Negate the operand
operand_q = _convert_ast_to_q_object(ast_node.operand, profile)
return ~operand_q
else:
# Fallback for unknown node types
return Q()
def _filter_search_query(
query_set: QuerySet, query_string: str, profile: UserProfile
) -> QuerySet:
"""New search filtering logic using logical expressions."""
try:
ast = parse_search_query(query_string)
if ast:
search_query = _convert_ast_to_q_object(ast, profile)
query_set = query_set.filter(search_query)
except SearchQueryParseError:
# If the query cannot be parsed, return zero results
return query_set.none()
return query_set
def _filter_search_query_legacy(
query_set: QuerySet, query_string: str, profile: UserProfile
) -> QuerySet:
"""Legacy search filtering logic where everything is just combined with AND."""
# Split query into search terms and tags
query = parse_query_string(query_string)
# Filter for search terms and tags
for term in query["search_terms"]:
conditions = (
Q(title__icontains=term)
| Q(description__icontains=term)
| Q(notes__icontains=term)
| Q(url__icontains=term)
)
if profile.tag_search == UserProfile.TAG_SEARCH_LAX:
conditions = conditions | Exists(
Bookmark.objects.filter(id=OuterRef("id"), tags__name__iexact=term)
)
query_set = query_set.filter(conditions)
for tag_name in query["tag_names"]:
query_set = query_set.filter(tags__name__iexact=tag_name)
# Untagged bookmarks
if query["untagged"]:
query_set = query_set.filter(tags=None)
# Legacy unread bookmarks filter from query
if query["unread"]:
query_set = query_set.filter(unread=True)
return query_set
def _filter_bundle(query_set: QuerySet, bundle: BookmarkBundle) -> QuerySet:
# Search terms
search_terms = parse_query_string(bundle.search)["search_terms"]
@@ -113,34 +241,11 @@ def _base_bookmarks_query(
# If the date format is invalid, ignore the filter
pass
# Split query into search terms and tags
query = parse_query_string(search.q)
# Filter for search terms and tags
for term in query["search_terms"]:
conditions = (
Q(title__icontains=term)
| Q(description__icontains=term)
| Q(notes__icontains=term)
| Q(url__icontains=term)
)
if profile.tag_search == UserProfile.TAG_SEARCH_LAX:
conditions = conditions | Exists(
Bookmark.objects.filter(id=OuterRef("id"), tags__name__iexact=term)
)
query_set = query_set.filter(conditions)
for tag_name in query["tag_names"]:
query_set = query_set.filter(tags__name__iexact=tag_name)
# Untagged bookmarks
if query["untagged"]:
query_set = query_set.filter(tags=None)
# Legacy unread bookmarks filter from query
if query["unread"]:
query_set = query_set.filter(unread=True)
# Filter by search query
if profile.legacy_search:
query_set = _filter_search_query_legacy(query_set, search.q, profile)
else:
query_set = _filter_search_query(query_set, search.q, profile)
# Unread filter from bookmark search
if search.unread == BookmarkSearch.FILTER_UNREAD_YES:
@@ -241,6 +346,45 @@ def get_user_tags(user: User):
return Tag.objects.filter(owner=user).all()
def get_tags_for_query(user: User, profile: UserProfile, query: str) -> QuerySet:
tag_names = extract_tag_names_from_query(query, profile)
if not tag_names:
return Tag.objects.none()
tag_conditions = Q()
for tag_name in tag_names:
tag_conditions |= Q(name__iexact=tag_name)
return Tag.objects.filter(owner=user).filter(tag_conditions).distinct()
def get_shared_tags_for_query(
user: Optional[User], profile: UserProfile, query: str, public_only: bool
) -> QuerySet:
tag_names = extract_tag_names_from_query(query, profile)
if not tag_names:
return Tag.objects.none()
# Build conditions similar to query_shared_bookmarks
conditions = Q(bookmark__shared=True) & Q(
bookmark__owner__profile__enable_sharing=True
)
if public_only:
conditions = conditions & Q(
bookmark__owner__profile__enable_public_sharing=True
)
if user is not None:
conditions = conditions & Q(bookmark__owner=user)
tag_conditions = Q()
for tag_name in tag_names:
tag_conditions |= Q(name__iexact=tag_name)
return Tag.objects.filter(conditions).filter(tag_conditions).distinct()
def parse_query_string(query_string):
# Sanitize query params
if not query_string: