from django.test import TestCase

from bookmarks.services.search_query_parser import (
    SearchQueryTokenizer,
    TokenType,
    SearchExpression,
    TermExpression,
    TagExpression,
    SpecialKeywordExpression,
    AndExpression,
    OrExpression,
    NotExpression,
    SearchQueryParseError,
    parse_search_query,
    expression_to_string,
    strip_tag_from_query,
    extract_tag_names_from_query,
)
from bookmarks.models import UserProfile


def _term(term: str) -> TermExpression:
    return TermExpression(term)


def _tag(tag: str) -> TagExpression:
    return TagExpression(tag)


def _and(left: SearchExpression, right: SearchExpression) -> AndExpression:
    return AndExpression(left, right)


def _or(left: SearchExpression, right: SearchExpression) -> OrExpression:
    return OrExpression(left, right)


def _not(operand: SearchExpression) -> NotExpression:
    return NotExpression(operand)


def _keyword(keyword: str) -> SpecialKeywordExpression:
    return SpecialKeywordExpression(keyword)


class SearchQueryTokenizerTest(TestCase):
    """Test cases for the search query tokenizer."""

    def test_empty_query(self):
        tokenizer = SearchQueryTokenizer("")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 1)
        self.assertEqual(tokens[0].type, TokenType.EOF)

    def test_whitespace_only_query(self):
        tokenizer = SearchQueryTokenizer(" ")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 1)
        self.assertEqual(tokens[0].type, TokenType.EOF)

    def test_single_term(self):
        tokenizer = SearchQueryTokenizer("programming")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "programming")
        self.assertEqual(tokens[1].type, TokenType.EOF)

    def test_multiple_terms(self):
        tokenizer = SearchQueryTokenizer("programming books streaming")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "programming")
        self.assertEqual(tokens[1].type, TokenType.TERM)
        self.assertEqual(tokens[1].value, "books")
        self.assertEqual(tokens[2].type, TokenType.TERM)
        self.assertEqual(tokens[2].value, "streaming")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_hyphenated_term(self):
        tokenizer = SearchQueryTokenizer("client-side")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "client-side")
        self.assertEqual(tokens[1].type, TokenType.EOF)

    def test_and_operator(self):
        tokenizer = SearchQueryTokenizer("programming and books")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "programming")
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[1].value, "and")
        self.assertEqual(tokens[2].type, TokenType.TERM)
        self.assertEqual(tokens[2].value, "books")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_or_operator(self):
        tokenizer = SearchQueryTokenizer("programming or books")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "programming")
        self.assertEqual(tokens[1].type, TokenType.OR)
        self.assertEqual(tokens[1].value, "or")
        self.assertEqual(tokens[2].type, TokenType.TERM)
        self.assertEqual(tokens[2].value, "books")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_not_operator(self):
        tokenizer = SearchQueryTokenizer("programming not books")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "programming")
        self.assertEqual(tokens[1].type, TokenType.NOT)
        self.assertEqual(tokens[1].value, "not")
        self.assertEqual(tokens[2].type, TokenType.TERM)
        self.assertEqual(tokens[2].value, "books")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_case_insensitive_operators(self):
        tokenizer = SearchQueryTokenizer(
            "programming AND books OR streaming NOT videos"
        )
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 8)
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[3].type, TokenType.OR)
        self.assertEqual(tokens[5].type, TokenType.NOT)

    def test_parentheses(self):
        tokenizer = SearchQueryTokenizer("(programming or books) and streaming")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 8)
        self.assertEqual(tokens[0].type, TokenType.LPAREN)
        self.assertEqual(tokens[1].type, TokenType.TERM)
        self.assertEqual(tokens[1].value, "programming")
        self.assertEqual(tokens[2].type, TokenType.OR)
        self.assertEqual(tokens[3].type, TokenType.TERM)
        self.assertEqual(tokens[3].value, "books")
        self.assertEqual(tokens[4].type, TokenType.RPAREN)
        self.assertEqual(tokens[5].type, TokenType.AND)
        self.assertEqual(tokens[6].type, TokenType.TERM)
        self.assertEqual(tokens[6].value, "streaming")
        self.assertEqual(tokens[7].type, TokenType.EOF)

    def test_operator_as_part_of_term(self):
        # Terms containing operator words should be treated as terms
        tokenizer = SearchQueryTokenizer("android notarization")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 3)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "android")
        self.assertEqual(tokens[1].type, TokenType.TERM)
        self.assertEqual(tokens[1].value, "notarization")
        self.assertEqual(tokens[2].type, TokenType.EOF)

    def test_extra_whitespace(self):
        tokenizer = SearchQueryTokenizer(" programming and books ")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "programming")
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[2].type, TokenType.TERM)
        self.assertEqual(tokens[2].value, "books")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_quoted_strings(self):
        # Double quotes
        tokenizer = SearchQueryTokenizer('"good and bad"')
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "good and bad")
        self.assertEqual(tokens[1].type, TokenType.EOF)

        # Single quotes
        tokenizer = SearchQueryTokenizer("'hello world'")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "hello world")
        self.assertEqual(tokens[1].type, TokenType.EOF)

    def test_quoted_strings_with_operators(self):
        tokenizer = SearchQueryTokenizer('"good and bad" or programming')
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "good and bad")
        self.assertEqual(tokens[1].type, TokenType.OR)
        self.assertEqual(tokens[2].type, TokenType.TERM)
        self.assertEqual(tokens[2].value, "programming")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_escaped_quotes(self):
        # Escaped double quote within double quotes
        tokenizer = SearchQueryTokenizer('"say \\"hello\\""')
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, 'say "hello"')
        self.assertEqual(tokens[1].type, TokenType.EOF)

        # Escaped single quote within single quotes
        tokenizer = SearchQueryTokenizer("'don\\'t worry'")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "don't worry")
        self.assertEqual(tokens[1].type, TokenType.EOF)

    def test_unclosed_quotes(self):
        # Unclosed quote should be handled gracefully
        tokenizer = SearchQueryTokenizer('"unclosed quote')
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "unclosed quote")
        self.assertEqual(tokens[1].type, TokenType.EOF)

    def test_tags(self):
        # Basic tag
        tokenizer = SearchQueryTokenizer("#python")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TAG)
        self.assertEqual(tokens[0].value, "python")
        self.assertEqual(tokens[1].type, TokenType.EOF)

        # Tag with hyphens
        tokenizer = SearchQueryTokenizer("#machine-learning")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TAG)
        self.assertEqual(tokens[0].value, "machine-learning")
        self.assertEqual(tokens[1].type, TokenType.EOF)

    def test_tags_with_operators(self):
        tokenizer = SearchQueryTokenizer("#python and #django")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TAG)
        self.assertEqual(tokens[0].value, "python")
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[2].type, TokenType.TAG)
        self.assertEqual(tokens[2].value, "django")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_tags_mixed_with_terms(self):
        tokenizer = SearchQueryTokenizer("programming and #python and web")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 6)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "programming")
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[2].type, TokenType.TAG)
        self.assertEqual(tokens[2].value, "python")
        self.assertEqual(tokens[3].type, TokenType.AND)
        self.assertEqual(tokens[4].type, TokenType.TERM)
        self.assertEqual(tokens[4].value, "web")
        self.assertEqual(tokens[5].type, TokenType.EOF)

    def test_empty_tag(self):
        # Tag with just # should be ignored (no token created)
        tokenizer = SearchQueryTokenizer("# ")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 1)
        self.assertEqual(tokens[0].type, TokenType.EOF)

        # Empty tag at end of string
        tokenizer = SearchQueryTokenizer("#")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 1)
        self.assertEqual(tokens[0].type, TokenType.EOF)

        # Empty tag mixed with other terms
        tokenizer = SearchQueryTokenizer("python # and django")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "python")
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[2].type, TokenType.TERM)
        self.assertEqual(tokens[2].value, "django")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_special_keywords(self):
        tokenizer = SearchQueryTokenizer("!unread")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.SPECIAL_KEYWORD)
        self.assertEqual(tokens[0].value, "unread")
        self.assertEqual(tokens[1].type, TokenType.EOF)

        tokenizer = SearchQueryTokenizer("!untagged")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.SPECIAL_KEYWORD)
        self.assertEqual(tokens[0].value, "untagged")
        self.assertEqual(tokens[1].type, TokenType.EOF)

    def test_special_keywords_with_operators(self):
        tokenizer = SearchQueryTokenizer("!unread and !untagged")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.SPECIAL_KEYWORD)
        self.assertEqual(tokens[0].value, "unread")
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[2].type, TokenType.SPECIAL_KEYWORD)
        self.assertEqual(tokens[2].value, "untagged")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_special_keywords_mixed_with_terms_and_tags(self):
        tokenizer = SearchQueryTokenizer("!unread and #python and tutorial")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 6)
        self.assertEqual(tokens[0].type, TokenType.SPECIAL_KEYWORD)
        self.assertEqual(tokens[0].value, "unread")
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[2].type, TokenType.TAG)
        self.assertEqual(tokens[2].value, "python")
        self.assertEqual(tokens[3].type, TokenType.AND)
        self.assertEqual(tokens[4].type, TokenType.TERM)
        self.assertEqual(tokens[4].value, "tutorial")
        self.assertEqual(tokens[5].type, TokenType.EOF)

    def test_empty_special_keyword(self):
        # Special keyword with just ! should be ignored (no token created)
        tokenizer = SearchQueryTokenizer("! ")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 1)
        self.assertEqual(tokens[0].type, TokenType.EOF)

        # Empty special keyword at end of string
        tokenizer = SearchQueryTokenizer("!")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 1)
        self.assertEqual(tokens[0].type, TokenType.EOF)


class SearchQueryParserTest(TestCase):
    """Test cases for the search query parser."""

    def test_empty_query(self):
        result = parse_search_query("")
        self.assertIsNone(result)

    def test_whitespace_only_query(self):
        result = parse_search_query(" ")
        self.assertIsNone(result)

    def test_single_term(self):
        result = parse_search_query("programming")
        expected = _term("programming")
        self.assertEqual(result, expected)

    def test_and_expression(self):
        result = parse_search_query("programming and books")
        expected = _and(_term("programming"), _term("books"))
        self.assertEqual(result, expected)

    def test_or_expression(self):
        result = parse_search_query("programming or books")
        expected = _or(_term("programming"), _term("books"))
        self.assertEqual(result, expected)

    def test_not_expression(self):
        result = parse_search_query("not programming")
        expected = _not(_term("programming"))
        self.assertEqual(result, expected)

    def test_operator_precedence_and_over_or(self):
        # "a or b and c" should parse as "a or (b and c)"
        result = parse_search_query("programming or books and streaming")
        expected = _or(_term("programming"), _and(_term("books"), _term("streaming")))
        self.assertEqual(result, expected)

    def test_operator_precedence_not_over_and(self):
        # "not a and b" should parse as "(not a) and b"
        result = parse_search_query("not programming and books")
        expected = _and(_not(_term("programming")), _term("books"))
        self.assertEqual(result, expected)

    def test_multiple_and_operators(self):
        # "a and b and c" should parse as "(a and b) and c" (left associative)
        result = parse_search_query("programming and books and streaming")
        expected = _and(_and(_term("programming"), _term("books")), _term("streaming"))
        self.assertEqual(result, expected)

    def test_multiple_or_operators(self):
        # "a or b or c" should parse as "(a or b) or c" (left associative)
        result = parse_search_query("programming or books or streaming")
        expected = _or(_or(_term("programming"), _term("books")), _term("streaming"))
        self.assertEqual(result, expected)

    def test_multiple_not_operators(self):
        result = parse_search_query("not not programming")
        expected = _not(_not(_term("programming")))
        self.assertEqual(result, expected)

    def test_parentheses_basic(self):
        result = parse_search_query("(programming)")
        expected = _term("programming")
        self.assertEqual(result, expected)

    def test_parentheses_change_precedence(self):
        # "(a or b) and c" should keep the parenthesized "or" grouped
        result = parse_search_query("(programming or books) and streaming")
        expected = _and(_or(_term("programming"), _term("books")), _term("streaming"))
        self.assertEqual(result, expected)

    def test_nested_parentheses(self):
        result = parse_search_query("((programming))")
        expected = _term("programming")
        self.assertEqual(result, expected)

    def test_complex_expression(self):
        result = parse_search_query(
            "programming and (books or streaming) and not client-side"
        )
        # Should be parsed as "(programming and (books or streaming)) and (not client-side)"
        expected = _and(
            _and(_term("programming"), _or(_term("books"), _term("streaming"))),
            _not(_term("client-side")),
        )
        self.assertEqual(result, expected)

    def test_hyphenated_terms(self):
        result = parse_search_query("client-side")
        expected = _term("client-side")
        self.assertEqual(result, expected)

    def test_case_insensitive_operators(self):
        result = parse_search_query("programming AND books OR streaming")
        expected = _or(_and(_term("programming"), _term("books")), _term("streaming"))
        self.assertEqual(result, expected)

        # Test implicit AND with NOT
        result = parse_search_query("programming AND books OR streaming NOT videos")
        expected = _or(
            _and(_term("programming"), _term("books")),
            _and(_term("streaming"), _not(_term("videos"))),
        )
        self.assertEqual(result, expected)

    def test_case_insensitive_operators_with_explicit_operators(self):
        result = parse_search_query("programming AND books OR streaming AND NOT videos")
        # Should parse as: (programming AND books) OR (streaming AND (NOT videos))
        expected = _or(
            _and(_term("programming"), _term("books")),
            _and(_term("streaming"), _not(_term("videos"))),
        )
        self.assertEqual(result, expected)

    def test_single_character_terms(self):
        result = parse_search_query("a and b")
        expected = _and(_term("a"), _term("b"))
        self.assertEqual(result, expected)

    def test_numeric_terms(self):
        result = parse_search_query("123 and 456")
        expected = _and(_term("123"), _term("456"))
        self.assertEqual(result, expected)

    def test_special_characters_in_terms(self):
        result = parse_search_query("test@example.com and file.txt")
        expected = _and(_term("test@example.com"), _term("file.txt"))
        self.assertEqual(result, expected)

    def test_url_terms(self):
        result = parse_search_query("https://example.com/foo/bar")
        expected = _term("https://example.com/foo/bar")
        self.assertEqual(result, expected)

    def test_url_with_operators(self):
        result = parse_search_query("https://github.com or https://gitlab.com")
        expected = _or(_term("https://github.com"), _term("https://gitlab.com"))
        self.assertEqual(result, expected)

    def test_quoted_strings(self):
        # Basic quoted string
        result = parse_search_query('"good and bad"')
        expected = _term("good and bad")
        self.assertEqual(result, expected)

        # Single quotes
        result = parse_search_query("'hello world'")
        expected = _term("hello world")
        self.assertEqual(result, expected)

    def test_quoted_strings_with_operators(self):
        # Quoted string with OR
        result = parse_search_query('"good and bad" or programming')
        expected = _or(_term("good and bad"), _term("programming"))
        self.assertEqual(result, expected)

        # Quoted string with AND
        result = parse_search_query('documentation and "API reference"')
        expected = _and(_term("documentation"), _term("API reference"))
        self.assertEqual(result, expected)

        # Quoted string with NOT
        result = parse_search_query('programming and not "bad practices"')
        expected = _and(_term("programming"), _not(_term("bad practices")))
        self.assertEqual(result, expected)

    def test_multiple_quoted_strings(self):
        result = parse_search_query('"hello world" and "goodbye moon"')
        expected = _and(_term("hello world"), _term("goodbye moon"))
        self.assertEqual(result, expected)

    def test_quoted_strings_with_parentheses(self):
        result = parse_search_query('("good morning" or "good evening") and coffee')
        expected = _and(
            _or(_term("good morning"), _term("good evening")), _term("coffee")
        )
        self.assertEqual(result, expected)

    def test_escaped_quotes_in_terms(self):
        result = parse_search_query('"say \\"hello\\""')
        expected = _term('say "hello"')
        self.assertEqual(result, expected)

    def test_tags(self):
        # Basic tag
        result = parse_search_query("#python")
        expected = _tag("python")
        self.assertEqual(result, expected)

        # Tag with hyphens
        result = parse_search_query("#machine-learning")
        expected = _tag("machine-learning")
        self.assertEqual(result, expected)

    def test_tags_with_operators(self):
        # Tag with AND
        result = parse_search_query("#python and #django")
        expected = _and(_tag("python"), _tag("django"))
        self.assertEqual(result, expected)

        # Tag with OR
        result = parse_search_query("#frontend or #backend")
        expected = _or(_tag("frontend"), _tag("backend"))
        self.assertEqual(result, expected)

        # Tag with NOT
        result = parse_search_query("not #deprecated")
        expected = _not(_tag("deprecated"))
        self.assertEqual(result, expected)

    def test_tags_mixed_with_terms(self):
        result = parse_search_query("programming and #python and tutorial")
        expected = _and(_and(_term("programming"), _tag("python")), _term("tutorial"))
        self.assertEqual(result, expected)

    def test_tags_with_quoted_strings(self):
        result = parse_search_query('"machine learning" and #python')
        expected = _and(_term("machine learning"), _tag("python"))
        self.assertEqual(result, expected)

    def test_tags_with_parentheses(self):
        result = parse_search_query("(#frontend or #backend) and javascript")
        expected = _and(_or(_tag("frontend"), _tag("backend")), _term("javascript"))
        self.assertEqual(result, expected)

    def test_empty_tags_ignored(self):
        # Test single empty tag
        result = parse_search_query("#")
        expected = None  # Empty query
        self.assertEqual(result, expected)

        # Test query that's just an empty tag and whitespace
        result = parse_search_query("# ")
        expected = None  # Empty query
        self.assertEqual(result, expected)

    def test_special_keywords(self):
        result = parse_search_query("!unread")
        expected = _keyword("unread")
        self.assertEqual(result, expected)

        result = parse_search_query("!untagged")
        expected = _keyword("untagged")
        self.assertEqual(result, expected)

    def test_special_keywords_with_operators(self):
        # Special keyword with AND
        result = parse_search_query("!unread and !untagged")
        expected = _and(_keyword("unread"), _keyword("untagged"))
        self.assertEqual(result, expected)

        # Special keyword with OR
        result = parse_search_query("!unread or !untagged")
        expected = _or(_keyword("unread"), _keyword("untagged"))
        self.assertEqual(result, expected)

        # Special keyword with NOT
        result = parse_search_query("not !unread")
        expected = _not(_keyword("unread"))
        self.assertEqual(result, expected)

    def test_special_keywords_mixed_with_terms_and_tags(self):
        result = parse_search_query("!unread and #python and tutorial")
        expected = _and(_and(_keyword("unread"), _tag("python")), _term("tutorial"))
        self.assertEqual(result, expected)

    def test_special_keywords_with_quoted_strings(self):
        result = parse_search_query('"machine learning" and !unread')
        expected = _and(_term("machine learning"), _keyword("unread"))
        self.assertEqual(result, expected)

    def test_special_keywords_with_parentheses(self):
        result = parse_search_query("(!unread or !untagged) and javascript")
        expected = _and(
            _or(_keyword("unread"), _keyword("untagged")), _term("javascript")
        )
        self.assertEqual(result, expected)

    def test_special_keywords_within_quoted_string(self):
        result = parse_search_query("'!unread and !untagged'")
        expected = _term("!unread and !untagged")
        self.assertEqual(result, expected)

    def test_implicit_and_basic(self):
        # Basic implicit AND between terms
        result = parse_search_query("programming book")
        expected = _and(_term("programming"), _term("book"))
        self.assertEqual(result, expected)

        # Three terms with implicit AND
        result = parse_search_query("python machine learning")
        expected = _and(_and(_term("python"), _term("machine")), _term("learning"))
        self.assertEqual(result, expected)

    def test_implicit_and_with_tags(self):
        # Implicit AND between term and tag
        result = parse_search_query("tutorial #python")
        expected = _and(_term("tutorial"), _tag("python"))
        self.assertEqual(result, expected)

        # Implicit AND between tag and term
        result = parse_search_query("#javascript tutorial")
        expected = _and(_tag("javascript"), _term("tutorial"))
        self.assertEqual(result, expected)

        # Multiple tags with implicit AND
        result = parse_search_query("#python #django #tutorial")
        expected = _and(_and(_tag("python"), _tag("django")), _tag("tutorial"))
        self.assertEqual(result, expected)

    def test_implicit_and_with_quoted_strings(self):
        # Implicit AND with quoted strings
        result = parse_search_query('"machine learning" tutorial')
        expected = _and(_term("machine learning"), _term("tutorial"))
        self.assertEqual(result, expected)

        # Mixed types with implicit AND
        result = parse_search_query('"deep learning" #python tutorial')
        expected = _and(_and(_term("deep learning"), _tag("python")), _term("tutorial"))
        self.assertEqual(result, expected)

    def test_implicit_and_with_explicit_operators(self):
        # Mixed implicit and explicit AND
        result = parse_search_query("python tutorial and django")
        expected = _and(_and(_term("python"), _term("tutorial")), _term("django"))
        self.assertEqual(result, expected)

        # Implicit AND with OR
        result = parse_search_query("python tutorial or java guide")
        expected = _or(
            _and(_term("python"), _term("tutorial")),
            _and(_term("java"), _term("guide")),
        )
        self.assertEqual(result, expected)

    def test_implicit_and_with_not(self):
        # NOT with implicit AND
        result = parse_search_query("not deprecated tutorial")
        expected = _and(_not(_term("deprecated")), _term("tutorial"))
        self.assertEqual(result, expected)

        # Implicit AND with NOT at end
        result = parse_search_query("python tutorial not deprecated")
        expected = _and(
            _and(_term("python"), _term("tutorial")), _not(_term("deprecated"))
        )
        self.assertEqual(result, expected)

    def test_implicit_and_with_parentheses(self):
        # Parentheses with implicit AND
        result = parse_search_query("(python tutorial) or java")
        expected = _or(_and(_term("python"), _term("tutorial")), _term("java"))
        self.assertEqual(result, expected)

        # Complex parentheses with implicit AND
        result = parse_search_query(
            "(machine learning #python) and (web development #javascript)"
        )
        expected = _and(
            _and(_and(_term("machine"), _term("learning")), _tag("python")),
            _and(_and(_term("web"), _term("development")), _tag("javascript")),
        )
        self.assertEqual(result, expected)

    def test_complex_precedence_with_implicit_and(self):
        result = parse_search_query("python tutorial or javascript guide")
        expected = _or(
            _and(_term("python"), _term("tutorial")),
            _and(_term("javascript"), _term("guide")),
        )
        self.assertEqual(result, expected)

        result = parse_search_query(
            "machine learning and (python or r) tutorial #beginner"
        )
        expected = _and(
            _and(
                _and(
                    _and(_term("machine"), _term("learning")),
                    _or(_term("python"), _term("r")),
                ),
                _term("tutorial"),
            ),
            _tag("beginner"),
        )
        self.assertEqual(result, expected)

    def test_operator_words_as_substrings(self):
        # Terms that contain operator words as substrings should be treated as terms
        result = parse_search_query("android and notification")
        expected = _and(_term("android"), _term("notification"))
        self.assertEqual(result, expected)

    def test_complex_queries(self):
        test_cases = [
            (
                "(programming or software) and not client-side and (javascript or python)",
                _and(
                    _and(
                        _or(_term("programming"), _term("software")),
                        _not(_term("client-side")),
                    ),
                    _or(_term("javascript"), _term("python")),
                ),
            ),
            (
                "(machine-learning or ai) and python and not deprecated",
                _and(
                    _and(
                        _or(_term("machine-learning"), _term("ai")),
                        _term("python"),
                    ),
                    _not(_term("deprecated")),
                ),
            ),
            (
                "frontend and (react or vue or angular) and not jquery",
                _and(
                    _and(
                        _term("frontend"),
                        _or(
                            _or(_term("react"), _term("vue")),
                            _term("angular"),
                        ),
                    ),
                    _not(_term("jquery")),
                ),
            ),
            (
                '"machine learning" and (python or r) and not "deep learning"',
                _and(
                    _and(
                        _term("machine learning"),
                        _or(_term("python"), _term("r")),
                    ),
                    _not(_term("deep learning")),
                ),
            ),
            (
                "(#python or #javascript) and tutorial and not #deprecated",
                _and(
                    _and(
                        _or(_tag("python"), _tag("javascript")),
                        _term("tutorial"),
                    ),
                    _not(_tag("deprecated")),
                ),
            ),
            (
                "machine learning tutorial #python beginner",
                _and(
                    _and(
                        _and(
                            _and(_term("machine"), _term("learning")), _term("tutorial")
                        ),
                        _tag("python"),
                    ),
                    _term("beginner"),
                ),
            ),
        ]

        for query, expected_ast in test_cases:
            with self.subTest(query=query):
                result = parse_search_query(query)
                self.assertEqual(result, expected_ast, f"Failed for query: {query}")


class SearchQueryParserErrorTest(TestCase):
    """Test cases for error handling of invalid search queries."""

    def test_unmatched_left_parenthesis(self):
        with self.assertRaises(SearchQueryParseError) as cm:
            parse_search_query("(programming and books")
        self.assertIn("Expected RPAREN", str(cm.exception))

    def test_unmatched_right_parenthesis(self):
        with self.assertRaises(SearchQueryParseError) as cm:
            parse_search_query("programming and books)")
        self.assertIn("Unexpected token", str(cm.exception))

    def test_empty_parentheses(self):
        with self.assertRaises(SearchQueryParseError) as cm:
            parse_search_query("()")
        self.assertIn("Unexpected token RPAREN", str(cm.exception))

    def test_operator_without_operand(self):
        with self.assertRaises(SearchQueryParseError) as cm:
            parse_search_query("and")
        self.assertIn("Unexpected token AND", str(cm.exception))

    def test_trailing_operator(self):
        with self.assertRaises(SearchQueryParseError) as cm:
            parse_search_query("programming and")
        self.assertIn("Unexpected token EOF", str(cm.exception))

    def test_consecutive_operators(self):
        with self.assertRaises(SearchQueryParseError) as cm:
            parse_search_query("programming and or books")
        self.assertIn("Unexpected token OR", str(cm.exception))

    def test_not_without_operand(self):
        with self.assertRaises(SearchQueryParseError) as cm:
            parse_search_query("not")
        self.assertIn("Unexpected token EOF", str(cm.exception))


class ExpressionToStringTest(TestCase):
    """Test cases for converting expression trees back into query strings."""

    def test_simple_term(self):
        expr = _term("python")
        self.assertEqual(expression_to_string(expr), "python")

    def test_simple_tag(self):
        expr = _tag("python")
        self.assertEqual(expression_to_string(expr), "#python")

    def test_simple_keyword(self):
        expr = _keyword("unread")
        self.assertEqual(expression_to_string(expr), "!unread")

    def test_term_with_spaces(self):
        expr = _term("machine learning")
        self.assertEqual(expression_to_string(expr), '"machine learning"')

    def test_term_with_quotes(self):
        expr = _term('say "hello"')
        self.assertEqual(expression_to_string(expr), '"say \\"hello\\""')

    def test_and_expression_implicit(self):
        expr = _and(_term("python"), _term("tutorial"))
        self.assertEqual(expression_to_string(expr), "python tutorial")

    def test_and_expression_with_tags(self):
        expr = _and(_tag("python"), _tag("django"))
        self.assertEqual(expression_to_string(expr), "#python #django")

    def test_and_expression_complex(self):
        expr = _and(_or(_term("python"), _term("ruby")), _term("tutorial"))
        self.assertEqual(expression_to_string(expr), "(python or ruby) tutorial")

    def test_or_expression(self):
        expr = _or(_term("python"), _term("ruby"))
        self.assertEqual(expression_to_string(expr), "python or ruby")

    def test_or_expression_with_and(self):
        expr = _or(_and(_term("python"), _term("tutorial")), _term("ruby"))
        self.assertEqual(expression_to_string(expr), "python tutorial or ruby")

    def test_not_expression(self):
        expr = _not(_term("deprecated"))
        self.assertEqual(expression_to_string(expr), "not deprecated")

    def test_not_with_tag(self):
        expr = _not(_tag("deprecated"))
        self.assertEqual(expression_to_string(expr), "not #deprecated")

    def test_not_with_and(self):
        expr = _not(_and(_term("python"), _term("deprecated")))
        self.assertEqual(expression_to_string(expr), "not (python deprecated)")

    def test_complex_nested_expression(self):
        expr = _and(
            _or(_term("python"), _term("ruby")),
            _or(_term("tutorial"), _term("guide")),
        )
        result = expression_to_string(expr)
        self.assertEqual(result, "(python or ruby) (tutorial or guide)")

    def test_implicit_and_chain(self):
        expr = _and(_and(_term("machine"), _term("learning")), _term("tutorial"))
        self.assertEqual(expression_to_string(expr), "machine learning tutorial")

    def test_none_expression(self):
        self.assertEqual(expression_to_string(None), "")

    def test_round_trip(self):
        test_cases = [
            "#python",
            "python tutorial",
            "#python #django",
            "python or ruby",
            "not deprecated",
            "(python or ruby) and tutorial",
            "tutorial and (python or ruby)",
            "(python or ruby) tutorial",
            "tutorial (python or ruby)",
        ]

        for query in test_cases:
            with self.subTest(query=query):
                ast = parse_search_query(query)
                result = expression_to_string(ast)
                ast2 = parse_search_query(result)
                self.assertEqual(ast, ast2)


class StripTagFromQueryTest(TestCase):
    """Test cases for removing a tag from a search query."""

    def test_single_tag(self):
        result = strip_tag_from_query("#books", "books")
        self.assertEqual(result, "")

    def test_tag_with_and(self):
        result = strip_tag_from_query("#history and #books", "books")
        self.assertEqual(result, "#history")

    def test_tag_with_and_not(self):
        result = strip_tag_from_query("#history and not #books", "books")
        self.assertEqual(result, "#history")

    def test_implicit_and_with_term_and_tags(self):
        result = strip_tag_from_query("roman #history #books", "books")
        self.assertEqual(result, "roman #history")

    def test_tag_in_or_expression(self):
        result = strip_tag_from_query("roman and (#history or #books)", "books")
        self.assertEqual(result, "roman #history")

    def test_complex_or_with_and(self):
        result = strip_tag_from_query(
            "(roman and #books) or (greek and #books)", "books"
        )
        self.assertEqual(result, "roman or greek")

    def test_case_insensitive(self):
        result = strip_tag_from_query("#Books and #History", "books")
        self.assertEqual(result, "#History")

    def test_tag_not_present(self):
        result = strip_tag_from_query("#history and #science", "books")
        self.assertEqual(result, "#history #science")

    def test_multiple_same_tags(self):
        result = strip_tag_from_query("#books or #books", "books")
        self.assertEqual(result, "")

    def test_nested_parentheses(self):
        result = strip_tag_from_query("((#books and tutorial) or guide)", "books")
        self.assertEqual(result, "tutorial or guide")

    def test_not_expression_with_tag(self):
        result = strip_tag_from_query("tutorial and not #books", "books")
        self.assertEqual(result, "tutorial")

    def test_only_not_tag(self):
        result = strip_tag_from_query("not #books", "books")
        self.assertEqual(result, "")

    def test_complex_query(self):
        result = strip_tag_from_query(
            "(#python or #ruby) and tutorial and not #books", "books"
        )
        self.assertEqual(result, "(#python or #ruby) tutorial")

    def test_empty_query(self):
        result = strip_tag_from_query("", "books")
        self.assertEqual(result, "")

    def test_whitespace_only(self):
        result = strip_tag_from_query(" ", "books")
        self.assertEqual(result, "")

    def test_special_keywords_preserved(self):
        result = strip_tag_from_query("!unread and #books", "books")
        self.assertEqual(result, "!unread")

    def test_quoted_terms_preserved(self):
        result = strip_tag_from_query('"machine learning" and #books', "books")
        self.assertEqual(result, '"machine learning"')

    def test_all_tags_in_and_chain(self):
        result = strip_tag_from_query("#books and #books and #books", "books")
        self.assertEqual(result, "")

    def test_tag_similar_name(self):
        # Should not remove #book when removing #books
        result = strip_tag_from_query("#book and #books", "books")
        self.assertEqual(result, "#book")

    def test_invalid_query_returns_original(self):
        # If the query is malformed, the original query should be returned
        result = strip_tag_from_query("(unclosed paren", "books")
        self.assertEqual(result, "(unclosed paren")

    def test_implicit_and_in_output(self):
        result = strip_tag_from_query("python tutorial #books #django", "books")
        self.assertEqual(result, "python tutorial #django")

    def test_nested_or_simplify_parenthesis(self):
        result = strip_tag_from_query(
            "(#books or tutorial) and (#books or guide)", "books"
        )
        self.assertEqual(result, "tutorial guide")

    def test_nested_or_preserve_parenthesis(self):
        result = strip_tag_from_query(
            "(#books or tutorial or guide) and (#books or help or lesson)", "books"
        )
        self.assertEqual(result, "(tutorial or guide) (help or lesson)")

    def test_left_side_removed(self):
        result = strip_tag_from_query("#books and python", "books")
        self.assertEqual(result, "python")

    def test_right_side_removed(self):
        result = strip_tag_from_query("python and #books", "books")
        self.assertEqual(result, "python")


class StripTagFromQueryLaxSearchTest(TestCase):
    """Test cases for removing a tag from a query with lax tag search enabled."""

    def setUp(self):
        self.lax_profile = type(
            "UserProfile", (), {"tag_search": UserProfile.TAG_SEARCH_LAX}
        )()
        self.strict_profile = type(
            "UserProfile", (), {"tag_search": UserProfile.TAG_SEARCH_STRICT}
        )()

    def test_lax_search_removes_matching_term(self):
        result = strip_tag_from_query("books", "books", self.lax_profile)
        self.assertEqual(result, "")

    def test_lax_search_removes_term_case_insensitive(self):
        result = strip_tag_from_query("Books", "books", self.lax_profile)
        self.assertEqual(result, "")

        result = strip_tag_from_query("BOOKS", "books", self.lax_profile)
        self.assertEqual(result, "")

    def test_lax_search_multiple_terms(self):
        result = strip_tag_from_query("books and history", "books", self.lax_profile)
        self.assertEqual(result, "history")

    def test_lax_search_preserves_non_matching_terms(self):
        result = strip_tag_from_query("history and science", "books", self.lax_profile)
        self.assertEqual(result, "history science")

    def test_lax_search_removes_both_tag_and_term(self):
        result = strip_tag_from_query("books #books", "books", self.lax_profile)
        self.assertEqual(result, "")

    def test_lax_search_mixed_tag_and_term(self):
        result = strip_tag_from_query(
            "books and #history and #books", "books", self.lax_profile
        )
        self.assertEqual(result, "#history")

    def test_lax_search_term_in_or_expression(self):
        result = strip_tag_from_query(
            "(books or history) and guide", "books", self.lax_profile
        )
        self.assertEqual(result, "history guide")

    def test_lax_search_term_in_not_expression(self):
        result = strip_tag_from_query(
            "history and not books", "books", self.lax_profile
        )
        self.assertEqual(result, "history")

    def test_lax_search_only_not_term(self):
        result = strip_tag_from_query("not books", "books", self.lax_profile)
        self.assertEqual(result, "")

    def test_lax_search_complex_query(self):
        result = strip_tag_from_query(
            "(books or #books) and (history or guide)", "books", self.lax_profile
        )
        self.assertEqual(result, "history or guide")

    def test_lax_search_quoted_term_with_same_name(self):
        result = strip_tag_from_query('"books" and history', "books", self.lax_profile)
        self.assertEqual(result, "history")

    def test_lax_search_partial_match_not_removed(self):
        result = strip_tag_from_query("bookshelf", "books", self.lax_profile)
        self.assertEqual(result, "bookshelf")

    def test_lax_search_multiple_occurrences(self):
        result = strip_tag_from_query(
            "books or books or history", "books", self.lax_profile
        )
        self.assertEqual(result, "history")

    def test_lax_search_nested_expressions(self):
        result = strip_tag_from_query(
            "((books and tutorial) or guide) and history", "books", self.lax_profile
        )
        self.assertEqual(result, "(tutorial or guide) history")

    def test_strict_search_preserves_terms(self):
        result = strip_tag_from_query("books", "books", self.strict_profile)
        self.assertEqual(result, "books")

    def test_strict_search_preserves_terms_with_tags(self):
        result = strip_tag_from_query("books #books", "books", self.strict_profile)
        self.assertEqual(result, "books")

    def test_no_profile_defaults_to_strict(self):
        result = strip_tag_from_query("books #books", "books", None)
        self.assertEqual(result, "books")


class ExtractTagNamesFromQueryTest(TestCase):
    """Test cases for extracting tag names from a search query."""

    def test_empty_query(self):
        result = extract_tag_names_from_query("")
        self.assertEqual(result, [])

    def test_whitespace_query(self):
        result = extract_tag_names_from_query(" ")
        self.assertEqual(result, [])

    def test_single_tag(self):
        result = extract_tag_names_from_query("#python")
        self.assertEqual(result, ["python"])

    def test_multiple_tags(self):
        result = extract_tag_names_from_query("#python and #django")
        self.assertEqual(result, ["django", "python"])

    def test_tags_with_or(self):
        result = extract_tag_names_from_query("#python or #ruby")
        self.assertEqual(result, ["python", "ruby"])

    def test_tags_with_not(self):
        result = extract_tag_names_from_query("not #deprecated")
        self.assertEqual(result, ["deprecated"])

    def test_tags_in_complex_query(self):
        result = extract_tag_names_from_query(
            "(#python or #ruby) and #tutorial and not #deprecated"
        )
        self.assertEqual(result, ["deprecated", "python", "ruby", "tutorial"])

    def test_duplicate_tags(self):
        result = extract_tag_names_from_query("#python and #python")
        self.assertEqual(result, ["python"])

    def test_case_insensitive_deduplication(self):
        result = extract_tag_names_from_query("#Python and #PYTHON and #python")
        self.assertEqual(result, ["python"])

    def test_mixed_tags_and_terms(self):
        result = extract_tag_names_from_query("tutorial #python guide #django")
        self.assertEqual(result, ["django", "python"])

    def test_only_terms_no_tags(self):
        result = extract_tag_names_from_query("tutorial guide")
        self.assertEqual(result, [])

    def test_special_keywords_not_extracted(self):
        result = extract_tag_names_from_query("!unread and #python")
        self.assertEqual(result, ["python"])

    def test_tags_in_nested_parentheses(self):
        result = extract_tag_names_from_query("((#python and #django) or #ruby)")
        self.assertEqual(result, ["django", "python", "ruby"])

    def test_invalid_query_returns_empty(self):
        result = extract_tag_names_from_query("(unclosed paren")
        self.assertEqual(result, [])

    def test_tags_with_hyphens(self):
        result = extract_tag_names_from_query("#machine-learning and #deep-learning")
        self.assertEqual(result, ["deep-learning", "machine-learning"])


class ExtractTagNamesFromQueryLaxSearchTest(TestCase):
    """Test cases for extracting tag names with lax tag search enabled."""

    def setUp(self):
        self.lax_profile = type(
            "UserProfile", (), {"tag_search": UserProfile.TAG_SEARCH_LAX}
        )()
        self.strict_profile = type(
            "UserProfile", (), {"tag_search": UserProfile.TAG_SEARCH_STRICT}
        )()

    def test_lax_search_extracts_terms(self):
        result = extract_tag_names_from_query("python and django", self.lax_profile)
        self.assertEqual(result, ["django", "python"])

    def test_lax_search_mixed_tags_and_terms(self):
        result = extract_tag_names_from_query(
            "tutorial #python guide #django", self.lax_profile
        )
        self.assertEqual(result, ["django", "guide", "python", "tutorial"])

    def test_lax_search_deduplicates_tags_and_terms(self):
        result = extract_tag_names_from_query("python #python", self.lax_profile)
        self.assertEqual(result, ["python"])

    def test_lax_search_case_insensitive_dedup(self):
        result = extract_tag_names_from_query("Python #python PYTHON", self.lax_profile)
        self.assertEqual(result, ["python"])

    def test_lax_search_terms_in_or_expression(self):
        result = extract_tag_names_from_query(
            "(python or ruby) and tutorial", self.lax_profile
        )
        self.assertEqual(result, ["python", "ruby", "tutorial"])

    def test_lax_search_terms_in_not_expression(self):
        result = extract_tag_names_from_query(
            "tutorial and not deprecated", self.lax_profile
        )
        self.assertEqual(result, ["deprecated", "tutorial"])

    def test_lax_search_quoted_terms(self):
        result = extract_tag_names_from_query(
            '"machine learning" and #python', self.lax_profile
        )
        self.assertEqual(result, ["machine learning", "python"])

    def test_lax_search_complex_query(self):
        result = extract_tag_names_from_query(
            "(python or #ruby) and tutorial and not #deprecated", self.lax_profile
        )
        self.assertEqual(result, ["deprecated", "python", "ruby", "tutorial"])

    def test_lax_search_special_keywords_not_extracted(self):
        result = extract_tag_names_from_query(
            "!unread and python and #django", self.lax_profile
        )
        self.assertEqual(result, ["django", "python"])

    def test_strict_search_ignores_terms(self):
        result = extract_tag_names_from_query("python and django", self.strict_profile)
        self.assertEqual(result, [])

    def test_strict_search_only_tags(self):
        result = extract_tag_names_from_query(
            "tutorial #python guide #django", self.strict_profile
        )
        self.assertEqual(result, ["django", "python"])

    def test_no_profile_defaults_to_strict(self):
        result = extract_tag_names_from_query("python #django", None)
        self.assertEqual(result, ["django"])