from django.test import TestCase

from bookmarks.services.search_query_parser import (
    SearchQueryTokenizer,
    TokenType,
    SearchExpression,
    TermExpression,
    TagExpression,
    SpecialKeywordExpression,
    AndExpression,
    OrExpression,
    NotExpression,
    SearchQueryParseError,
    parse_search_query,
    expression_to_string,
    strip_tag_from_query,
    extract_tag_names_from_query,
)
from bookmarks.models import UserProfile


def _term(term: str) -> TermExpression:
    return TermExpression(term)


def _tag(tag: str) -> TagExpression:
    return TagExpression(tag)


def _and(left: SearchExpression, right: SearchExpression) -> AndExpression:
    return AndExpression(left, right)


def _or(left: SearchExpression, right: SearchExpression) -> OrExpression:
    return OrExpression(left, right)


def _not(operand: SearchExpression) -> NotExpression:
    return NotExpression(operand)


def _keyword(keyword: str) -> SpecialKeywordExpression:
    return SpecialKeywordExpression(keyword)


class SearchQueryTokenizerTest(TestCase):
    """Test cases for the search query tokenizer."""

    def test_empty_query(self):
        tokenizer = SearchQueryTokenizer("")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 1)
        self.assertEqual(tokens[0].type, TokenType.EOF)

    def test_whitespace_only_query(self):
        tokenizer = SearchQueryTokenizer(" ")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 1)
        self.assertEqual(tokens[0].type, TokenType.EOF)

    def test_single_term(self):
        tokenizer = SearchQueryTokenizer("programming")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "programming")
        self.assertEqual(tokens[1].type, TokenType.EOF)

    def test_multiple_terms(self):
        tokenizer = SearchQueryTokenizer("programming books streaming")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "programming")
        self.assertEqual(tokens[1].type, TokenType.TERM)
        self.assertEqual(tokens[1].value, "books")
        self.assertEqual(tokens[2].type, TokenType.TERM)
        self.assertEqual(tokens[2].value, "streaming")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_hyphenated_term(self):
        tokenizer = SearchQueryTokenizer("client-side")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "client-side")
        self.assertEqual(tokens[1].type, TokenType.EOF)

    def test_and_operator(self):
        tokenizer = SearchQueryTokenizer("programming and books")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "programming")
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[1].value, "and")
        self.assertEqual(tokens[2].type, TokenType.TERM)
        self.assertEqual(tokens[2].value, "books")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_or_operator(self):
        tokenizer = SearchQueryTokenizer("programming or books")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "programming")
        self.assertEqual(tokens[1].type, TokenType.OR)
        self.assertEqual(tokens[1].value, "or")
        self.assertEqual(tokens[2].type, TokenType.TERM)
        self.assertEqual(tokens[2].value, "books")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_not_operator(self):
        tokenizer = SearchQueryTokenizer("programming not books")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "programming")
        self.assertEqual(tokens[1].type, TokenType.NOT)
        self.assertEqual(tokens[1].value, "not")
        self.assertEqual(tokens[2].type, TokenType.TERM)
        self.assertEqual(tokens[2].value, "books")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_case_insensitive_operators(self):
        tokenizer = SearchQueryTokenizer(
            "programming AND books OR streaming NOT videos"
        )
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 8)
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[3].type, TokenType.OR)
        self.assertEqual(tokens[5].type, TokenType.NOT)

    def test_parentheses(self):
        tokenizer = SearchQueryTokenizer("(programming or books) and streaming")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 8)
        self.assertEqual(tokens[0].type, TokenType.LPAREN)
        self.assertEqual(tokens[1].type, TokenType.TERM)
        self.assertEqual(tokens[1].value, "programming")
        self.assertEqual(tokens[2].type, TokenType.OR)
        self.assertEqual(tokens[3].type, TokenType.TERM)
        self.assertEqual(tokens[3].value, "books")
        self.assertEqual(tokens[4].type, TokenType.RPAREN)
        self.assertEqual(tokens[5].type, TokenType.AND)
        self.assertEqual(tokens[6].type, TokenType.TERM)
        self.assertEqual(tokens[6].value, "streaming")
        self.assertEqual(tokens[7].type, TokenType.EOF)

    def test_operator_as_part_of_term(self):
        # Terms containing operator words should be treated as terms
        tokenizer = SearchQueryTokenizer("android notarization")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 3)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "android")
        self.assertEqual(tokens[1].type, TokenType.TERM)
        self.assertEqual(tokens[1].value, "notarization")
        self.assertEqual(tokens[2].type, TokenType.EOF)

    def test_extra_whitespace(self):
        tokenizer = SearchQueryTokenizer(" programming and books ")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "programming")
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[2].type, TokenType.TERM)
        self.assertEqual(tokens[2].value, "books")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_quoted_strings(self):
        # Double quotes
        tokenizer = SearchQueryTokenizer('"good and bad"')
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "good and bad")
        self.assertEqual(tokens[1].type, TokenType.EOF)

        # Single quotes
        tokenizer = SearchQueryTokenizer("'hello world'")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "hello world")
        self.assertEqual(tokens[1].type, TokenType.EOF)

    def test_quoted_strings_with_operators(self):
        tokenizer = SearchQueryTokenizer('"good and bad" or programming')
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "good and bad")
        self.assertEqual(tokens[1].type, TokenType.OR)
        self.assertEqual(tokens[2].type, TokenType.TERM)
        self.assertEqual(tokens[2].value, "programming")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_escaped_quotes(self):
        # Escaped double quote within double quotes
        tokenizer = SearchQueryTokenizer('"say \\"hello\\""')
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, 'say "hello"')
        self.assertEqual(tokens[1].type, TokenType.EOF)

        # Escaped single quote within single quotes
        tokenizer = SearchQueryTokenizer("'don\\'t worry'")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "don't worry")
        self.assertEqual(tokens[1].type, TokenType.EOF)

    def test_unclosed_quotes(self):
        # Unclosed quote should be handled gracefully
        tokenizer = SearchQueryTokenizer('"unclosed quote')
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "unclosed quote")
        self.assertEqual(tokens[1].type, TokenType.EOF)

    def test_tags(self):
        # Basic tag
        tokenizer = SearchQueryTokenizer("#python")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TAG)
        self.assertEqual(tokens[0].value, "python")
        self.assertEqual(tokens[1].type, TokenType.EOF)

        # Tag with hyphens
        tokenizer = SearchQueryTokenizer("#machine-learning")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.TAG)
        self.assertEqual(tokens[0].value, "machine-learning")
        self.assertEqual(tokens[1].type, TokenType.EOF)

    def test_tags_with_operators(self):
        tokenizer = SearchQueryTokenizer("#python and #django")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TAG)
        self.assertEqual(tokens[0].value, "python")
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[2].type, TokenType.TAG)
        self.assertEqual(tokens[2].value, "django")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_tags_mixed_with_terms(self):
        tokenizer = SearchQueryTokenizer("programming and #python and web")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 6)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "programming")
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[2].type, TokenType.TAG)
        self.assertEqual(tokens[2].value, "python")
        self.assertEqual(tokens[3].type, TokenType.AND)
        self.assertEqual(tokens[4].type, TokenType.TERM)
        self.assertEqual(tokens[4].value, "web")
        self.assertEqual(tokens[5].type, TokenType.EOF)

    def test_empty_tag(self):
        # Tag with just # should be ignored (no token created)
        tokenizer = SearchQueryTokenizer("# ")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 1)
        self.assertEqual(tokens[0].type, TokenType.EOF)

        # Empty tag at end of string
        tokenizer = SearchQueryTokenizer("#")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 1)
        self.assertEqual(tokens[0].type, TokenType.EOF)

        # Empty tag mixed with other terms
        tokenizer = SearchQueryTokenizer("python # and django")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.TERM)
        self.assertEqual(tokens[0].value, "python")
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[2].type, TokenType.TERM)
        self.assertEqual(tokens[2].value, "django")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_special_keywords(self):
        tokenizer = SearchQueryTokenizer("!unread")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.SPECIAL_KEYWORD)
        self.assertEqual(tokens[0].value, "unread")
        self.assertEqual(tokens[1].type, TokenType.EOF)

        tokenizer = SearchQueryTokenizer("!untagged")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 2)
        self.assertEqual(tokens[0].type, TokenType.SPECIAL_KEYWORD)
        self.assertEqual(tokens[0].value, "untagged")
        self.assertEqual(tokens[1].type, TokenType.EOF)

    def test_special_keywords_with_operators(self):
        tokenizer = SearchQueryTokenizer("!unread and !untagged")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 4)
        self.assertEqual(tokens[0].type, TokenType.SPECIAL_KEYWORD)
        self.assertEqual(tokens[0].value, "unread")
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[2].type, TokenType.SPECIAL_KEYWORD)
        self.assertEqual(tokens[2].value, "untagged")
        self.assertEqual(tokens[3].type, TokenType.EOF)

    def test_special_keywords_mixed_with_terms_and_tags(self):
        tokenizer = SearchQueryTokenizer("!unread and #python and tutorial")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 6)
        self.assertEqual(tokens[0].type, TokenType.SPECIAL_KEYWORD)
        self.assertEqual(tokens[0].value, "unread")
        self.assertEqual(tokens[1].type, TokenType.AND)
        self.assertEqual(tokens[2].type, TokenType.TAG)
        self.assertEqual(tokens[2].value, "python")
        self.assertEqual(tokens[3].type, TokenType.AND)
        self.assertEqual(tokens[4].type, TokenType.TERM)
        self.assertEqual(tokens[4].value, "tutorial")
        self.assertEqual(tokens[5].type, TokenType.EOF)

    def test_empty_special_keyword(self):
        # Special keyword with just ! should be ignored (no token created)
        tokenizer = SearchQueryTokenizer("! ")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 1)
        self.assertEqual(tokens[0].type, TokenType.EOF)

        # Empty special keyword at end of string
        tokenizer = SearchQueryTokenizer("!")
        tokens = tokenizer.tokenize()
        self.assertEqual(len(tokens), 1)
        self.assertEqual(tokens[0].type, TokenType.EOF)


class SearchQueryParserTest(TestCase):
    """Test cases for the search query parser."""

    def test_empty_query(self):
        result = parse_search_query("")
        self.assertIsNone(result)

    def test_whitespace_only_query(self):
        result = parse_search_query(" ")
        self.assertIsNone(result)

    def test_single_term(self):
        result = parse_search_query("programming")
        expected = _term("programming")
        self.assertEqual(result, expected)

    def test_and_expression(self):
        result = parse_search_query("programming and books")
        expected = _and(_term("programming"), _term("books"))
        self.assertEqual(result, expected)

    def test_or_expression(self):
        result = parse_search_query("programming or books")
        expected = _or(_term("programming"), _term("books"))
        self.assertEqual(result, expected)

    def test_not_expression(self):
        result = parse_search_query("not programming")
        expected = _not(_term("programming"))
        self.assertEqual(result, expected)

    def test_operator_precedence_and_over_or(self):
        # "a or b and c" should parse as "a or (b and c)"
        result = parse_search_query("programming or books and streaming")
        expected = _or(_term("programming"), _and(_term("books"), _term("streaming")))
        self.assertEqual(result, expected)

    def test_operator_precedence_not_over_and(self):
        # "not a and b" should parse as "(not a) and b"
        result = parse_search_query("not programming and books")
        expected = _and(_not(_term("programming")), _term("books"))
        self.assertEqual(result, expected)

    def test_multiple_and_operators(self):
        # "a and b and c" should parse as "(a and b) and c" (left associative)
        result = parse_search_query("programming and books and streaming")
        expected = _and(_and(_term("programming"), _term("books")), _term("streaming"))
        self.assertEqual(result, expected)

    def test_multiple_or_operators(self):
        # "a or b or c" should parse as "(a or b) or c" (left associative)
        result = parse_search_query("programming or books or streaming")
        expected = _or(_or(_term("programming"), _term("books")), _term("streaming"))
        self.assertEqual(result, expected)

    def test_multiple_not_operators(self):
        result = parse_search_query("not not programming")
        expected = _not(_not(_term("programming")))
        self.assertEqual(result, expected)

    def test_parentheses_basic(self):
        result = parse_search_query("(programming)")
        expected = _term("programming")
        self.assertEqual(result, expected)

    def test_parentheses_change_precedence(self):
        # "(a or b) and c" should keep the parenthesized "or" grouped
        result = parse_search_query("(programming or books) and streaming")
        expected = _and(_or(_term("programming"), _term("books")), _term("streaming"))
        self.assertEqual(result, expected)

    def test_nested_parentheses(self):
        result = parse_search_query("((programming))")
        expected = _term("programming")
        self.assertEqual(result, expected)

    def test_complex_expression(self):
        result = parse_search_query(
            "programming and (books or streaming) and not client-side"
        )
        # Should be parsed as "(programming and (books or streaming)) and (not client-side)"
        expected = _and(
            _and(_term("programming"), _or(_term("books"), _term("streaming"))),
            _not(_term("client-side")),
        )
        self.assertEqual(result, expected)

    def test_hyphenated_terms(self):
        result = parse_search_query("client-side")
        expected = _term("client-side")
        self.assertEqual(result, expected)

    def test_case_insensitive_operators(self):
        result = parse_search_query("programming AND books OR streaming")
        expected = _or(_and(_term("programming"), _term("books")), _term("streaming"))
        self.assertEqual(result, expected)

        # Test implicit AND with NOT
        result = parse_search_query("programming AND books OR streaming NOT videos")
        expected = _or(
            _and(_term("programming"), _term("books")),
            _and(_term("streaming"), _not(_term("videos"))),
        )
        self.assertEqual(result, expected)

    def test_case_insensitive_operators_with_explicit_operators(self):
        result = parse_search_query("programming AND books OR streaming AND NOT videos")
        # Should parse as: (programming AND books) OR (streaming AND (NOT videos))
        expected = _or(
            _and(_term("programming"), _term("books")),
            _and(_term("streaming"), _not(_term("videos"))),
        )
        self.assertEqual(result, expected)

    def test_single_character_terms(self):
        result = parse_search_query("a and b")
        expected = _and(_term("a"), _term("b"))
        self.assertEqual(result, expected)

    def test_numeric_terms(self):
        result = parse_search_query("123 and 456")
        expected = _and(_term("123"), _term("456"))
        self.assertEqual(result, expected)

    def test_special_characters_in_terms(self):
        result = parse_search_query("test@example.com and file.txt")
        expected = _and(_term("test@example.com"), _term("file.txt"))
        self.assertEqual(result, expected)

    def test_url_terms(self):
        result = parse_search_query("https://example.com/foo/bar")
        expected = _term("https://example.com/foo/bar")
        self.assertEqual(result, expected)

    def test_url_with_operators(self):
        result = parse_search_query("https://github.com or https://gitlab.com")
        expected = _or(_term("https://github.com"), _term("https://gitlab.com"))
        self.assertEqual(result, expected)

    def test_quoted_strings(self):
        # Basic quoted string
        result = parse_search_query('"good and bad"')
        expected = _term("good and bad")
        self.assertEqual(result, expected)

        # Single quotes
        result = parse_search_query("'hello world'")
        expected = _term("hello world")
        self.assertEqual(result, expected)

    def test_quoted_strings_with_operators(self):
        # Quoted string with OR
        result = parse_search_query('"good and bad" or programming')
        expected = _or(_term("good and bad"), _term("programming"))
        self.assertEqual(result, expected)

        # Quoted string with AND
        result = parse_search_query('documentation and "API reference"')
        expected = _and(_term("documentation"), _term("API reference"))
        self.assertEqual(result, expected)

        # Quoted string with NOT
        result = parse_search_query('programming and not "bad practices"')
        expected = _and(_term("programming"), _not(_term("bad practices")))
        self.assertEqual(result, expected)

    def test_multiple_quoted_strings(self):
        result = parse_search_query('"hello world" and "goodbye moon"')
        expected = _and(_term("hello world"), _term("goodbye moon"))
        self.assertEqual(result, expected)

    def test_quoted_strings_with_parentheses(self):
        result = parse_search_query('("good morning" or "good evening") and coffee')
        expected = _and(
            _or(_term("good morning"), _term("good evening")), _term("coffee")
        )
        self.assertEqual(result, expected)

    def test_escaped_quotes_in_terms(self):
        result = parse_search_query('"say \\"hello\\""')
        expected = _term('say "hello"')
        self.assertEqual(result, expected)

    def test_tags(self):
        # Basic tag
        result = parse_search_query("#python")
        expected = _tag("python")
        self.assertEqual(result, expected)

        # Tag with hyphens
        result = parse_search_query("#machine-learning")
        expected = _tag("machine-learning")
        self.assertEqual(result, expected)

    def test_tags_with_operators(self):
        # Tag with AND
        result = parse_search_query("#python and #django")
        expected = _and(_tag("python"), _tag("django"))
        self.assertEqual(result, expected)

        # Tag with OR
        result = parse_search_query("#frontend or #backend")
        expected = _or(_tag("frontend"), _tag("backend"))
        self.assertEqual(result, expected)

        # Tag with NOT
        result = parse_search_query("not #deprecated")
        expected = _not(_tag("deprecated"))
        self.assertEqual(result, expected)

    def test_tags_mixed_with_terms(self):
        result = parse_search_query("programming and #python and tutorial")
        expected = _and(_and(_term("programming"), _tag("python")), _term("tutorial"))
        self.assertEqual(result, expected)

    def test_tags_with_quoted_strings(self):
        result = parse_search_query('"machine learning" and #python')
        expected = _and(_term("machine learning"), _tag("python"))
        self.assertEqual(result, expected)

    def test_tags_with_parentheses(self):
        result = parse_search_query("(#frontend or #backend) and javascript")
        expected = _and(_or(_tag("frontend"), _tag("backend")), _term("javascript"))
        self.assertEqual(result, expected)

    def test_empty_tags_ignored(self):
        # Test single empty tag
        result = parse_search_query("#")
        expected = None  # Empty query
        self.assertEqual(result, expected)

        # Test query that's just an empty tag and whitespace
        result = parse_search_query("# ")
        expected = None  # Empty query
        self.assertEqual(result, expected)

    def test_special_keywords(self):
        result = parse_search_query("!unread")
        expected = _keyword("unread")
        self.assertEqual(result, expected)

        result = parse_search_query("!untagged")
        expected = _keyword("untagged")
        self.assertEqual(result, expected)

    def test_special_keywords_with_operators(self):
        # Special keyword with AND
        result = parse_search_query("!unread and !untagged")
        expected = _and(_keyword("unread"), _keyword("untagged"))
        self.assertEqual(result, expected)

        # Special keyword with OR
        result = parse_search_query("!unread or !untagged")
        expected = _or(_keyword("unread"), _keyword("untagged"))
        self.assertEqual(result, expected)

        # Special keyword with NOT
        result = parse_search_query("not !unread")
        expected = _not(_keyword("unread"))
        self.assertEqual(result, expected)

    def test_special_keywords_mixed_with_terms_and_tags(self):
        result = parse_search_query("!unread and #python and tutorial")
        expected = _and(_and(_keyword("unread"), _tag("python")), _term("tutorial"))
        self.assertEqual(result, expected)

    def test_special_keywords_with_quoted_strings(self):
        result = parse_search_query('"machine learning" and !unread')
        expected = _and(_term("machine learning"), _keyword("unread"))
        self.assertEqual(result, expected)

    def test_special_keywords_with_parentheses(self):
        result = parse_search_query("(!unread or !untagged) and javascript")
        expected = _and(
            _or(_keyword("unread"), _keyword("untagged")), _term("javascript")
        )
        self.assertEqual(result, expected)

    def test_special_keywords_within_quoted_string(self):
        result = parse_search_query("'!unread and !untagged'")
        expected = _term("!unread and !untagged")
        self.assertEqual(result, expected)

    def test_implicit_and_basic(self):
        # Basic implicit AND between terms
        result = parse_search_query("programming book")
        expected = _and(_term("programming"), _term("book"))
        self.assertEqual(result, expected)

        # Three terms with implicit AND
        result = parse_search_query("python machine learning")
        expected = _and(_and(_term("python"), _term("machine")), _term("learning"))
        self.assertEqual(result, expected)

    def test_implicit_and_with_tags(self):
        # Implicit AND between term and tag
        result = parse_search_query("tutorial #python")
        expected = _and(_term("tutorial"), _tag("python"))
        self.assertEqual(result, expected)

        # Implicit AND between tag and term
        result = parse_search_query("#javascript tutorial")
        expected = _and(_tag("javascript"), _term("tutorial"))
        self.assertEqual(result, expected)

        # Multiple tags with implicit AND
        result = parse_search_query("#python #django #tutorial")
        expected = _and(_and(_tag("python"), _tag("django")), _tag("tutorial"))
        self.assertEqual(result, expected)

    def test_implicit_and_with_quoted_strings(self):
        # Implicit AND with quoted strings
        result = parse_search_query('"machine learning" tutorial')
        expected = _and(_term("machine learning"), _term("tutorial"))
        self.assertEqual(result, expected)

        # Mixed types with implicit AND
        result = parse_search_query('"deep learning" #python tutorial')
        expected = _and(_and(_term("deep learning"), _tag("python")), _term("tutorial"))
        self.assertEqual(result, expected)

    def test_implicit_and_with_explicit_operators(self):
        # Mixed implicit and explicit AND
        result = parse_search_query("python tutorial and django")
        expected = _and(_and(_term("python"), _term("tutorial")), _term("django"))
        self.assertEqual(result, expected)

        # Implicit AND with OR
        result = parse_search_query("python tutorial or java guide")
        expected = _or(
            _and(_term("python"), _term("tutorial")),
            _and(_term("java"), _term("guide")),
        )
        self.assertEqual(result, expected)

    def test_implicit_and_with_not(self):
        # NOT with implicit AND
        result = parse_search_query("not deprecated tutorial")
        expected = _and(_not(_term("deprecated")), _term("tutorial"))
        self.assertEqual(result, expected)

        # Implicit AND with NOT at end
        result = parse_search_query("python tutorial not deprecated")
        expected = _and(
            _and(_term("python"), _term("tutorial")), _not(_term("deprecated"))
        )
        self.assertEqual(result, expected)

    def test_implicit_and_with_parentheses(self):
        # Parentheses with implicit AND
        result = parse_search_query("(python tutorial) or java")
        expected = _or(_and(_term("python"), _term("tutorial")), _term("java"))
        self.assertEqual(result, expected)

        # Complex parentheses with implicit AND
        result = parse_search_query(
            "(machine learning #python) and (web development #javascript)"
        )
        expected = _and(
            _and(_and(_term("machine"), _term("learning")), _tag("python")),
            _and(_and(_term("web"), _term("development")), _tag("javascript")),
        )
        self.assertEqual(result, expected)

    def test_complex_precedence_with_implicit_and(self):
        result = parse_search_query("python tutorial or javascript guide")
        expected = _or(
            _and(_term("python"), _term("tutorial")),
            _and(_term("javascript"), _term("guide")),
        )
        self.assertEqual(result, expected)

        result = parse_search_query(
            "machine learning and (python or r) tutorial #beginner"
        )
        expected = _and(
            _and(
                _and(
                    _and(_term("machine"), _term("learning")),
                    _or(_term("python"), _term("r")),
                ),
                _term("tutorial"),
            ),
            _tag("beginner"),
        )
        self.assertEqual(result, expected)

    def test_operator_words_as_substrings(self):
        # Terms that contain operator words as substrings should be treated as terms
        result = parse_search_query("android and notification")
        expected = _and(_term("android"), _term("notification"))
        self.assertEqual(result, expected)

    def test_complex_queries(self):
        test_cases = [
            (
                "(programming or software) and not client-side and (javascript or python)",
                _and(
                    _and(
                        _or(_term("programming"), _term("software")),
                        _not(_term("client-side")),
                    ),
                    _or(_term("javascript"), _term("python")),
                ),
            ),
            (
                "(machine-learning or ai) and python and not deprecated",
                _and(
                    _and(
                        _or(_term("machine-learning"), _term("ai")),
                        _term("python"),
                    ),
                    _not(_term("deprecated")),
                ),
            ),
            (
                "frontend and (react or vue or angular) and not jquery",
                _and(
                    _and(
                        _term("frontend"),
                        _or(
                            _or(_term("react"), _term("vue")),
                            _term("angular"),
                        ),
                    ),
                    _not(_term("jquery")),
                ),
            ),
            (
                '"machine learning" and (python or r) and not "deep learning"',
                _and(
                    _and(
                        _term("machine learning"),
                        _or(_term("python"), _term("r")),
                    ),
                    _not(_term("deep learning")),
                ),
            ),
            (
                "(#python or #javascript) and tutorial and not #deprecated",
                _and(
                    _and(
                        _or(_tag("python"), _tag("javascript")),
                        _term("tutorial"),
                    ),
                    _not(_tag("deprecated")),
                ),
            ),
            (
                "machine learning tutorial #python beginner",
                _and(
                    _and(
                        _and(
                            _and(_term("machine"), _term("learning")), _term("tutorial")
                        ),
                        _tag("python"),
                    ),
                    _term("beginner"),
                ),
            ),
        ]

        for query, expected_ast in test_cases:
            with self.subTest(query=query):
                result = parse_search_query(query)
                self.assertEqual(result, expected_ast, f"Failed for query: {query}")


class SearchQueryParserErrorTest(TestCase):
    """Test cases for error handling of invalid search queries."""

    def test_unmatched_left_parenthesis(self):
        with self.assertRaises(SearchQueryParseError) as cm:
            parse_search_query("(programming and books")
        self.assertIn("Expected RPAREN", str(cm.exception))

    def test_unmatched_right_parenthesis(self):
        with self.assertRaises(SearchQueryParseError) as cm:
            parse_search_query("programming and books)")
        self.assertIn("Unexpected token", str(cm.exception))

    def test_empty_parentheses(self):
        with self.assertRaises(SearchQueryParseError) as cm:
            parse_search_query("()")
        self.assertIn("Unexpected token RPAREN", str(cm.exception))

    def test_operator_without_operand(self):
        with self.assertRaises(SearchQueryParseError) as cm:
            parse_search_query("and")
        self.assertIn("Unexpected token AND", str(cm.exception))

    def test_trailing_operator(self):
        with self.assertRaises(SearchQueryParseError) as cm:
            parse_search_query("programming and")
        self.assertIn("Unexpected token EOF", str(cm.exception))

    def test_consecutive_operators(self):
        with self.assertRaises(SearchQueryParseError) as cm:
            parse_search_query("programming and or books")
        self.assertIn("Unexpected token OR", str(cm.exception))

    def test_not_without_operand(self):
        with self.assertRaises(SearchQueryParseError) as cm:
            parse_search_query("not")
        self.assertIn("Unexpected token EOF", str(cm.exception))


class ExpressionToStringTest(TestCase):
    """Test cases for converting expression trees back into query strings."""

    def test_simple_term(self):
        expr = _term("python")
        self.assertEqual(expression_to_string(expr), "python")

    def test_simple_tag(self):
        expr = _tag("python")
        self.assertEqual(expression_to_string(expr), "#python")

    def test_simple_keyword(self):
        expr = _keyword("unread")
        self.assertEqual(expression_to_string(expr), "!unread")

    def test_term_with_spaces(self):
        expr = _term("machine learning")
        self.assertEqual(expression_to_string(expr), '"machine learning"')

    def test_term_with_quotes(self):
        expr = _term('say "hello"')
        self.assertEqual(expression_to_string(expr), '"say \\"hello\\""')

    def test_and_expression_implicit(self):
        expr = _and(_term("python"), _term("tutorial"))
        self.assertEqual(expression_to_string(expr), "python tutorial")

    def test_and_expression_with_tags(self):
        expr = _and(_tag("python"), _tag("django"))
        self.assertEqual(expression_to_string(expr), "#python #django")

    def test_and_expression_complex(self):
        expr = _and(_or(_term("python"), _term("ruby")), _term("tutorial"))
        self.assertEqual(expression_to_string(expr), "(python or ruby) tutorial")

    def test_or_expression(self):
        expr = _or(_term("python"), _term("ruby"))
        self.assertEqual(expression_to_string(expr), "python or ruby")

    def test_or_expression_with_and(self):
        expr = _or(_and(_term("python"), _term("tutorial")), _term("ruby"))
        self.assertEqual(expression_to_string(expr), "python tutorial or ruby")

    def test_not_expression(self):
        expr = _not(_term("deprecated"))
        self.assertEqual(expression_to_string(expr), "not deprecated")

    def test_not_with_tag(self):
        expr = _not(_tag("deprecated"))
        self.assertEqual(expression_to_string(expr), "not #deprecated")

    def test_not_with_and(self):
        expr = _not(_and(_term("python"), _term("deprecated")))
        self.assertEqual(expression_to_string(expr), "not (python deprecated)")

    def test_complex_nested_expression(self):
        expr = _and(
            _or(_term("python"), _term("ruby")),
            _or(_term("tutorial"), _term("guide")),
        )
        result = expression_to_string(expr)
        self.assertEqual(result, "(python or ruby) (tutorial or guide)")

    def test_implicit_and_chain(self):
        expr = _and(_and(_term("machine"), _term("learning")), _term("tutorial"))
        self.assertEqual(expression_to_string(expr), "machine learning tutorial")

    def test_none_expression(self):
        self.assertEqual(expression_to_string(None), "")

    def test_round_trip(self):
        test_cases = [
            "#python",
            "python tutorial",
            "#python #django",
            "python or ruby",
            "not deprecated",
            "(python or ruby) and tutorial",
            "tutorial and (python or ruby)",
            "(python or ruby) tutorial",
            "tutorial (python or ruby)",
        ]

        for query in test_cases:
            with self.subTest(query=query):
                ast = parse_search_query(query)
                result = expression_to_string(ast)
                ast2 = parse_search_query(result)
                self.assertEqual(ast, ast2)


class StripTagFromQueryTest(TestCase):
    """Test cases for removing a tag from a search query."""

    def test_single_tag(self):
        result = strip_tag_from_query("#books", "books")
        self.assertEqual(result, "")

    def test_tag_with_and(self):
        result = strip_tag_from_query("#history and #books", "books")
        self.assertEqual(result, "#history")

    def test_tag_with_and_not(self):
        result = strip_tag_from_query("#history and not #books", "books")
        self.assertEqual(result, "#history")

    def test_implicit_and_with_term_and_tags(self):
        result = strip_tag_from_query("roman #history #books", "books")
        self.assertEqual(result, "roman #history")

    def test_tag_in_or_expression(self):
        result = strip_tag_from_query("roman and (#history or #books)", "books")
        self.assertEqual(result, "roman #history")

    def test_complex_or_with_and(self):
        result = strip_tag_from_query(
            "(roman and #books) or (greek and #books)", "books"
        )
        self.assertEqual(result, "roman or greek")

    def test_case_insensitive(self):
        result = strip_tag_from_query("#Books and #History", "books")
        self.assertEqual(result, "#History")

    def test_tag_not_present(self):
        result = strip_tag_from_query("#history and #science", "books")
        self.assertEqual(result, "#history #science")

    def test_multiple_same_tags(self):
        result = strip_tag_from_query("#books or #books", "books")
        self.assertEqual(result, "")

    def test_nested_parentheses(self):
        result = strip_tag_from_query("((#books and tutorial) or guide)", "books")
        self.assertEqual(result, "tutorial or guide")

    def test_not_expression_with_tag(self):
        result = strip_tag_from_query("tutorial and not #books", "books")
        self.assertEqual(result, "tutorial")

    def test_only_not_tag(self):
        result = strip_tag_from_query("not #books", "books")
        self.assertEqual(result, "")

    def test_complex_query(self):
        result = strip_tag_from_query(
            "(#python or #ruby) and tutorial and not #books", "books"
        )
        self.assertEqual(result, "(#python or #ruby) tutorial")

    def test_empty_query(self):
        result = strip_tag_from_query("", "books")
        self.assertEqual(result, "")

    def test_whitespace_only(self):
        result = strip_tag_from_query(" ", "books")
        self.assertEqual(result, "")

    def test_special_keywords_preserved(self):
        result = strip_tag_from_query("!unread and #books", "books")
        self.assertEqual(result, "!unread")

    def test_quoted_terms_preserved(self):
        result = strip_tag_from_query('"machine learning" and #books', "books")
        self.assertEqual(result, '"machine learning"')

    def test_all_tags_in_and_chain(self):
        result = strip_tag_from_query("#books and #books and #books", "books")
        self.assertEqual(result, "")

    def test_tag_similar_name(self):
        # Should not remove #book when removing #books
        result = strip_tag_from_query("#book and #books", "books")
        self.assertEqual(result, "#book")

    def test_invalid_query_returns_original(self):
        # If the query is malformed, the original query should be returned
        result = strip_tag_from_query("(unclosed paren", "books")
        self.assertEqual(result, "(unclosed paren")

    def test_implicit_and_in_output(self):
        result = strip_tag_from_query("python tutorial #books #django", "books")
        self.assertEqual(result, "python tutorial #django")

    def test_nested_or_simplify_parenthesis(self):
        result = strip_tag_from_query(
            "(#books or tutorial) and (#books or guide)", "books"
        )
        self.assertEqual(result, "tutorial guide")

    def test_nested_or_preserve_parenthesis(self):
        result = strip_tag_from_query(
            "(#books or tutorial or guide) and (#books or help or lesson)", "books"
        )
        self.assertEqual(result, "(tutorial or guide) (help or lesson)")

    def test_left_side_removed(self):
        result = strip_tag_from_query("#books and python", "books")
        self.assertEqual(result, "python")

    def test_right_side_removed(self):
        result = strip_tag_from_query("python and #books", "books")
        self.assertEqual(result, "python")


class StripTagFromQueryLaxSearchTest(TestCase):
    """Test cases for removing a tag from a query with lax tag search enabled."""

    def setUp(self):
        self.lax_profile = type(
            "UserProfile", (), {"tag_search": UserProfile.TAG_SEARCH_LAX}
        )()
        self.strict_profile = type(
            "UserProfile", (), {"tag_search": UserProfile.TAG_SEARCH_STRICT}
        )()

    def test_lax_search_removes_matching_term(self):
        result = strip_tag_from_query("books", "books", self.lax_profile)
        self.assertEqual(result, "")

    def test_lax_search_removes_term_case_insensitive(self):
        result = strip_tag_from_query("Books", "books", self.lax_profile)
        self.assertEqual(result, "")

        result = strip_tag_from_query("BOOKS", "books", self.lax_profile)
        self.assertEqual(result, "")

    def test_lax_search_multiple_terms(self):
        result = strip_tag_from_query("books and history", "books", self.lax_profile)
        self.assertEqual(result, "history")

    def test_lax_search_preserves_non_matching_terms(self):
        result = strip_tag_from_query("history and science", "books", self.lax_profile)
        self.assertEqual(result, "history science")

    def test_lax_search_removes_both_tag_and_term(self):
        result = strip_tag_from_query("books #books", "books", self.lax_profile)
        self.assertEqual(result, "")

    def test_lax_search_mixed_tag_and_term(self):
        result = strip_tag_from_query(
            "books and #history and #books", "books", self.lax_profile
        )
        self.assertEqual(result, "#history")

    def test_lax_search_term_in_or_expression(self):
        result = strip_tag_from_query(
            "(books or history) and guide", "books", self.lax_profile
        )
        self.assertEqual(result, "history guide")

    def test_lax_search_term_in_not_expression(self):
        result = strip_tag_from_query(
            "history and not books", "books", self.lax_profile
        )
        self.assertEqual(result, "history")

    def test_lax_search_only_not_term(self):
        result = strip_tag_from_query("not books", "books", self.lax_profile)
        self.assertEqual(result, "")

    def test_lax_search_complex_query(self):
        result = strip_tag_from_query(
            "(books or #books) and (history or guide)", "books", self.lax_profile
        )
        self.assertEqual(result, "history or guide")

    def test_lax_search_quoted_term_with_same_name(self):
        result = strip_tag_from_query('"books" and history', "books", self.lax_profile)
        self.assertEqual(result, "history")

    def test_lax_search_partial_match_not_removed(self):
        result = strip_tag_from_query("bookshelf", "books", self.lax_profile)
        self.assertEqual(result, "bookshelf")

    def test_lax_search_multiple_occurrences(self):
        result = strip_tag_from_query(
            "books or books or history", "books", self.lax_profile
        )
        self.assertEqual(result, "history")

    def test_lax_search_nested_expressions(self):
        result = strip_tag_from_query(
            "((books and tutorial) or guide) and history", "books", self.lax_profile
        )
        self.assertEqual(result, "(tutorial or guide) history")

    def test_strict_search_preserves_terms(self):
        result = strip_tag_from_query("books", "books", self.strict_profile)
        self.assertEqual(result, "books")

    def test_strict_search_preserves_terms_with_tags(self):
        result = strip_tag_from_query("books #books", "books", self.strict_profile)
        self.assertEqual(result, "books")

    def test_no_profile_defaults_to_strict(self):
        result = strip_tag_from_query("books #books", "books", None)
        self.assertEqual(result, "books")


class ExtractTagNamesFromQueryTest(TestCase):
    """Test cases for extracting tag names from a search query."""

    def test_empty_query(self):
        result = extract_tag_names_from_query("")
        self.assertEqual(result, [])

    def test_whitespace_query(self):
        result = extract_tag_names_from_query(" ")
        self.assertEqual(result, [])

    def test_single_tag(self):
        result = extract_tag_names_from_query("#python")
        self.assertEqual(result, ["python"])

    def test_multiple_tags(self):
        result = extract_tag_names_from_query("#python and #django")
        self.assertEqual(result, ["django", "python"])

    def test_tags_with_or(self):
        result = extract_tag_names_from_query("#python or #ruby")
        self.assertEqual(result, ["python", "ruby"])

    def test_tags_with_not(self):
        result = extract_tag_names_from_query("not #deprecated")
        self.assertEqual(result, ["deprecated"])

    def test_tags_in_complex_query(self):
        result = extract_tag_names_from_query(
            "(#python or #ruby) and #tutorial and not #deprecated"
        )
        self.assertEqual(result, ["deprecated", "python", "ruby", "tutorial"])

    def test_duplicate_tags(self):
        result = extract_tag_names_from_query("#python and #python")
        self.assertEqual(result, ["python"])

    def test_case_insensitive_deduplication(self):
        result = extract_tag_names_from_query("#Python and #PYTHON and #python")
        self.assertEqual(result, ["python"])

    def test_mixed_tags_and_terms(self):
        result = extract_tag_names_from_query("tutorial #python guide #django")
        self.assertEqual(result, ["django", "python"])

    def test_only_terms_no_tags(self):
        result = extract_tag_names_from_query("tutorial guide")
        self.assertEqual(result, [])

    def test_special_keywords_not_extracted(self):
        result = extract_tag_names_from_query("!unread and #python")
        self.assertEqual(result, ["python"])

    def test_tags_in_nested_parentheses(self):
        result = extract_tag_names_from_query("((#python and #django) or #ruby)")
        self.assertEqual(result, ["django", "python", "ruby"])

    def test_invalid_query_returns_empty(self):
        result = extract_tag_names_from_query("(unclosed paren")
        self.assertEqual(result, [])

    def test_tags_with_hyphens(self):
        result = extract_tag_names_from_query("#machine-learning and #deep-learning")
        self.assertEqual(result, ["deep-learning", "machine-learning"])


class ExtractTagNamesFromQueryLaxSearchTest(TestCase):
    """Test cases for extracting tag names with lax tag search enabled."""

    def setUp(self):
        self.lax_profile = type(
            "UserProfile", (), {"tag_search": UserProfile.TAG_SEARCH_LAX}
        )()
        self.strict_profile = type(
            "UserProfile", (), {"tag_search": UserProfile.TAG_SEARCH_STRICT}
        )()

    def test_lax_search_extracts_terms(self):
        result = extract_tag_names_from_query("python and django", self.lax_profile)
        self.assertEqual(result, ["django", "python"])

    def test_lax_search_mixed_tags_and_terms(self):
        result = extract_tag_names_from_query(
            "tutorial #python guide #django", self.lax_profile
        )
        self.assertEqual(result, ["django", "guide", "python", "tutorial"])

    def test_lax_search_deduplicates_tags_and_terms(self):
        result = extract_tag_names_from_query("python #python", self.lax_profile)
        self.assertEqual(result, ["python"])

    def test_lax_search_case_insensitive_dedup(self):
        result = extract_tag_names_from_query("Python #python PYTHON", self.lax_profile)
        self.assertEqual(result, ["python"])

    def test_lax_search_terms_in_or_expression(self):
        result = extract_tag_names_from_query(
            "(python or ruby) and tutorial", self.lax_profile
        )
        self.assertEqual(result, ["python", "ruby", "tutorial"])

    def test_lax_search_terms_in_not_expression(self):
        result = extract_tag_names_from_query(
            "tutorial and not deprecated", self.lax_profile
        )
        self.assertEqual(result, ["deprecated", "tutorial"])

    def test_lax_search_quoted_terms(self):
        result = extract_tag_names_from_query(
            '"machine learning" and #python', self.lax_profile
        )
        self.assertEqual(result, ["machine learning", "python"])

    def test_lax_search_complex_query(self):
        result = extract_tag_names_from_query(
            "(python or #ruby) and tutorial and not #deprecated", self.lax_profile
        )
        self.assertEqual(result, ["deprecated", "python", "ruby", "tutorial"])

    def test_lax_search_special_keywords_not_extracted(self):
        result = extract_tag_names_from_query(
            "!unread and python and #django", self.lax_profile
        )
        self.assertEqual(result, ["django", "python"])

    def test_strict_search_ignores_terms(self):
        result = extract_tag_names_from_query("python and django", self.strict_profile)
        self.assertEqual(result, [])

    def test_strict_search_only_tags(self):
        result = extract_tag_names_from_query(
            "tutorial #python guide #django", self.strict_profile
        )
        self.assertEqual(result, ["django", "python"])

    def test_no_profile_defaults_to_strict(self):
        result = extract_tag_names_from_query("python #django", None)
        self.assertEqual(result, ["django"])