search: fix parsing of quoted metatag values.

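* Fix #4552: Multiple quoted search terms not parsed correctly. The old parser matched quoted values greedily (`"(.+)"`), so with several quoted metatags in one query it captured everything from the first quote through the last.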
* Allow quotes to be escaped in quoted metatags.
* Allow spaces to be escaped in unquoted metatags.
* Allow the empty string to be used in metatags.

Examples:

* `source:""` and `source:''` (same as `source:none`)
* `source:foo\ bar\ baz` (same as `source:"foo bar baz"`)
* `source:"don't say \"lazy\""` (use \" to write a literal ")
* `source:'don\'t say "lazy"'` (use \' to write a literal ')
* `source:"C:\\Windows"` (use \\ to write a literal \)
This commit is contained in:
evazion
2020-08-12 17:57:22 -05:00
parent dd8c3fad2c
commit d884cb6642
2 changed files with 74 additions and 24 deletions

View File

@@ -332,8 +332,9 @@ class PostQueryBuilder
end
def source_matches(source, quoted = false)
case source.downcase
in "none" unless quoted
if source.empty?
Post.where_like(:source, "")
elsif source.downcase == "none" && !quoted
Post.where_like(:source, "")
else
Post.where_ilike(:source, source + "*")
@@ -644,14 +645,7 @@ class PostQueryBuilder
if scanner.scan(/(-)?(#{METATAGS.join("|")}):/io)
operator = scanner.captures.first
metatag = scanner.captures.second.downcase
if scanner.scan(/"(.+)"/) || scanner.scan(/'(.+)'/)
value = scanner.captures.first
quoted = true
else
value = scanner.scan(/[^ ]*/)
quoted = false
end
value, quoted = scan_string(scanner)
if metatag.in?(COUNT_METATAG_SYNONYMS)
metatag = metatag.singularize + "_count"
@@ -675,23 +669,41 @@ class PostQueryBuilder
terms
end
# Reads one metatag value starting at the scanner's current position.
#
# Three forms are recognized, tried in this order:
# * a double-quoted string, where a backslash makes the next character literal;
# * a single-quoted string, with the same backslash rule;
# * a bare word, which runs up to the first space that is not preceded by a
#   backslash (`\ ` yields a literal space). A bare word may be empty.
#
# @param scanner [#scan, #captures] scanner to consume from (presumably a StringScanner)
# @return [Array(String, Boolean)] the unescaped value and whether it was quoted
def scan_string(scanner)
  quoted = [/"((?:\\"|[^"])*)"/, /'((?:\\'|[^'])*)'/].any? { |pattern| scanner.scan(pattern) }

  unless quoted
    # Neither quoted form matched here, so fall back to a bare word.
    bare = scanner.scan(/(\\ |[^ ])*/)
    return [bare.gsub(/\\ /) { " " }, false]
  end

  # Drop the backslash from every escape pair, keeping the escaped character.
  [scanner.captures.first.gsub(/\\(.)/) { Regexp.last_match(1) }, true]
end
# Converts each parsed term back into its query-string form, one string per term.
# NOTE(review): this hunk is rendered without +/- markers, so the body below appears
# to contain both the old if/elsif chain and the code that replaces it. Confirm
# against the real file before editing.
def split_query
terms.map do |term|
# Presumably the superseded version: one branch per type/negated/quoted/optional
# combination. Quoted metatag values are wrapped in double quotes with no escaping.
if term.type == :metatag && !term.negated && !term.quoted
"#{term.name}:#{term.value}"
elsif term.type == :metatag && !term.negated && term.quoted
"#{term.name}:\"#{term.value}\""
elsif term.type == :metatag && term.negated && !term.quoted
"-#{term.name}:#{term.value}"
elsif term.type == :metatag && term.negated && term.quoted
"-#{term.name}:\"#{term.value}\""
elsif term.type == :tag && term.negated
"-#{term.name}"
elsif term.type == :tag && term.optional
"~#{term.name}"
elsif term.type == :tag
term.name
# Presumably the replacement version: build the string up incrementally.
type, name, value = term.type, term.name, term.value
str = ""
# Prefix markers: "-" for negated terms, "~" for optional terms.
str += "-" if term.negated
str += "~" if term.optional
if type == :tag
str += name
# Emit the quoted form when the term was quoted in the input or its value contains a space.
elsif type == :metatag && (term.quoted || value.include?(" "))
# Double the backslashes first, so the backslashes added in front of quotes
# on the next line are not themselves doubled.
value = value.gsub(/\\/) { '\\\\' }
value = value.gsub(/"/) { '\\"' }
str += "#{name}:\"#{value}\""
elsif type == :metatag
str += "#{name}:#{value}"
end
str
end
end

View File

@@ -9,6 +9,13 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
assert_equal(count, PostQueryBuilder.new(query, **query_options).normalized_query.fast_count(**fast_count_options))
end
# Asserts that `query` splits into `expected`, and that serializing the query is
# stable under a parse/serialize round trip.
#
# @param expected [Array<String>] terms that split_query should return for `query`
# @param query [String] the raw search string to parse
def assert_parse_equals(expected, query)
  actual_terms = PostQueryBuilder.new(query).split_query
  assert_equal(expected, actual_terms)

  # Round trip: serializing the query, then parsing and serializing that output
  # again, must give back the same string.
  serialize = ->(text) { PostQueryBuilder.new(text).to_s }
  assert_equal(serialize.call(query), serialize.call(serialize.call(query)))
end
setup do
CurrentUser.user = create(:user)
CurrentUser.ip_addr = "127.0.0.1"
@@ -718,7 +725,11 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
assert_tag_match([post3], "source:none")
assert_tag_match([post3], "source:NONE")
assert_tag_match([post3], 'source:""')
assert_tag_match([post3], "source:''")
assert_tag_match([post2, post1], "-source:none")
assert_tag_match([post2, post1], "-source:''")
assert_tag_match([post2, post1], '-source:""')
assert_tag_match([], "source:'none'")
assert_tag_match([], "source:none source:abcde")
@@ -1066,6 +1077,33 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
assert_equal(false, PostQueryBuilder.new('source:"foo bar baz"').is_simple_tag?)
assert_equal(false, PostQueryBuilder.new("foo bar").is_simple_tag?)
end
# Checks how quoted, unquoted, and escaped metatag values are parsed and then
# re-serialized by split_query. Each case also runs the round-trip check done by
# assert_parse_equals.
should "parse quoted metatags correctly" do
  # Several quoted metatags in one query, mixing quote styles. Quoted values are
  # always re-serialized with double quotes.
  assert_parse_equals(%w[status:"active" source:"https"], %q(status:'active' source:'https'))
  assert_parse_equals(%w[source:"https" status:"active"], %q(source:'https' status:'active'))
  assert_parse_equals(%w[status:"active" source:"https"], %q(status:"active" source:'https'))
  assert_parse_equals(%w[status:"active" source:"https"], %q(status:'active' source:"https"))
  assert_parse_equals(%w[status:"active" source:https], %q(status:'active' source:https))
  assert_parse_equals(%w[status:active source:"https"], %q(status:active source:'https'))
  assert_parse_equals(%w[limit:"5" status:"active" source:"x"], %q(limit:"5" status:"active" source:"x"))
  assert_parse_equals(%w[source:"" limit:"1" status:"deleted"], %q(source:"" limit:'1' status:'deleted'))

  # A metatag value containing a space, followed by a plain tag that itself
  # contains quote characters. The three input spellings (double-quoted,
  # backslash-escaped space, single-quoted) serialize identically.
  assert_parse_equals(['source:"bar baz"', 'don\'t_say_"lazy"'], %q(source:"bar baz" don't_say_"lazy"))
  assert_parse_equals(['source:"bar baz"', 'don\'t_say_"lazy"'], %q(source:bar\ baz don't_say_"lazy"))
  assert_parse_equals(['source:"bar baz"', 'don\'t_say_"lazy"'], %q(source:'bar baz' don't_say_"lazy"))

  # Inside quotes a backslash makes the next character literal.
  assert_parse_equals([%q(source:"foo")], %q(source:"\f\o\o"))
  assert_parse_equals([%q(source:"foo")], %q(source:'\f\o\o'))

  # In an unquoted value a backslash is only special before a space.
  assert_parse_equals([%q(source:foo\bar)], %q(source:foo\bar))

  # An unterminated quote is treated as part of an unquoted value.
  assert_parse_equals([%q(source:"foo)], %q(source:"foo))
  assert_parse_equals([%q(source:'foo)], %q(source:'foo))

  # Escaped spaces in unquoted values; the result is serialized in quoted form.
  assert_parse_equals([%q(source:"foo bar")], %q(source:foo\ bar))
  assert_parse_equals([%q(source:"\"foo bar\\\\")], %q(source:"foo\ bar\\))

  # Escaped quotes inside quoted values.
  assert_parse_equals(['source:"don\'t_say_\\"lazy\\""', 'don\'t_say_"lazy"'], %q(source:"don't_say_\"lazy\"" don't_say_"lazy"))
  assert_parse_equals(['source:"don\'t_say_\\"lazy\\""', 'don\'t_say_"lazy"'], %q(source:'don\'t_say_"lazy"' don't_say_"lazy"))
end
end
context "The normalized_query method" do