Files
danbooru/config/initializers/core_extensions.rb
evazion d9dc84325f Fix #5365: Don't allow whitespace-only text submission.
Fix bug where it was possible to submit blank text in various text fields.

Caused by `String#blank?` not considering certain Unicode characters as blank. `blank?` is defined
as `match?(/\A[[:space:]]*\z/)`, where `[[:space:]]` matches ASCII spaces (space, tab, newline, etc)
and Unicode characters in the Space category ([1]). However, there are other space-like characters
not in the Space category. This includes U+200B (Zero-Width Space), and many more.

It turns out the "Default ignorable code points" [2][3] are what we're after. These are the set of 400
or so formatting and control characters that are invisible when displayed.

Note that there are other control characters that aren't invisible when rendered; instead, they're
shown with a placeholder glyph. These include the ASCII C0 and C1 control codes [4], certain Unicode
control characters [5], and unassigned, reserved, and private use codepoints.

There is one outlier: the Braille pattern blank (U+2800) [6]. This character is visually blank, but is
not considered to be a space or an ignorable code point.

[1]: https://codepoints.net/search?gc[]=Z
[2]: https://codepoints.net/search?DI=1
[3]: https://www.unicode.org/review/pr-5.html
[4]: https://codepoints.net/search?gc[]=Cc
[5]: https://codepoints.net/search?gc[]=Cf
[6]: https://codepoints.net/U+2800
[7]: https://en.wikipedia.org/wiki/Whitespace_character
[8]: https://character.construction/blanks
[9]: https://invisible-characters.com
2022-12-05 01:58:34 -06:00

125 lines
3.6 KiB
Ruby

# frozen_string_literal: true
require "danbooru"
module Danbooru
  module Extensions
    # Extra helper methods intended to be mixed into the core String class.
    module String
      # Background reading on invisible / blank characters:
      #
      # https://invisible-characters.com
      # https://character.construction/blanks
      # https://www.unicode.org/review/pr-5.html (5.22 Default Ignorable Code Points)
      # https://en.wikipedia.org/wiki/Whitespace_character
      #
      # [[:space:]] = https://codepoints.net/search?gc[]=Z (Space_Separator | Line_Separator | Paragraph_Separator | U+0009 | U+000A | U+000B | U+000C | U+000D | U+0085)
      # \p{di}      = https://codepoints.net/search?DI=1 (Default_Ignorable_Code_Point)
      # \u2800      = https://codepoints.net/U+2800 (BRAILLE PATTERN BLANK)
      INVISIBLE_REGEX = /\A[[:space:]\p{di}\u2800]*\z/

      # Like `#blank?`, but also treats default-ignorable code points and the Braille pattern blank as blank.
      #
      # @return [Boolean] true if the string is empty or consists entirely of characters matched by INVISIBLE_REGEX
      def invisible?
        match?(INVISIBLE_REGEX)
      end

      # Translate a `*`-wildcard pattern into a pattern using `%` as the wildcard and `\` as the escape character.
      #
      #   %   => \%   (escaped)
      #   _   => \_   (escaped)
      #   *   => %    (wildcard)
      #   \*  => *    (escaped asterisk becomes a plain asterisk)
      #   \\  => \\   (already-escaped backslash is kept)
      #   \   => \\   (lone backslash is escaped)
      #
      # @return [String] the translated pattern
      def to_escaped_for_sql_like
        # Block form: the block's return value is inserted verbatim, so the
        # backslashes below are not reinterpreted as gsub backreferences.
        gsub(/%|_|\*|\\\*|\\\\|\\/) do |token|
          case token
          when '%' then '\%'
          when '_' then '\_'
          when '*' then '%'
          when '\*' then '*'
          when '\\\\' then '\\\\'
          when '\\' then '\\\\'
          end
        end
      end

      # Escape \ and * characters so that they're treated literally in LIKE searches.
      #
      # Every backslash becomes `\\` and every asterisk becomes `\*`.
      #
      # @return [String] the escaped string
      def escape_wildcards
        # The block form is required here. With a replacement *string*, gsub
        # interprets `\\` as a single literal backslash, so a string-based
        # "double the backslash" replacement silently leaves backslashes as-is.
        gsub(/[\\*]/) { |char| "\\#{char}" }
      end

      # Split the string on whitespace, escape each word with
      # `#to_escaped_for_tsquery`, and join the results with " & ".
      #
      # @return [String]
      def to_escaped_for_tsquery_split
        scan(/\S+/).map(&:to_escaped_for_tsquery).join(" & ")
      end

      # Wrap the string in single quotes after removing NUL characters, doubling
      # each single quote, and replacing each backslash with four backslashes.
      #
      # @return [String]
      def to_escaped_for_tsquery
        # In a gsub replacement string `\0` stands for the whole match, so
        # '\0\0' doubles the matched character and '\0\0\0\0' quadruples it.
        escaped = gsub(/\0/, '').gsub(/'/, '\0\0').gsub(/\\/, '\0\0\0\0')
        "'#{escaped}'"
      end

      # @return [Boolean] true if the whole string is one of true, t, yes, y, on, or 1 (case-insensitive)
      def truthy?
        match?(/\A(true|t|yes|y|on|1)\z/i)
      end

      # @return [Boolean] true if the whole string is one of false, f, no, n, off, or 0 (case-insensitive)
      def falsy?
        match?(/\A(false|f|no|n|off|0)\z/i)
      end

      # Case-insensitively match the whole string against a pattern in which `*`
      # is a wildcard and every other character is literal.
      #
      # @param pattern [String] the wildcard pattern
      # @return [Boolean] true if the entire string matches the pattern
      def ilike?(pattern)
        # Regexp.escape turns `*` into `\*`; swap those back for `.*`.
        regex_source = Regexp.escape(pattern).gsub(/\\\*/, ".*")
        match?(/\A#{regex_source}\z/i)
      end

      # Normalize whitespace: horizontal spaces become ASCII spaces, zero-width
      # characters are removed, and line endings become CRLF.
      #
      # @return [String] the normalized copy of the string
      def normalize_whitespace
        # Normalize various horizontal space characters to ASCII space.
        text = gsub(/\p{Zs}|\t/, " ")

        # Strip various zero width space characters. Zero width joiner (200D)
        # is allowed because it's used in emoji.
        text = text.gsub(/[\u180E\u200B\u200C\u2060\uFEFF]/, "")

        # Normalize various line ending characters to CRLF.
        text.gsub(/\r?\n|\r|\v|\f|\u0085|\u2028|\u2029/, "\r\n")
      end

      # @param open [String] the opening delimiter character
      # @param close [String] the closing delimiter character
      # @return [Boolean] True if the string contains only balanced parentheses; false if the string contains unbalanced parentheses.
      def has_balanced_parens?(open = "(", close = ")")
        depth = 0

        each_char do |char|
          if char == open
            depth += 1
          elsif char == close
            depth -= 1
            # A closer with no matching opener can never be balanced later.
            return false if depth.negative?
          end
        end

        depth.zero?
      end
    end
  end
end
# Mix the Danbooru string helpers into every String.
String.include(Danbooru::Extensions::String)
module MimeNegotationExtension
  private

  # Ignore all file extensions except for .html, .js, .json, and .xml when
  # parsing the file extension from the URL. Needed for wiki pages (e.g.
  # /wiki_pages/rnd.jpg).
  #
  # Returns whatever `super` returned when its symbol is one of the allowed
  # formats, and nil otherwise (including when `super` itself returned nil).
  def format_from_path_extension
    format = super
    format if format&.symbol.in?(%i[html js json xml])
  end
end
# Prepend (rather than include) the extension so that its
# format_from_path_extension runs first and `super` inside it reaches the
# implementation defined by ActionDispatch::Http::MimeNegotiation.
ActionDispatch::Http::MimeNegotiation.prepend(MimeNegotationExtension)
# Make Symbol#to_s return a frozen string. This reduces allocations, but may be
# incompatible with some libraries.
#
# This is done by replacing Symbol#to_s with an alias of Symbol#name, so the
# change is global: every caller of Symbol#to_s in the process is affected.
#
# https://bugs.ruby-lang.org/issues/16150
# https://github.com/Shopify/symbol-fstring
Symbol.alias_method(:to_s, :name)