Files
danbooru/app/logical/d_text.rb
evazion 654d2175b6 aliases/implications: remove processing state.
Remove the `processing` state from aliases and implications. This state
was used to mark when an alias or implication had been approved but the
alias or implication was still being processed. Aliases in the
processing state were still considered active, so there was no
functional difference between the active state and the processing state.

This fixes a problem where it was possible for implications to get stuck
in the processing state. This happened when a BUR contained a duplicate
implication. Transitioning from the processing state to the active state
failed in this case because we used `update` instead of `update!`, which
meant validation errors were silently ignored.
2020-11-12 18:22:51 -06:00

329 lines
11 KiB
Ruby

require 'cgi'
require 'uri'
# Helpers for working with DText markup: rendering it to HTML, converting HTML
# back to DText or plain text, and extracting or rewriting the wiki links,
# external links, and @mentions that a DText string contains.
class DText
  # Matches an `@name` mention at the start of a line or right after a space.
  MENTION_REGEXP = /(?<=^| )@\S+/

  # Render DText to HTML.
  #
  # @param text [String, nil] the DText source
  # @param data [Array, nil] the `[wiki_pages, tags, artists]` triple produced
  #   by {preprocess}; computed from `text` when nil
  # @param options [Hash] extra options forwarded to `DTextRagel.parse`
  # @return [String, nil] the rendered HTML; nil if `text` is nil; an empty
  #   string if `DTextRagel::Error` is raised
  def self.format_text(text, data: nil, **options)
    return nil if text.nil?

    data = preprocess([text]) if data.nil?
    text = parse_embedded_tag_request(text)
    html = DTextRagel.parse(text, **options)
    postprocess(html, *data)
  rescue DTextRagel::Error
    ""
  end

  # Look up the wiki pages, tags, and artists named by the wiki links in a
  # batch of DText messages, so {postprocess} can decorate links without
  # querying per link.
  #
  # @param dtext_messages [Array<String>] DText strings
  # @return [Array] `[wiki_pages, tags, artists]`
  def self.preprocess(dtext_messages)
    dtext_messages = dtext_messages.map { |message| parse_embedded_tag_request(message) }
    names = dtext_messages.flat_map { |message| parse_wiki_titles(message) }.uniq

    wiki_pages = WikiPage.where(title: names)
    tags = Tag.where(name: names)
    artists = Artist.where(name: names)

    [wiki_pages, tags, artists]
  end

  # Decorate every `a.dtext-wiki-link` in the rendered HTML with CSS classes
  # and titles describing the state of the linked wiki page, tag, and artist.
  # Links to artist tags are repointed at `/artists/show_or_new`.
  #
  # @param html [String] HTML produced by `DTextRagel.parse`
  # @param wiki_pages [Enumerable] candidates matched by `title`
  # @param tags [Enumerable] candidates matched by `name`
  # @param artists [Enumerable] candidates matched by `name`
  # @return [String] the modified HTML fragment
  def self.postprocess(html, wiki_pages, tags, artists)
    fragment = Nokogiri::HTML.fragment(html)

    fragment.css("a.dtext-wiki-link").each do |node|
      path = Addressable::URI.parse(node["href"]).path
      name = path[%r{\A/wiki_pages/(.*)\z}i, 1]
      name = CGI.unescape(name)
      name = WikiPage.normalize_title(name)

      wiki = wiki_pages.find { |page| page.title == name }
      tag = tags.find { |candidate| candidate.name == name }
      artist = artists.find { |candidate| candidate.name == name }

      if tag.present? && tag.artist?
        node["href"] = "/artists/show_or_new?name=#{CGI.escape(name)}"

        if artist.blank?
          node["class"] += " dtext-artist-does-not-exist"
          node["title"] = "This artist page does not exist"
        end

        node["class"] += " tag-type-#{tag.category}"
      else
        if wiki.blank?
          node["class"] += " dtext-wiki-does-not-exist"
          node["title"] = "This wiki page does not exist"
        end

        if WikiPage.is_meta_wiki?(name)
          # skip (meta wikis aren't expected to have a tag)
        elsif tag.blank?
          node["class"] += " dtext-tag-does-not-exist"
          node["title"] = "This wiki page does not have a tag"
        elsif tag.empty?
          node["class"] += " dtext-tag-empty"
          node["title"] = "This wiki page does not have a tag"
        else
          node["class"] += " tag-type-#{tag.category}"
        end
      end
    end

    fragment.to_s
  end

  # Wrap a message in a `[quote]` block attributed to `creator_name`. Any
  # `[quote]` blocks already inside the message are stripped first.
  #
  # @param message [String] the DText being quoted
  # @param creator_name [String] name shown in the "said:" line
  # @return [String] the quoted DText
  def self.quote(message, creator_name)
    stripped_body = DText.strip_blocks(message, "quote")
    "[quote]\n#{creator_name} said:\n\n#{stripped_body}\n[/quote]\n\n"
  end

  # Expand `[ta:N]`, `[ti:N]`, and `[bur:N]` references into status messages.
  #
  # @param text [String] DText source
  # @return [String] the text with every resolvable reference replaced
  def self.parse_embedded_tag_request(text)
    text = parse_embedded_tag_request_type(text, TagAlias, /\[ta:(?<id>\d+)\]/m)
    text = parse_embedded_tag_request_type(text, TagImplication, /\[ti:(?<id>\d+)\]/m)
    parse_embedded_tag_request_type(text, BulkUpdateRequest, /\[bur:(?<id>\d+)\]/m)
  end

  # Replace each match of `pattern` with the status message for the record it
  # references. The original text is kept when no message can be produced
  # (record not found, or a state {tag_request_message} has no message for).
  #
  # @param text [String] DText source
  # @param tag_request [Class] class responding to `find_by_id`
  # @param pattern [Regexp] pattern with a named `id` capture
  # @return [String]
  def self.parse_embedded_tag_request_type(text, tag_request, pattern)
    text.gsub(pattern) do |match|
      obj = tag_request.find_by_id(Regexp.last_match[:id])
      tag_request_message(obj) || match
    end
  end

  # Build the DText status message for an alias, implication, or bulk update
  # request.
  #
  # @param obj [TagRelationship, BulkUpdateRequest, nil]
  # @return [String, nil] nil when `obj` is neither supported type, or is a
  #   bulk update request that is not approved, pending, or rejected
  def self.tag_request_message(obj)
    if obj.is_a?(TagRelationship)
      # Every relationship message shares this prefix; only the status differs.
      summary = "The #{obj.relationship} ##{obj.id} [[#{obj.antecedent_name}]] -> [[#{obj.consequent_name}]]"

      status =
        if obj.is_active?
          "has been approved."
        elsif obj.is_retired?
          "has been retired."
        elsif obj.is_deleted?
          "has been rejected."
        elsif obj.is_pending?
          "is pending approval."
        elsif obj.is_errored?
          "failed during processing."
        else # should never happen
          "has an unknown status."
        end

      "#{summary} #{status}"
    elsif obj.is_a?(BulkUpdateRequest)
      # Scripts of 700 characters or more are wrapped in an [expand] block.
      if obj.script.size < 700
        embedded_script = obj.processor.to_dtext
      else
        embedded_script = "[expand]#{obj.processor.to_dtext}[/expand]"
      end

      if obj.is_approved?
        "The bulk update request ##{obj.id} is active.\n\n#{embedded_script}"
      elsif obj.is_pending?
        "The \"bulk update request ##{obj.id}\":/bulk_update_requests/#{obj.id} is pending approval.\n\n#{embedded_script}"
      elsif obj.is_rejected?
        "The bulk update request ##{obj.id} has been rejected.\n\n#{embedded_script}"
      end
    end
  end

  # Extract the unique names @mentioned in the text, ignoring anything inside
  # `[quote]` blocks. The leading `@` and at most one trailing punctuation
  # character are removed from each mention.
  #
  # @param text [#to_s] DText source
  # @return [Array<String>] mentioned names, in order of first appearance
  def self.parse_mentions(text)
    text = strip_blocks(text.to_s, "quote")

    names = text.scan(MENTION_REGEXP).map do |mention|
      mention.gsub(/(?:^\s*@)|(?:[:;,.!?\)\]<>]$)/, "")
    end

    names.uniq
  end

  # Extract the normalized titles of all wiki pages linked to by the text.
  #
  # @param text [String] DText source
  # @return [Array<String>] unique titles
  def self.parse_wiki_titles(text)
    html = DTextRagel.parse(text)
    fragment = Nokogiri::HTML.fragment(html)

    titles = fragment.css("a.dtext-wiki-link").map do |node|
      title = node["href"][%r{\A/wiki_pages/(.*)\z}i, 1]
      title = CGI.unescape(title)
      WikiPage.normalize_title(title)
    end

    titles.uniq
  end

  # Extract the URLs of all external links in the text.
  #
  # @param text [String] DText source
  # @return [Array<String>] unique `href` values of `a.dtext-external-link` nodes
  def self.parse_external_links(text)
    html = DTextRagel.parse(text)
    fragment = Nokogiri::HTML.fragment(html)

    links = fragment.css("a.dtext-external-link").map { |node| node["href"] }
    links.uniq
  end

  # Whether two DText strings link to different sets of wiki pages or
  # different sets of external URLs (order and repetition are ignored).
  #
  # @param a [String] DText source
  # @param b [String] DText source
  # @return [Boolean]
  def self.dtext_links_differ?(a, b)
    Set.new(parse_wiki_titles(a)) != Set.new(parse_wiki_titles(b)) ||
      Set.new(parse_external_links(a)) != Set.new(parse_external_links(b))
  end

  # Rewrite wiki links to [[old_name]] with [[new_name]]. We attempt to match
  # the capitalization of the old tag when rewriting it to the new tag, but if
  # we can't determine how the new tag should be capitalized based on some
  # simple heuristics, then we skip rewriting the tag.
  #
  # @param dtext [String] DText source
  # @param old_name [String] tag name to replace
  # @param new_name [String] tag name to substitute
  # @return [String] the DText with matching links rewritten
  def self.rewrite_wiki_links(dtext, old_name, new_name)
    old_name = old_name.downcase.squeeze("_").tr("_", " ").strip
    new_name = new_name.downcase.squeeze("_").tr("_", " ").strip

    # Match `[[name]]` or `[[name|title]]`
    dtext.gsub(/\[\[(.*?)(?:\|(.*?))?\]\]/) do |match|
      name = Regexp.last_match(1)
      title = Regexp.last_match(2)

      # Skip this link if it isn't the tag we're trying to replace.
      normalized_name = name.downcase.tr("_", " ").squeeze(" ").strip
      next match if normalized_name != old_name

      # Strip qualifiers, e.g. `atago (midsummer march) (azur lane)` => `atago`
      unqualified_name = name.tr("_", " ").squeeze(" ").strip.gsub(/( \(.*\))+\z/, "")

      # If old tag was lowercase, e.g. [[ink tank (Splatoon)]], then keep new tag in lowercase.
      if unqualified_name == unqualified_name.downcase
        final_name = new_name
      # If old tag was capitalized, e.g. [[Colored pencil (medium)]], then capitalize new tag.
      elsif unqualified_name == unqualified_name.downcase.capitalize
        final_name = new_name.capitalize
      # If old tag was in titlecase, e.g. [[Hatsune Miku (cosplay)]], then titlecase new tag.
      elsif unqualified_name == unqualified_name.split.map(&:capitalize).join(" ")
        final_name = new_name.split.map(&:capitalize).join(" ")
      # If we can't determine how to capitalize the new tag, then keep the old tag.
      # e.g. [[Suzumiya Haruhi no Yuuutsu]] -> [[The Melancholy of Haruhi Suzumiya]]
      else
        next match
      end

      if title.present?
        "[[#{final_name}|#{title}]]"
      # If the new name has a qualifier, then hide the qualifier in the link.
      elsif final_name.match?(/( \(.*\))+\z/)
        "[[#{final_name}|]]"
      else
        "[[#{final_name}]]"
      end
    end
  end

  # Remove every `[tag]...[/tag]` block, including nested ones, from the
  # string. Opening and closing tags are first forced onto their own
  # paragraphs; paragraphs seen while the nesting depth is non-zero are
  # dropped.
  #
  # @param string [String] DText source (not mutated)
  # @param tag [String] block tag name, e.g. "quote"; interpolated into the
  #   regexps unescaped
  # @return [String] the remaining paragraphs, separated by blank lines
  def self.strip_blocks(string, tag)
    n = 0
    stripped = ""
    string = string.dup

    string.gsub!(/\s*\[#{tag}\](?!\])\s*/mi, "\n\n[#{tag}]\n\n")
    string.gsub!(%r{\s*\[/#{tag}\]\s*}mi, "\n\n[/#{tag}]\n\n")
    string.gsub!(/(?:\r?\n){3,}/, "\n\n")
    string.strip!

    string.split(/\n{2}/).each do |block|
      case block
      when "[#{tag}]"
        n += 1
      when "[/#{tag}]"
        n -= 1
      else
        stripped << "#{block}\n\n" if n == 0
      end
    end

    stripped.strip
  end

  # Convert DText to plain text by rendering it to HTML and then flattening
  # the HTML with {to_plaintext}.
  #
  # @param dtext [String] DText source
  # @return [String]
  def self.strip_dtext(dtext)
    html = DTextRagel.parse(dtext)
    to_plaintext(html)
  end

  # Convert HTML to plain text. Links, emphasis, and headings are reduced to
  # their text; each line of a blockquote is prefixed with "> ". Leading and
  # trailing whitespace is removed from the result.
  #
  # @param html [String] HTML source
  # @return [String]
  def self.to_plaintext(html)
    text = from_html(html) do |node|
      case node.name
      when "a", "strong", "em", "u", "s", "h1", "h2", "h3", "h4", "h5", "h6"
        # Renaming to <span> makes from_html emit the text with no markup.
        node.name = "span"
        node.content = node.text
      when "blockquote"
        node.name = "span"
        node.content = to_plaintext(node.inner_html).gsub(/^/, "> ")
      end
    end

    text.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
  end

  # Convert an HTML fragment to DText.
  #
  # @param text [String] HTML source
  # @param inline [Boolean] when true, an `<img>` is rendered as just its
  #   title/alt text (used for images nested inside links)
  # @yieldparam element [Nokogiri::XML::Node] each node, before it is
  #   converted; the block may mutate the node to change how it is rendered
  # @return [String] the DText
  def self.from_html(text, inline: false, &block)
    html = Nokogiri::HTML.fragment(text)

    html.children.map do |element|
      block&.call(element)

      case element.name
      when "text"
        element.content.gsub(/(?:\r|\n)+$/, "")
      when "br"
        "\n"
      when "p", "ul", "ol"
        from_html(element.inner_html, &block).strip + "\n\n"
      when "blockquote"
        "[quote]#{from_html(element.inner_html, &block).strip}[/quote]\n\n" if element.inner_html.present?
      when "small", "sub"
        "[tn]#{from_html(element.inner_html, &block)}[/tn]" if element.inner_html.present?
      when "b", "strong"
        "[b]#{from_html(element.inner_html, &block)}[/b]" if element.inner_html.present?
      when "i", "em"
        "[i]#{from_html(element.inner_html, &block)}[/i]" if element.inner_html.present?
      when "u"
        "[u]#{from_html(element.inner_html, &block)}[/u]" if element.inner_html.present?
      when "s", "strike"
        "[s]#{from_html(element.inner_html, &block)}[/s]" if element.inner_html.present?
      when "li"
        "* #{from_html(element.inner_html, &block)}\n" if element.inner_html.present?
      when "h1", "h2", "h3", "h4", "h5", "h6"
        heading = element.name
        title = from_html(element.inner_html, &block)
        "#{heading}. #{title}\n\n"
      when "a"
        title = from_html(element.inner_html, inline: true, &block).strip
        url = element["href"]

        if title.blank? || url.blank?
          ""
        elsif title == url
          "<#{url}>"
        else
          %("#{title}":[#{url}])
        end
      when "img"
        alt_text = element.attributes["title"] || element.attributes["alt"] || ""
        src = element["src"]

        if inline
          alt_text
        elsif alt_text.present? && src.present?
          %("#{alt_text}":[#{src}]\n\n)
        else
          ""
        end
      when "comment"
        # ignored
      else
        from_html(element.inner_html, &block)
      end
    end.join
  end

  # extract the first paragraph `needle` occurs in.
  #
  # @param dtext [String] DText source; line endings are normalized to "\n"
  # @param needle [String] the text to search for
  # @return [String, nil] whatever the `excerpt` view helper returns
  def self.extract_mention(dtext, needle)
    dtext = dtext.gsub(/\r\n|\r|\n/, "\n")
    ActionController::Base.helpers.excerpt(dtext, needle, separator: "\n\n", radius: 1, omission: "")
  end

  # The first line of the plain-text version of the DText, truncated with
  # `String#truncate`.
  #
  # @param text [String] DText source
  # @param length [Integer] length passed to `truncate`
  # @return [String]
  def self.excerpt(text, length: 160)
    strip_dtext(text).split(/\r\n|\r|\n/).first.to_s.truncate(length)
  end
end