Files
danbooru/app/logical/d_text.rb
evazion 4bb1bdbe10 dtext: handle [bur:<id>] tags in main parser.
Move the parsing for the [bur:<id>], [ta:<id>], [ti:<id>] pseudo tags to
the main parser in `DText.format_text`. This fixes a bug where wiki
links inside bulk update requests on the forum weren't properly
colorized because the text of the BUR was embedded after we scanned for
wiki links, not before.

This also ensures that tags inside bulk update requests will be recorded
in the dtext_links table, meaning that forum posts can be properly
searched by tags.

This incidentally means that these request pseudo tags can now be used
outside the forum.
2019-10-28 16:40:38 -05:00

260 lines
8.0 KiB
Ruby

require 'cgi'
require 'uri'
class DText
MENTION_REGEXP = /(?<=^| )@\S+/
def self.format_text(text, data: nil, **options)
return nil if text.nil?
data = preprocess([text]) if data.nil?
text = parse_embedded_tag_request_text(text)
html = DTextRagel.parse(text, **options)
html = postprocess(html, *data)
html
rescue DTextRagel::Error => e
""
end
def self.preprocess(dtext_messages)
dtext_messages = dtext_messages.map { |message| parse_embedded_tag_request_text(message) }
names = dtext_messages.map { |message| parse_wiki_titles(message) }.flatten.uniq
wiki_pages = WikiPage.where(title: names)
tags = Tag.where(name: names)
[wiki_pages, tags]
end
def self.postprocess(html, wiki_pages, tags)
fragment = Nokogiri::HTML.fragment(html)
fragment.css("a.dtext-wiki-link").each do |node|
name = node["href"][%r!\A/wiki_pages/show_or_new\?title=(.*)\z!i, 1]
name = CGI.unescape(name)
name = WikiPage.normalize_title(name)
wiki = wiki_pages.find { |wiki| wiki.title == name }
tag = tags.find { |tag| tag.name == name }
if wiki.blank?
node["class"] += " dtext-wiki-does-not-exist"
node["title"] = "This wiki page does not exist"
end
if WikiPage.is_meta_wiki?(name)
# skip (meta wikis aren't expected to have a tag)
elsif tag.blank?
node["class"] += " dtext-tag-does-not-exist"
node["title"] = "This wiki page does not have a tag"
elsif tag.post_count <= 0
node["class"] += " dtext-tag-empty"
node["title"] = "This wiki page does not have a tag"
else
node["class"] += " tag-type-#{tag.category}"
end
end
fragment.to_s
end
def self.quote(message, creator_name)
stripped_body = DText.strip_blocks(message, "quote")
"[quote]\n#{creator_name} said:\n\n#{stripped_body}\n[/quote]\n\n"
end
def self.parse_embedded_tag_request_text(text)
[TagAlias, TagImplication, BulkUpdateRequest].each do |tag_request|
text = text.gsub(tag_request.embedded_pattern) do |match|
begin
obj = tag_request.find($~[:id])
tag_request_message(obj) || match
rescue ActiveRecord::RecordNotFound
match
end
end
end
text
end
def self.tag_request_message(obj)
if obj.is_a?(TagRelationship)
if obj.is_approved?
"The #{obj.relationship} ##{obj.id} [[#{obj.antecedent_name}]] -> [[#{obj.consequent_name}]] has been approved."
elsif obj.is_retired?
"The #{obj.relationship} ##{obj.id} [[#{obj.antecedent_name}]] -> [[#{obj.consequent_name}]] has been retired."
elsif obj.is_deleted?
"The #{obj.relationship} ##{obj.id} [[#{obj.antecedent_name}]] -> [[#{obj.consequent_name}]] has been rejected."
elsif obj.is_pending?
"The #{obj.relationship} ##{obj.id} [[#{obj.antecedent_name}]] -> [[#{obj.consequent_name}]] is pending approval."
elsif obj.is_errored?
"The #{obj.relationship} ##{obj.id} [[#{obj.antecedent_name}]] -> [[#{obj.consequent_name}]] (#{relationship} failed during processing."
else # should never happen
"The #{obj.relationship} ##{obj.id} [[#{obj.antecedent_name}]] -> [[#{obj.consequent_name}]] has an unknown status."
end
elsif obj.is_a?(BulkUpdateRequest)
if obj.script.size < 700
embedded_script = obj.script_with_links
else
embedded_script = "[expand]#{obj.script_with_links}[/expand]"
end
if obj.is_approved?
"The bulk update request ##{obj.id} is active.\n\n#{embedded_script}"
elsif obj.is_pending?
"The \"bulk update request ##{obj.id}\":/bulk_update_requests/#{obj.id} is pending approval.\n\n#{embedded_script}"
elsif obj.is_rejected?
"The bulk update request ##{obj.id} has been rejected.\n\n#{embedded_script}"
end
end
end
def self.parse_mentions(text)
text = strip_blocks(text.to_s, "quote")
names = text.scan(MENTION_REGEXP).map do |mention|
mention.gsub(/(?:^\s*@)|(?:[:;,.!?\)\]<>]$)/, "")
end
names.uniq
end
def self.parse_wiki_titles(text)
html = DTextRagel.parse(text)
fragment = Nokogiri::HTML.fragment(html)
titles = fragment.css("a.dtext-wiki-link").map do |node|
title = node["href"][%r!\A/wiki_pages/show_or_new\?title=(.*)\z!i, 1]
title = CGI.unescape(title)
title = WikiPage.normalize_title(title)
title
end
titles.uniq
end
def self.parse_external_links(text)
html = DTextRagel.parse(text)
fragment = Nokogiri::HTML.fragment(html)
links = fragment.css("a.dtext-external-link").map { |node| node["href"] }
links.uniq
end
def self.dtext_links_differ?(a, b)
Set.new(parse_wiki_titles(a)) != Set.new(parse_wiki_titles(b)) ||
Set.new(parse_external_links(a)) != Set.new(parse_external_links(b))
end
def self.strip_blocks(string, tag)
n = 0
stripped = ""
string = string.dup
string.gsub!(/\s*\[#{tag}\](?!\])\s*/mi, "\n\n[#{tag}]\n\n")
string.gsub!(/\s*\[\/#{tag}\]\s*/mi, "\n\n[/#{tag}]\n\n")
string.gsub!(/(?:\r?\n){3,}/, "\n\n")
string.strip!
string.split(/\n{2}/).each do |block|
case block
when "[#{tag}]"
n += 1
when "[/#{tag}]"
n -= 1
else
if n == 0
stripped << "#{block}\n\n"
end
end
end
stripped.strip
end
def self.strip_dtext(dtext)
html = DTextRagel.parse(dtext)
text = to_plaintext(html)
text
end
def self.to_plaintext(html)
text = from_html(html) do |node|
case node.name
when "a", "strong", "em", "u", "s", "h1", "h2", "h3", "h4", "h5", "h6"
node.name = "span"
node.content = node.text
when "blockquote"
node.name = "span"
node.content = to_plaintext(node.inner_html).gsub(/^/, "> ")
end
end
text = text.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
end
def self.from_html(text, inline: false, &block)
html = Nokogiri::HTML.fragment(text)
dtext = html.children.map do |element|
block.call(element) if block.present?
case element.name
when "text"
element.content.gsub(/(?:\r|\n)+$/, "")
when "br"
"\n"
when "p", "ul", "ol"
from_html(element.inner_html, &block).strip + "\n\n"
when "blockquote"
"[quote]#{from_html(element.inner_html, &block).strip}[/quote]\n\n" if element.inner_html.present?
when "small", "sub"
"[tn]#{from_html(element.inner_html, &block)}[/tn]" if element.inner_html.present?
when "b", "strong"
"[b]#{from_html(element.inner_html, &block)}[/b]" if element.inner_html.present?
when "i", "em"
"[i]#{from_html(element.inner_html, &block)}[/i]" if element.inner_html.present?
when "u"
"[u]#{from_html(element.inner_html, &block)}[/u]" if element.inner_html.present?
when "s", "strike"
"[s]#{from_html(element.inner_html, &block)}[/s]" if element.inner_html.present?
when "li"
"* #{from_html(element.inner_html, &block)}\n" if element.inner_html.present?
when "h1", "h2", "h3", "h4", "h5", "h6"
hN = element.name
title = from_html(element.inner_html, &block)
"#{hN}. #{title}\n\n"
when "a"
title = from_html(element.inner_html, inline: true, &block).strip
url = element["href"]
%("#{title}":[#{url}]) if title.present? && url.present?
when "img"
alt_text = element.attributes["title"] || element.attributes["alt"] || ""
src = element["src"]
if inline
alt_text
elsif alt_text.present? && src.present?
%("#{alt_text}":[#{src}]\n\n)
else
""
end
when "comment"
# ignored
else
from_html(element.inner_html, &block)
end
end.join
dtext
end
# extract the first paragraph `needle` occurs in.
def self.excerpt(dtext, needle)
dtext = dtext.gsub(/\r\n|\r|\n/, "\n")
excerpt = ActionController::Base.helpers.excerpt(dtext, needle, separator: "\n\n", radius: 1, omission: "")
excerpt
end
end