dtext: refactor stripping markup from dtext.

There are a handful of places where we need to strip markup from a piece
of dtext, primarily in <meta> description tags in the wiki. Currently
the dtext parser handles this by having a special mode where it parses
the text but doesn't output HTML tags. Here we refactor to instead parse
the text normally, then strip out the HTML tags after the fact.

This is more flexible and allows us to simplify a lot of things in the
dtext parser. This also produces more readable output than before in
certain cases.
This commit is contained in:
evazion
2019-10-09 15:55:28 -05:00
parent 30091b989d
commit 08b1c76533
4 changed files with 65 additions and 2 deletions

View File

@@ -39,7 +39,7 @@ module ApplicationHelper
end
# View helper that returns the result of DText.strip_dtext(text).
# NOTE(review): the format_text(text, strip: true) call below has its result
# discarded; it looks like the pre-refactor line retained by a diff rendering
# rather than live code -- confirm before relying on it.
def strip_dtext(text)
format_text(text, strip: true)
DText.strip_dtext(text)
end
def error_messages_for(instance_name)

View File

@@ -47,6 +47,27 @@ class DText
stripped.strip
end
# Produces a plain-text version of a DText string.
#
# The input is first run through DTextRagel.parse (whose output is treated
# as HTML here) and that output is then handed to to_plaintext.
#
# @param dtext the DText source to convert
# @return whatever to_plaintext returns for the parsed markup
def self.strip_dtext(dtext)
  to_plaintext(DTextRagel.parse(dtext))
end
# Converts an HTML string to plain text.
#
# from_html is invoked with a block that rewrites selected nodes before the
# text is produced:
# * links, bold/italic/underline/strike tags and h1-h6 headings are renamed
#   to <span> and their content is replaced by their own text;
# * blockquotes are renamed to <span>, their inner HTML is converted
#   recursively, and every line of the result is prefixed with "> ".
#
# Leading and trailing whitespace (POSIX [[:space:]]) is removed from the
# final string.
#
# @param html the HTML markup to flatten
# @return the whitespace-trimmed plain text
def self.to_plaintext(html)
  flattened_tags = ["a", "strong", "em", "u", "s", "h1", "h2", "h3", "h4", "h5", "h6"]

  plain = from_html(html) do |element|
    if flattened_tags.include?(element.name)
      element.name = "span"
      element.content = element.text
    elsif element.name == "blockquote"
      element.name = "span"
      # Quote the nested text in email style, one "> " per line.
      element.content = to_plaintext(element.inner_html).gsub(/^/, "> ")
    end
  end

  plain.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
end
def self.from_html(text, inline: false, &block)
html = Nokogiri::HTML.fragment(text)

View File

@@ -1,6 +1,6 @@
require "test_helper"
class DTextTest < ActiveSupport::TestCase
class APNGInspectorTest < ActiveSupport::TestCase
def inspect(filename)
apng = APNGInspector.new("#{Rails.root}/test/files/apng/#{filename}")
apng.inspect!

42
test/unit/d_text_test.rb Normal file
View File

@@ -0,0 +1,42 @@
require "test_helper"
# Unit tests for DText.strip_dtext.
class DTextTest < ActiveSupport::TestCase
  # Asserts that DText.strip_dtext(dtext) equals +expected+.
  def assert_strip_dtext(expected, dtext)
    actual = DText.strip_dtext(dtext)
    assert_equal(expected, actual)
  end

  context "DText" do
    context "#strip_dtext" do
      should "strip dtext markup from the input" do
        # Each entry is [expected plain text, dtext input]; checked in order.
        [
          ["x", "[b]x[/b]"],
          ["x", "[i]x[/i]"],
          ["x", "[tn]x[/tn]"],
          ["x", "[spoilers]x[/spoilers]"],

          ["post #123", "post #123"],
          ["pixiv #123", "pixiv #123"],

          ["bkub", "{{bkub}}"],
          ["bkub", "[[bkub]]"],
          ["Bkub", "[[bkub|Bkub]]"],

          ["http://www.example.com", "http://www.example.com"],
          ["http://www.example.com", "<http://www.example.com>"],
          ["x", '"x":/posts'],
          ["x", '"x":[/posts]'],

          ["@bkub", "@bkub"],
          ["@bkub", "<@bkub>"],

          ["x", "h1. x"],
          ["x", "h2. [i]x[/i]"],

          ["* one\n* two", "* [b]one[/b]\n* [[two]]"],
          ["okay", "[expand][u]okay[/u][/expand]"],
          ["> chen said:\n> \n> honk honk", "[quote]chen said:\n\nhonk honk[/quote]"],

          ["one two three\nfour\n\nfive six", "one [b]two[/b] three\nfour\n\nfive six"]
        ].each do |plain, markup|
          assert_strip_dtext(plain, markup)
        end
      end
    end
  end
end