dtext: refactor stripping markup from dtext.

There are a handful of places where we need to strip markup from a piece of dtext, primarily in <meta> description tags in the wiki. Currently the dtext parser handles this with a special mode in which it parses the text but doesn't output HTML tags. Here we refactor to instead parse the text normally and then strip out the HTML tags after the fact. This is more flexible and allows us to simplify a lot of things in the dtext parser. It also produces more readable output than before in certain cases.
2019-10-09 15:55:28 -05:00
parent 30091b989d
commit 08b1c76533
4 changed files with 65 additions and 2 deletions
--- a/app/helpers/application_helper.rb
+++ b/app/helpers/application_helper.rb
@@ -39,7 +39,7 @@ module ApplicationHelper
  end

  def strip_dtext(text)
-    format_text(text, strip: true)
+    DText.strip_dtext(text)
  end

  def error_messages_for(instance_name)
--- a/app/logical/d_text.rb
+++ b/app/logical/d_text.rb
@@ -47,6 +47,27 @@ class DText
    stripped.strip
  end

+  def self.strip_dtext(dtext)
+    html = DTextRagel.parse(dtext)
+    text = to_plaintext(html)
+    text
+  end
+
+  def self.to_plaintext(html)
+    text = from_html(html) do |node|
+      case node.name
+      when "a", "strong", "em", "u", "s", "h1", "h2", "h3", "h4", "h5", "h6"
+        node.name = "span"
+        node.content = node.text
+      when "blockquote"
+        node.name = "span"
+        node.content = to_plaintext(node.inner_html).gsub(/^/, "> ")
+      end
+    end
+
+    text = text.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
+  end
+
  def self.from_html(text, inline: false, &block)
    html = Nokogiri::HTML.fragment(text)

--- a/test/unit/apng_inspector_test.rb
+++ b/test/unit/apng_inspector_test.rb
@@ -1,6 +1,6 @@
 require "test_helper"

-class DTextTest < ActiveSupport::TestCase
+class APNGInspectorTest < ActiveSupport::TestCase
  def inspect(filename)
    apng = APNGInspector.new("#{Rails.root}/test/files/apng/#{filename}")
    apng.inspect!
--- a/test/unit/d_text_test.rb
+++ b/test/unit/d_text_test.rb
@@ -0,0 +1,42 @@
+require "test_helper"
+
+class DTextTest < ActiveSupport::TestCase
+  def assert_strip_dtext(expected, dtext)
+    assert_equal(expected, DText.strip_dtext(dtext))
+  end
+
+  context "DText" do
+    context "#strip_dtext" do
+      should "strip dtext markup from the input" do
+        assert_strip_dtext("x", "[b]x[/b]")
+        assert_strip_dtext("x", "[i]x[/i]")
+        assert_strip_dtext("x", "[tn]x[/tn]")
+        assert_strip_dtext("x", "[spoilers]x[/spoilers]")
+
+        assert_strip_dtext("post #123", "post #123")
+        assert_strip_dtext("pixiv #123", "pixiv #123")
+
+        assert_strip_dtext("bkub", "{{bkub}}")
+        assert_strip_dtext("bkub", "[[bkub]]")
+        assert_strip_dtext("Bkub", "[[bkub|Bkub]]")
+
+        assert_strip_dtext("http://www.example.com", "http://www.example.com")
+        assert_strip_dtext("http://www.example.com", "<http://www.example.com>")
+        assert_strip_dtext("x", '"x":/posts')
+        assert_strip_dtext("x", '"x":[/posts]')
+
+        assert_strip_dtext("@bkub", "@bkub")
+        assert_strip_dtext("@bkub", "<@bkub>")
+
+        assert_strip_dtext("x", "h1. x")
+        assert_strip_dtext("x", "h2. [i]x[/i]")
+
+        assert_strip_dtext("* one\n* two", "* [b]one[/b]\n* [[two]]")
+        assert_strip_dtext("okay", "[expand][u]okay[/u][/expand]")
+        assert_strip_dtext("> chen said:\n> \n> honk honk", "[quote]chen said:\n\nhonk honk[/quote]")
+
+        assert_strip_dtext("one two three\nfour\n\nfive six", "one [b]two[/b] three\nfour\n\nfive six")
+      end
+    end
+  end
+end