From 08b1c765334a36b2e1954ca101097a3c33def338 Mon Sep 17 00:00:00 2001
From: evazion <noizave@gmail.com>
Date: Wed, 9 Oct 2019 15:55:28 -0500
Subject: [PATCH] dtext: refactor stripping markup from dtext.

There are a handful of places where we need to strip markup from a piece
of dtext, primarily in <meta> description tags in the wiki. Currently
the dtext parser handles this by having a special mode where it parses
the text but doesn't output html tags. Here we refactor to instead parse
the text normally then strip out the html tags after the fact.

This is more flexible and allows us to simplify a lot of things in the
dtext parser. This also produces more readable output than before in
certain cases.
---
 app/helpers/application_helper.rb |  2 +-
 app/logical/d_text.rb             | 21 ++++++++++++++++
 test/unit/apng_inspector_test.rb  |  2 +-
 test/unit/d_text_test.rb          | 42 +++++++++++++++++++++++++++++++
 4 files changed, 65 insertions(+), 2 deletions(-)
 create mode 100644 test/unit/d_text_test.rb
diff --git a/app/helpers/application_helper.rb b/app/helpers/application_helper.rb
index afb16fa60..c9bc60d5f 100644
--- a/app/helpers/application_helper.rb
+++ b/app/helpers/application_helper.rb
@@ -39,7 +39,7 @@ module ApplicationHelper
   end
 
   def strip_dtext(text)
-    format_text(text, strip: true)
+    DText.strip_dtext(text)
   end
 
   def error_messages_for(instance_name)
diff --git a/app/logical/d_text.rb b/app/logical/d_text.rb
index 1470e8810..a0818a17f 100644
--- a/app/logical/d_text.rb
+++ b/app/logical/d_text.rb
@@ -47,6 +47,27 @@ class DText
     stripped.strip
   end
 
+  # Strips DText markup from +dtext+: renders it to HTML with DTextRagel, then
+  # flattens that HTML to plain text via to_plaintext. Returns the plain text.
+  def self.strip_dtext(dtext)
+    to_plaintext(DTextRagel.parse(dtext))
+  end
+
+  # Flattens rendered DText HTML to plain text; each line of a quote is prefixed with "> ".
+  def self.to_plaintext(html)
+    text = from_html(html) do |node|
+      case node.name
+      when "a", "strong", "em", "u", "s", "h1", "h2", "h3", "h4", "h5", "h6"
+        node.name = "span" # NOTE(review): presumably from_html emits a span's text as-is; confirm
+        node.content = node.text # keep only the element's text, dropping nested markup
+      when "blockquote"
+        node.name = "span"
+        node.content = to_plaintext(node.inner_html).gsub(/^/, "> ") # recurse, then quote every line
+      end
+    end
+    text.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "") # trims Unicode whitespace too, unlike String#strip
+  end
+
   def self.from_html(text, inline: false, &block)
     html = Nokogiri::HTML.fragment(text)
 
diff --git a/test/unit/apng_inspector_test.rb b/test/unit/apng_inspector_test.rb
index 1f734b532..6332d72bc 100644
--- a/test/unit/apng_inspector_test.rb
+++ b/test/unit/apng_inspector_test.rb
@@ -1,6 +1,6 @@
 require "test_helper"
 
-class DTextTest < ActiveSupport::TestCase
+class APNGInspectorTest < ActiveSupport::TestCase
   def inspect(filename)
     apng = APNGInspector.new("#{Rails.root}/test/files/apng/#{filename}")
     apng.inspect!
diff --git a/test/unit/d_text_test.rb b/test/unit/d_text_test.rb
new file mode 100644
index 000000000..81c2e61f4
--- /dev/null
+++ b/test/unit/d_text_test.rb
@@ -0,0 +1,42 @@
+require "test_helper"
+
+class DTextTest < ActiveSupport::TestCase
+  def assert_strip_dtext(expected, dtext)
+    assert_equal(expected, DText.strip_dtext(dtext))
+  end
+
+  context "DText" do
+    context "#strip_dtext" do
+      should "strip dtext markup from the input" do
+        assert_strip_dtext("x", "[b]x[/b]")
+        assert_strip_dtext("x", "[i]x[/i]")
+        assert_strip_dtext("x", "[tn]x[/tn]")
+        assert_strip_dtext("x", "[spoilers]x[/spoilers]")
+        # "post #N" / "pixiv #N" references are expected to come through as written.
+        assert_strip_dtext("post #123", "post #123")
+        assert_strip_dtext("pixiv #123", "pixiv #123")
+        # {{...}} and [[...]] links are expected to reduce to their visible text.
+        assert_strip_dtext("bkub", "{{bkub}}")
+        assert_strip_dtext("bkub", "[[bkub]]")
+        assert_strip_dtext("Bkub", "[[bkub|Bkub]]")
+        # Bare and <bracketed> URLs keep the URL; "title":url links keep only the title.
+        assert_strip_dtext("http://www.example.com", "http://www.example.com")
+        assert_strip_dtext("http://www.example.com", "<http://www.example.com>")
+        assert_strip_dtext("x", '"x":/posts')
+        assert_strip_dtext("x", '"x":[/posts]')
+        # @mentions, bare or <bracketed>, are expected to keep the @name.
+        assert_strip_dtext("@bkub", "@bkub")
+        assert_strip_dtext("@bkub", "<@bkub>")
+        # Headers are expected to lose the "hN. " prefix and any inline markup.
+        assert_strip_dtext("x", "h1. x")
+        assert_strip_dtext("x", "h2. [i]x[/i]")
+        # Lists keep their "* " bullets, [expand] is unwrapped, quoted lines get a "> " prefix.
+        assert_strip_dtext("* one\n* two", "* [b]one[/b]\n* [[two]]")
+        assert_strip_dtext("okay", "[expand][u]okay[/u][/expand]")
+        assert_strip_dtext("> chen said:\n> \n> honk honk", "[quote]chen said:\n\nhonk honk[/quote]")
+        # Plain text is expected to keep its line breaks and blank lines.
+        assert_strip_dtext("one two three\nfour\n\nfive six", "one [b]two[/b] three\nfour\n\nfive six")
+      end
+    end
+  end
+end