From 08b1c765334a36b2e1954ca101097a3c33def338 Mon Sep 17 00:00:00 2001 From: evazion Date: Wed, 9 Oct 2019 15:55:28 -0500 Subject: [PATCH] dtext: refactor stripping markup from dtext. There are a handful of places where we need to strip markup from a piece of dtext, primarily in description tags in the wiki. Currently the dtext parser handles this by having a special mode where it parses the text but doesn't output html tags. Here we refactor to instead parse the text normally then strip out the html tags after the fact. This is more flexible and allows us to simplify a lot of things in the dtext parser. This also produces more readable output than before in certain cases. --- app/helpers/application_helper.rb | 2 +- app/logical/d_text.rb | 21 ++++++++++++++++ test/unit/apng_inspector_test.rb | 2 +- test/unit/d_text_test.rb | 42 +++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 test/unit/d_text_test.rb diff --git a/app/helpers/application_helper.rb b/app/helpers/application_helper.rb index afb16fa60..c9bc60d5f 100644 --- a/app/helpers/application_helper.rb +++ b/app/helpers/application_helper.rb @@ -39,7 +39,7 @@ module ApplicationHelper end def strip_dtext(text) - format_text(text, strip: true) + DText.strip_dtext(text) end def error_messages_for(instance_name) diff --git a/app/logical/d_text.rb b/app/logical/d_text.rb index 1470e8810..a0818a17f 100644 --- a/app/logical/d_text.rb +++ b/app/logical/d_text.rb @@ -47,6 +47,27 @@ class DText stripped.strip end + def self.strip_dtext(dtext) + html = DTextRagel.parse(dtext) + text = to_plaintext(html) + text + end + + def self.to_plaintext(html) + text = from_html(html) do |node| + case node.name + when "a", "strong", "em", "u", "s", "h1", "h2", "h3", "h4", "h5", "h6" + node.name = "span" + node.content = node.text + when "blockquote" + node.name = "span" + node.content = to_plaintext(node.inner_html).gsub(/^/, "> ") + end + end + + text = 
text.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "") + end + def self.from_html(text, inline: false, &block) html = Nokogiri::HTML.fragment(text) diff --git a/test/unit/apng_inspector_test.rb b/test/unit/apng_inspector_test.rb index 1f734b532..6332d72bc 100644 --- a/test/unit/apng_inspector_test.rb +++ b/test/unit/apng_inspector_test.rb @@ -1,6 +1,6 @@ require "test_helper" -class DTextTest < ActiveSupport::TestCase +class APNGInspectorTest < ActiveSupport::TestCase def inspect(filename) apng = APNGInspector.new("#{Rails.root}/test/files/apng/#{filename}") apng.inspect! diff --git a/test/unit/d_text_test.rb b/test/unit/d_text_test.rb new file mode 100644 index 000000000..81c2e61f4 --- /dev/null +++ b/test/unit/d_text_test.rb @@ -0,0 +1,42 @@ +require "test_helper" + +class DTextTest < ActiveSupport::TestCase + def assert_strip_dtext(expected, dtext) + assert_equal(expected, DText.strip_dtext(dtext)) + end + + context "DText" do + context "#strip_dtext" do + should "strip dtext markup from the input" do + assert_strip_dtext("x", "[b]x[/b]") + assert_strip_dtext("x", "[i]x[/i]") + assert_strip_dtext("x", "[tn]x[/tn]") + assert_strip_dtext("x", "[spoilers]x[/spoilers]") + + assert_strip_dtext("post #123", "post #123") + assert_strip_dtext("pixiv #123", "pixiv #123") + + assert_strip_dtext("bkub", "{{bkub}}") + assert_strip_dtext("bkub", "[[bkub]]") + assert_strip_dtext("Bkub", "[[bkub|Bkub]]") + + assert_strip_dtext("http://www.example.com", "http://www.example.com") + assert_strip_dtext("http://www.example.com", "<http://www.example.com>") + assert_strip_dtext("x", '"x":/posts') + assert_strip_dtext("x", '"x":[/posts]') + + assert_strip_dtext("@bkub", "@bkub") + assert_strip_dtext("@bkub", "<@bkub>") + + assert_strip_dtext("x", "h1. x") + assert_strip_dtext("x", "h2. 
[i]x[/i]") + + assert_strip_dtext("* one\n* two", "* [b]one[/b]\n* [[two]]") + assert_strip_dtext("okay", "[expand][u]okay[/u][/expand]") + assert_strip_dtext("> chen said:\n> \n> honk honk", "[quote]chen said:\n\nhonk honk[/quote]") + + assert_strip_dtext("one two three\nfour\n\nfive six", "one [b]two[/b] three\nfour\n\nfive six") + end + end + end +end