From 19c0027d1ff1f754e6007933938b9c984f28c440 Mon Sep 17 00:00:00 2001 From: evazion Date: Wed, 1 Sep 2021 01:36:33 -0500 Subject: [PATCH] hentai foundry: fix 'Document tree depth exceeded' when parsing commentaries. Fix a regression in 38c9559fe that caused #4657 to fail again. --- app/logical/d_text.rb | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/app/logical/d_text.rb b/app/logical/d_text.rb index a393b589a..39568af11 100644 --- a/app/logical/d_text.rb +++ b/app/logical/d_text.rb @@ -49,7 +49,7 @@ class DText # @param artists [Array] # @return [String] the HTML output def self.postprocess(html, wiki_pages, tags, artists) - fragment = Nokogiri::HTML5.fragment(html) + fragment = parse_html(html) fragment.css("a.dtext-wiki-link").each do |node| path = Addressable::URI.parse(node["href"]).path @@ -174,7 +174,7 @@ class DText # @return [Array] the list of wiki page names def self.parse_wiki_titles(text) html = DTextRagel.parse(text) - fragment = Nokogiri::HTML5.fragment(html) + fragment = parse_html(html) titles = fragment.css("a.dtext-wiki-link").map do |node| title = node["href"][%r{\A/wiki_pages/(.*)\z}i, 1] @@ -191,7 +191,7 @@ class DText # @return [Array] the list of external URLs def self.parse_external_links(text) html = DTextRagel.parse(text) - fragment = Nokogiri::HTML5.fragment(html) + fragment = parse_html(html) links = fragment.css("a.dtext-external-link").map { |node| node["href"] } links.uniq @@ -326,7 +326,7 @@ class DText # @param html [String] the HTML input # @return [String] the Markdown output def self.html_to_markdown(html) - html = Nokogiri::HTML5.fragment(html) + html = parse_html(html) html.children.map do |node| case node.name @@ -349,7 +349,7 @@ class DText # @param inline [Boolean] if true, convert tags to plaintext # @return [String] the DText output def self.from_html(text, inline: false, &block) - html = Nokogiri::HTML5.fragment(text) + html = parse_html(text) dtext = html.children.map do |element| block.call(element) if block.present? @@ -427,4 +427,11 @@ class DText def self.excerpt(text, length: 160) strip_dtext(text).split(/\r\n|\r|\n/).first.to_s.truncate(length) end + + # Parse a string of HTML to a document object. + # @param html [String] + # @return [Nokogiri::HTML5::DocumentFragment] + def self.parse_html(html) + Nokogiri::HTML5.fragment(html, max_tree_depth: -1) + end end