nokogiri: switch to the nokogumbo-based html5 parser.
https://github.com/sparklemotion/nokogiri/blob/main/CHANGELOG.md#1120--2021-08-02
This commit is contained in:
@@ -49,7 +49,7 @@ class DText
 # @param artists [Array<Artist>]
 # @return [String] the HTML output
 def self.postprocess(html, wiki_pages, tags, artists)
-fragment = Nokogiri::HTML.fragment(html)
+fragment = Nokogiri::HTML5.fragment(html)

 fragment.css("a.dtext-wiki-link").each do |node|
 path = Addressable::URI.parse(node["href"]).path
@@ -174,7 +174,7 @@ class DText
 # @return [Array<String>] the list of wiki page names
 def self.parse_wiki_titles(text)
 html = DTextRagel.parse(text)
-fragment = Nokogiri::HTML.fragment(html)
+fragment = Nokogiri::HTML5.fragment(html)

 titles = fragment.css("a.dtext-wiki-link").map do |node|
 title = node["href"][%r{\A/wiki_pages/(.*)\z}i, 1]
@@ -191,7 +191,7 @@ class DText
 # @return [Array<String>] the list of external URLs
 def self.parse_external_links(text)
 html = DTextRagel.parse(text)
-fragment = Nokogiri::HTML.fragment(html)
+fragment = Nokogiri::HTML5.fragment(html)

 links = fragment.css("a.dtext-external-link").map { |node| node["href"] }
 links.uniq
@@ -326,7 +326,7 @@ class DText
 # @param html [String] the HTML input
 # @return [String] the Markdown output
 def self.html_to_markdown(html)
-html = Nokogiri::HTML.fragment(html)
+html = Nokogiri::HTML5.fragment(html)

 html.children.map do |node|
 case node.name
@@ -349,7 +349,7 @@ class DText
 # @param inline [Boolean] if true, convert <img> tags to plaintext
 # @return [String] the DText output
 def self.from_html(text, inline: false, &block)
-html = Nokogiri::HTML.fragment(text)
+html = Nokogiri::HTML5.fragment(text)

 dtext = html.children.map do |element|
 block.call(element) if block.present?
|
|||||||
@@ -269,7 +269,7 @@ module Sources
 resp = http.cache(1.minute).get(page_url_from_image_url, follow: {max_hops: 1})

 if resp.status.success?
-Nokogiri::HTML(resp.body.to_s)
+resp.parse
 # the work was deleted
 elsif resp.code == 404
 nil
|
|||||||
@@ -183,7 +183,7 @@ module Sources::Strategies
 end

 def inline_images
-html = Nokogiri::HTML.fragment(artist_commentary_desc)
+html = Nokogiri::HTML5.fragment(artist_commentary_desc)
 html.css("img").map { |node| node["src"] }
 end

|
|||||||
Reference in New Issue
Block a user