diff --git a/app/logical/danbooru/http/html_adapter.rb b/app/logical/danbooru/http/html_adapter.rb
index 6c063b026..72a3a5b39 100644
--- a/app/logical/danbooru/http/html_adapter.rb
+++ b/app/logical/danbooru/http/html_adapter.rb
@@ -5,7 +5,10 @@ module Danbooru
       HTTP::MimeType.register_alias "text/html", :html
 
       def decode(str)
-        Nokogiri::HTML5(str, max_tree_depth: -1)
+        # XXX technically should use the charset from the http headers.
+        # force_encoding retags its receiver in place, so retag a copy to
+        # leave the caller's string (and its encoding) untouched.
+        Nokogiri::HTML5.parse(str.dup.force_encoding("utf-8"), max_tree_depth: -1)
       end
     end
   end
diff --git a/test/unit/sources/foundation_test.rb b/test/unit/sources/foundation_test.rb
index b324dbac3..3169f7d78 100644
--- a/test/unit/sources/foundation_test.rb
+++ b/test/unit/sources/foundation_test.rb
@@ -61,5 +61,29 @@ module Sources
         assert_equal(image, case2.image_url)
       end
     end
+
+    should "parse UTF-8 commentaries correctly" do
+      source = Sources::Strategies.find("https://foundation.app/@SimaEnaga/~/107338")
+
+      assert_equal(<<~EOS, source.dtext_artist_commentary_desc)
+        【須佐之男尊/Susanoo-no-Mikoto】
+        He is the youngest child of the three brothers and has older sister "Amaterasu" and older brother "Tsukuyomi". They are children whose father is "Izanagi" and mother is "Izanami".They live in the Land of gods known as "Takamagahara".
+        He carried out a number of violence and caused trouble to people.
+        As a result, he was expelled from Takamagahara and moved to the human world.
+
+        【Meaning】
+        There is a theory that "須佐/susa" is a word
+        that means "凄まじい/susamajii (tremendous)" in Japanese.
+        ”之/no” is a conjunction "of".
+        “男/o” means ”male”.
+        ”尊/mikoto” is a word that after the name of a god or a noble (Lord; Highness).
+        Colloquially, "The crazy guy." lol
+
+        【Concept】
+        He carries the bronze sword “Kusanagi-no Tsurugi”. This is one of the "three sacred treasures" and is the most famous sword in Japan. “Kusanagi-no Tsurugi” is dedicated to Atsuta Shrine in Aichi Prefecture, Japan.
+        The sword is now sealed and no one has seen it. 
+
+      EOS
+    end
   end
 end