sources: don't escape Unicode characters in tag search URLs.

Fix it so that Unicode characters aren't unnecessarily percent-encoded when generating tag search
URLs. For example, generate URLs like this:

* https://www.pixiv.net/tags/オリジナル/artworks

Not like this:

* https://www.pixiv.net/tags/%E3%82%AA%E3%83%AA%E3%82%B8%E3%83%8A%E3%83%AB/artworks
This commit is contained in:
evazion
2022-12-02 16:04:18 -06:00
parent 9e34f4c3ed
commit c19fc16885
12 changed files with 33 additions and 28 deletions

View File

@@ -68,6 +68,17 @@ module Danbooru
nil nil
end end
# Make a string safe to embed in a URL path or query parameter. Works like `CGI.escape`, except that Unicode
# characters are kept as Unicode instead of being percent-encoded.
#
# @example
#   Danbooru::URL.escape("fate/stay_night") # => "fate%2Fstay_night"
#   Danbooru::URL.escape("大丈夫?おっぱい揉む?") # => "大丈夫%3Fおっぱい揉む%3F"
#
# @param string [String] the text to escape
# @return [String] the escaped string, tagged as UTF-8
def self.escape(string)
  # The pattern names the characters to be percent-encoded: `/`, `?`, `#`, `&`, `+` and `%`.
  encoded = Addressable::URI.encode_component(string, /[\/?#&+%]/)

  # NOTE(review): presumably encode_component hands back a binary-tagged string; re-tag it so callers get UTF-8.
  encoded.force_encoding("UTF-8")
end
# @return [String] the URL in unnormalized form # @return [String] the URL in unnormalized form
def to_s def to_s
original_url original_url

View File

@@ -48,7 +48,7 @@ class Source::Extractor
def tags def tags
api_response[:tags].to_a.map do |tag| api_response[:tags].to_a.map do |tag|
[tag, "https://www.artstation.com/search?q=#{CGI.escape(tag)}"] [tag, "https://www.artstation.com/search?q=#{Danbooru::URL.escape(tag)}"]
end end
end end

View File

@@ -39,7 +39,7 @@ module Source
tags = api_response[:tags].split + ["rating:#{api_response[:rating]}"] tags = api_response[:tags].split + ["rating:#{api_response[:rating]}"]
tags.map do |tag| tags.map do |tag|
[tag, "https://#{domain}/index.php?page=post&s=list&tags=#{CGI.escape(tag)}"] [tag, "https://#{domain}/index.php?page=post&s=list&tags=#{Danbooru::URL.escape(tag)}"]
end end
end end

View File

@@ -39,7 +39,7 @@ module Source
tags = page&.search(".boxbody [rel='tag']").to_a.map(&:text) tags = page&.search(".boxbody [rel='tag']").to_a.map(&:text)
tags.map do |tag| tags.map do |tag|
[tag, "https://www.hentai-foundry.com/pictures/tagged/#{CGI.escape(tag)}"] [tag, "https://www.hentai-foundry.com/pictures/tagged/#{Danbooru::URL.escape(tag)}"]
end end
end end

View File

@@ -23,7 +23,7 @@ module Source
def tags def tags
api_response[:tags].to_s.split.map do |tag| api_response[:tags].to_s.split.map do |tag|
[tag, "https://#{domain}/post?tags=#{CGI.escape(tag)}"] [tag, "https://#{domain}/post?tags=#{Danbooru::URL.escape(tag)}"]
end end
end end

View File

@@ -77,12 +77,8 @@ module Source
def tags def tags
return [] if api_client.blank? return [] if api_client.blank?
base_url = "https://seiga.nicovideo.jp/"
base_url += "manga/" if manga_id.present?
base_url += "tag/"
api_client.tags.map do |name| api_client.tags.map do |name|
[name, base_url + CGI.escape(name)] [name, "https://seiga.nicovideo.jp/#{"manga/" if manga_id}tag/#{Danbooru::URL.escape(name)}"]
end end
end end

View File

@@ -105,7 +105,7 @@ module Source
def tags def tags
tags = api_illust.dig(:tags, :tags).to_a.map do |item| tags = api_illust.dig(:tags, :tags).to_a.map do |item|
[item[:tag], "https://www.pixiv.net/tags/#{CGI.escape(item[:tag])}/artworks"] [item[:tag], "https://www.pixiv.net/tags/#{Danbooru::URL.escape(item[:tag])}/artworks"]
end end
if api_illust["aiType"] == 2 if api_illust["aiType"] == 2

View File

@@ -28,7 +28,7 @@ module Source
def tags def tags
page&.css("meta[name='keywords']")&.attr("content")&.value.to_s.split(/, /).compact.map do |tag| page&.css("meta[name='keywords']")&.attr("content")&.value.to_s.split(/, /).compact.map do |tag|
[tag.tr(" ", "_"), "https://rule34.us/index.php?r=posts/index&q=#{CGI.escape(tag)}"] [tag.tr(" ", "_"), "https://rule34.us/index.php?r=posts/index&q=#{Danbooru::URL.escape(tag)}"]
end end
end end

View File

@@ -47,7 +47,7 @@ module Source
def tags def tags
page&.css("#view .tag a[href^='/search/list']").to_a.map do |tag| page&.css("#view .tag a[href^='/search/list']").to_a.map do |tag|
[tag.text, "https://www.tinami.com/search/list?keyword=#{CGI.escape(tag.text)}"] [tag.text, "https://www.tinami.com/search/list?keyword=#{Danbooru::URL.escape(tag.text)}"]
end end
end end

View File

@@ -75,7 +75,7 @@ class Source::Extractor
def tags def tags
post[:tags].to_a.map do |tag| post[:tags].to_a.map do |tag|
[tag, "https://tumblr.com/tagged/#{CGI.escape(tag)}"] [tag, "https://tumblr.com/tagged/#{Danbooru::URL.escape(tag)}"]
end.uniq end.uniq
end end

View File

@@ -8,12 +8,12 @@ module Sources
context "A nicoseiga post url" do context "A nicoseiga post url" do
tags = [ tags = [
["アニメ", "https://seiga.nicovideo.jp/tag/%E3%82%A2%E3%83%8B%E3%83%A1"], ["アニメ", "https://seiga.nicovideo.jp/tag/アニメ"],
["コジコジ", "https://seiga.nicovideo.jp/tag/%E3%82%B3%E3%82%B8%E3%82%B3%E3%82%B8"], ["コジコジ", "https://seiga.nicovideo.jp/tag/コジコジ"],
["さくらももこ", "https://seiga.nicovideo.jp/tag/%E3%81%95%E3%81%8F%E3%82%89%E3%82%82%E3%82%82%E3%81%93"], ["さくらももこ", "https://seiga.nicovideo.jp/tag/さくらももこ"],
["ドット絵", "https://seiga.nicovideo.jp/tag/%E3%83%89%E3%83%83%E3%83%88%E7%B5%B5"], ["ドット絵", "https://seiga.nicovideo.jp/tag/ドット絵"],
["ニコニコ大百科", "https://seiga.nicovideo.jp/tag/%E3%83%8B%E3%82%B3%E3%83%8B%E3%82%B3%E5%A4%A7%E7%99%BE%E7%A7%91"], ["ニコニコ大百科", "https://seiga.nicovideo.jp/tag/ニコニコ大百科"],
["お絵カキコ", "https://seiga.nicovideo.jp/tag/%E3%81%8A%E7%B5%B5%E3%82%AB%E3%82%AD%E3%82%B3"], ["お絵カキコ", "https://seiga.nicovideo.jp/tag/お絵カキコ"],
] ]
strategy_should_work( strategy_should_work(
"http://seiga.nicovideo.jp/seiga/im4937663", "http://seiga.nicovideo.jp/seiga/im4937663",
@@ -32,12 +32,12 @@ module Sources
context "A nicoseiga image url" do context "A nicoseiga image url" do
tags = [ tags = [
["アニメ", "https://seiga.nicovideo.jp/tag/%E3%82%A2%E3%83%8B%E3%83%A1"], ["アニメ", "https://seiga.nicovideo.jp/tag/アニメ"],
["コジコジ", "https://seiga.nicovideo.jp/tag/%E3%82%B3%E3%82%B8%E3%82%B3%E3%82%B8"], ["コジコジ", "https://seiga.nicovideo.jp/tag/コジコジ"],
["さくらももこ", "https://seiga.nicovideo.jp/tag/%E3%81%95%E3%81%8F%E3%82%89%E3%82%82%E3%82%82%E3%81%93"], ["さくらももこ", "https://seiga.nicovideo.jp/tag/さくらももこ"],
["ドット絵", "https://seiga.nicovideo.jp/tag/%E3%83%89%E3%83%83%E3%83%88%E7%B5%B5"], ["ドット絵", "https://seiga.nicovideo.jp/tag/ドット絵"],
["ニコニコ大百科", "https://seiga.nicovideo.jp/tag/%E3%83%8B%E3%82%B3%E3%83%8B%E3%82%B3%E5%A4%A7%E7%99%BE%E7%A7%91"], ["ニコニコ大百科", "https://seiga.nicovideo.jp/tag/ニコニコ大百科"],
["お絵カキコ", "https://seiga.nicovideo.jp/tag/%E3%81%8A%E7%B5%B5%E3%82%AB%E3%82%AD%E3%82%B3"], ["お絵カキコ", "https://seiga.nicovideo.jp/tag/お絵カキコ"],
] ]
strategy_should_work( strategy_should_work(
"http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663", "http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663",
@@ -186,7 +186,7 @@ module Sources
"https://seiga.nicovideo.jp/watch/mg302561", "https://seiga.nicovideo.jp/watch/mg302561",
image_urls: image_urls, image_urls: image_urls,
page_url: "https://seiga.nicovideo.jp/watch/mg302561", page_url: "https://seiga.nicovideo.jp/watch/mg302561",
tags: [["ロリ", "https://seiga.nicovideo.jp/manga/tag/%E3%83%AD%E3%83%AA"]], tags: [["ロリ", "https://seiga.nicovideo.jp/manga/tag/ロリ"]],
artist_name: "とろてい", artist_name: "とろてい",
other_names: ["とろてい"], other_names: ["とろてい"],
tag_name: "nicoseiga_1848060" tag_name: "nicoseiga_1848060"

View File

@@ -94,10 +94,8 @@ module Sources
should "get the tags" do should "get the tags" do
pixiv_tags = @site.tags.map(&:first) pixiv_tags = @site.tags.map(&:first)
pixiv_links = @site.tags.map(&:last)
assert_equal(%w[漫画 test], pixiv_tags) assert_equal(%w[漫画 test], pixiv_tags)
assert_contains(pixiv_links, /search\.php/)
end end
should "get the artist commentary" do should "get the artist commentary" do