moebooru: support md5-based post URLs.

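Support URLs like these:

* https://yande.re/post/show?md5=2c95b8975b73744da2bcbed9619c1d59
* https://konachan.com/post/show?md5=955aa45f3b452b415509b47dcc9475ac
* https://yande.re/post?tags=md5:2c95b8975b73744da2bcbed9619c1d59
* https://konachan.com/post?tags=md5:955aa45f3b452b415509b47dcc9475ac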

This makes scraping images from these sites by md5 easier.
This commit is contained in:
evazion
2022-11-08 21:39:27 -06:00
parent 09f1ace357
commit 153d5f3e67
2 changed files with 39 additions and 1 deletions

View File

@@ -19,6 +19,17 @@ class Source::URL::Moebooru < Source::URL
# https://konachan.com/data/preview/5d/63/5d633771614e4bf5c17df19a0f0f333f.jpg
in _, "data", "preview", *subdirs, /^(\h{32})\.jpg$/
@md5 = $1
@image_url = true
# https://yande.re/post/show?md5=2c95b8975b73744da2bcbed9619c1d59
# https://konachan.com/post/show?md5=955aa45f3b452b415509b47dcc9475ac
in _, "post", "show" if params[:md5].present?
@md5 = params[:md5]
# https://yande.re/post?tags=md5:2c95b8975b73744da2bcbed9619c1d59
# https://konachan.com/post?tags=md5:955aa45f3b452b415509b47dcc9475ac
in _, "post" if params[:tags].to_s.match?(/\Amd5:\h{32}\z/i)
@md5 = params[:tags][/\Amd5:(\h{32})\z/i, 1]
# https://yande.re/sample/ceb6a12e87945413a95b90fada406f91/.jpg
# https://files.yande.re/sample/0d79447ce2c89138146f64ba93633568/yande.re%20290757%20sample%20seifuku%20thighhighs%20tsukudani_norio.jpg
@@ -38,6 +49,7 @@ class Source::URL::Moebooru < Source::URL
@md5 = md5
@work_id = work_id_from_filename
@original_file_ext = file_ext_for(sample_type)
@image_url = true
# https://yande.re/jpeg/22577d2344fe694cf47f80563031b3cd.jpg
# https://files.yande.re/image/22577d2344fe694cf47f80563031b3cd.png
@@ -45,6 +57,7 @@ class Source::URL::Moebooru < Source::URL
in _, ("sample" | "jpeg" | "image") => sample_type, /^(\h{32})\.\w+$/
@md5 = $1
@original_file_ext = file_ext_for(sample_type)
@image_url = true
else
nil
@@ -88,7 +101,7 @@ class Source::URL::Moebooru < Source::URL
end
# Whether this URL was recognized as a direct image URL.
#
# Returns the presence of @image_url. The "data/preview", "sample", "jpeg"
# and "image" path patterns set @image_url to true alongside @md5, while the
# md5-based page URL patterns (/post/show?md5=... and /post?tags=md5:...)
# set only @md5, so an md5 alone no longer implies an image URL.
#
# NOTE(review): this view looks like a diff with its +/- markers stripped, so
# `md5.present?` is presumably the old body that `@image_url.present?`
# replaces -- confirm against the real file. As written here, its result is
# discarded and only the last expression is returned.
def image_url?
md5.present?
@image_url.present?
end
def page_url

View File

@@ -60,6 +60,17 @@ module Sources
tags: %w[anchovy bandages darjeeling girls_und_panzer katyusha kay_(girls_und_panzer) mika_(girls_und_panzer) nishi_kinuyo nishizumi_maho nishizumi_miho shimada_arisu uniform]
)
end
# A yande.re post looked up by md5 (/post/show?md5=<md5>) rather than by post id.
# The expectations pin the full-size image URL, its download size, the tags and
# the page/profile links reported for the matching post.
context "a https://yande.re/post/show?md5=<md5> URL" do
  strategy_should_work(
    "https://yande.re/post/show?md5=7ecfdead705d7b956b26b1d37b98d089",
    image_urls: ["https://files.yande.re/image/7ecfdead705d7b956b26b1d37b98d089/yande.re%20482880.jpg"],
    download_size: 362_554,
    tags: %w[bayashiko journey_to_the_west sun_wukong],
    page_url: "https://yande.re/post/show/482880",
    profile_url: "https://twitter.com/apononori"
  )
end
end
context "For konachan.com," do
@@ -102,6 +113,16 @@ module Sources
profile_url: "https://www.pixiv.net/users/22528152"
)
end
# A konachan.com post looked up by md5 (/post/show?md5=<md5>) rather than by post id.
# The expectations pin the full-size image URL, its download size, the tags and
# the artist profile link reported for the matching post.
# NOTE(review): unlike the yande.re md5 context, no page_url expectation is
# given here -- confirm whether that omission is intentional.
context "a https://konachan.com/post/show?md5=<md5>" do
strategy_should_work(
"https://konachan.com/post/show?md5=ca12cdb79a66d242e95a6f958341bf05",
image_urls: ["https://konachan.com/image/ca12cdb79a66d242e95a6f958341bf05/Konachan.com%20-%20270916.png"],
download_size: 8_167_593,
tags: %w[anthropomorphism bed blonde_hair bow brown_eyes doll girls_frontline hara_shoutarou hoodie long_hair pantyhose scar skirt twintails ump-45_(girls_frontline) ump-9_(girls_frontline)],
profile_url: "https://www.pixiv.net/users/22528152"
)
end
end
should "Parse yande.re URLs correctly" do
@@ -117,6 +138,8 @@ module Sources
assert(Source::URL.image_url?("https://ayase.yande.re/image/2d0d229fd8465a325ee7686fcc7f75d2/yande.re%20192481%20animal_ears%20bunny_ears%20garter_belt%20headphones%20mitha%20stockings%20thighhighs.jpg"))
assert(Source::URL.image_url?("https://yuno.yande.re/image/1764b95ae99e1562854791c232e3444b/yande.re%20281544%20cameltoe%20erect_nipples%20fundoshi%20horns%20loli%20miyama-zero%20sarashi%20sling_bikini%20swimsuits.jpg"))
assert(Source::URL.page_url?("https://yande.re/post/show?md5=2c95b8975b73744da2bcbed9619c1d59"))
assert(Source::URL.page_url?("https://yande.re/post?tags=md5:2c95b8975b73744da2bcbed9619c1d59"))
assert(Source::URL.page_url?("https://yande.re/post/show/3"))
end
@@ -129,6 +152,8 @@ module Sources
assert(Source::URL.image_url?("https://konachan.com/sample/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20sample.jpg"))
assert(Source::URL.image_url?("https://konachan.com/jpeg/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20banishment%20bicycle%20grass%20group%20male%20night%20original%20rooftop%20scenic%20signed%20stars%20tree.jpg"))
assert(Source::URL.page_url?("https://konachan.com/post/show?md5=955aa45f3b452b415509b47dcc9475ac"))
assert(Source::URL.page_url?("https://konachan.com/post?tags=md5:955aa45f3b452b415509b47dcc9475ac"))
assert(Source::URL.page_url?("https://konachan.com/post/show/270803/banishment-bicycle-grass-group-male-night-original"))
end
end