tests: update tumblr tests

This commit is contained in:
nonamethanks
2022-09-15 09:48:28 +02:00
parent e2a3265daf
commit 425a905b83

View File

@@ -7,223 +7,134 @@ module Sources
end end
context "The source for a 'http://*.tumblr.com/post/*' photo post with a single image" do context "The source for a 'http://*.tumblr.com/post/*' photo post with a single image" do
setup do commentary_desc = <<~EOS.chomp
@site = Source::Extractor.find("https://noizave.tumblr.com/post/162206271767") <h2>header</h2>
end
should "get the artist name" do <hr><p>plain <b>bold</b> <i>italics</i> <strike>strike</strike></p>
assert_equal("noizave", @site.artist_name)
end
should "get the profile" do <!-- more -->
assert_equal("https://noizave.tumblr.com", @site.profile_url)
end
should "get the tags" do <ol><li>one</li>
tags = ["tag", "red hair", "red-hair", "red_hair"] <li>two</li>
assert_equal(tags, @site.tags.map(&:first)) </ol><ul><li>one</li>
assert_equal(["red_hair", "tag"], @site.normalized_tags) <ul><li>two</li>
end </ul></ul><blockquote><p>quote</p></blockquote>
should "get the commentary" do <p><a href=\"http://www.google.com\">link</a></p>
desc = <<~EOS.chomp EOS
<h2>header</h2>
<hr><p>plain <b>bold</b> <i>italics</i> <strike>strike</strike></p> commentary_desc_dtext = <<~EOS.chomp
h2. header
<!-- more --> plain [b]bold[/b] [i]italics[/i] [s]strike[/s]
<ol><li>one</li> * one
<li>two</li> * two
</ol><ul><li>one</li>
<ul><li>two</li>
</ul></ul><blockquote><p>quote</p></blockquote>
<p><a href=\"http://www.google.com\">link</a></p> * one
EOS * two
assert_nil(@site.artist_commentary_title) [quote]quote[/quote]
assert_equal(desc, @site.artist_commentary_desc)
end
should "get the dtext-ified commentary" do "link":[http://www.google.com]
desc = <<~EOS.chomp EOS
h2. header
plain [b]bold[/b] [i]italics[/i] [s]strike[/s] strategy_should_work(
"https://noizave.tumblr.com/post/162206271767",
* one image_urls: ["https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"],
* two page_url: "https://noizave.tumblr.com/post/162206271767",
artist_name: "noizave",
* one profile_url: "https://noizave.tumblr.com",
* two tags: ["tag", "red hair", "red-hair", "red_hair"],
normalized_tags: ["red_hair", "tag"],
[quote]quote[/quote] artist_commentary_title: nil,
artist_commentary_desc: commentary_desc,
"link":[http://www.google.com] dtext_artist_commentary_desc: commentary_desc_dtext
EOS )
assert_equal(desc, @site.dtext_artist_commentary_desc)
end
should "get the image url" do
assert_equal(["https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"], @site.image_urls)
end
should "get the page url" do
assert_equal("https://noizave.tumblr.com/post/162206271767", @site.page_url)
end
should "get the artist" do
CurrentUser.user = FactoryBot.create(:user)
CurrentUser.ip_addr = "127.0.0.1"
@artist = FactoryBot.create(:artist, name: "noizave", url_string: "https://noizave.tumblr.com/")
assert_equal([@artist], @site.artists)
end
end end
context "The source for a 'http://*.tumblr.com/image/*' image page" do context "The source for a 'http://*.tumblr.com/image/*' image page" do
setup do strategy_should_work(
@site = Source::Extractor.find("https://noizave.tumblr.com/image/162206271767") "https://noizave.tumblr.com/image/162206271767",
end image_urls: ["https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"],
page_url: "https://noizave.tumblr.com/post/162206271767",
should "get the image url" do tags: ["tag", "red hair", "red-hair", "red_hair"],
assert_equal(["https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"], @site.image_urls) normalized_tags: ["red_hair", "tag"]
end )
should "get the page url" do
assert_equal("https://noizave.tumblr.com/post/162206271767", @site.page_url)
end
should "get the tags" do
tags = ["tag", "red hair", "red-hair", "red_hair"]
assert_equal(tags, @site.tags.map(&:first))
assert_equal(["red_hair", "tag"], @site.normalized_tags)
end
end end
context "The source for a 'http://*.media.tumblr.com/$hash/tumblr_$id_540.jpg' image" do context "The source for a 'http://*.media.tumblr.com/$hash/tumblr_$id_540.jpg' image" do
setup do strategy_should_work(
@url = "https://78.media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_540.jpg" "https://78.media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_540.jpg",
@ref = "https://noizave.tumblr.com/post/162094447052" image_urls: ["https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg"],
end page_url: "https://noizave.tumblr.com/post/162094447052",
artist_name: "noizave",
context "with a referer" do profile_url: "https://noizave.tumblr.com",
should "get all the metadata" do tags: ["tag1", "tag2"]
site = Source::Extractor.find(@url, @ref) )
assert_equal("noizave", site.artist_name)
assert_equal("https://noizave.tumblr.com", site.profile_url)
assert_equal(["tag1", "tag2"], site.tags.map(&:first))
assert_equal(@ref, site.page_url)
assert_equal(["https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg"], site.image_urls)
end
end
context "without a referer" do
should "still find all the relevant information" do
site = Source::Extractor.find(@url)
assert_equal("noizave", site.artist_name)
assert_equal("https://noizave.tumblr.com", site.profile_url)
assert_equal(["tag1", "tag2"], site.tags.map(&:first))
assert_equal(@ref, site.page_url)
assert_equal(["https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg"], site.image_urls)
end
end
end end
context "The source for a 'http://*.tumblr.com/post/*' text post with inline images" do context "The source for a 'http://*.tumblr.com/post/*' text post with inline images" do
setup do strategy_should_work(
@site = Source::Extractor.find("https://noizave.tumblr.com/post/162221502947") "https://noizave.tumblr.com/post/162221502947",
end image_urls: %w[
should "get the image urls" do
urls = %w[
https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os2zhkfhY01v11u29_1280.png https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os2zhkfhY01v11u29_1280.png
https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_inline_os2zkg02xH1v11u29_1280.jpg https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_inline_os2zkg02xH1v11u29_1280.jpg
] ],
dtext_artist_commentary_title: "test post",
assert_equal(urls.sort, @site.image_urls.sort) artist_commentary_desc: %r{<p>description</p><figure class="tmblr-full" data-orig-height="3000" data-orig-width="3000"><img src="https://\d+.media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os2zhkfhY01v11u29_540.png" data-orig-height="3000" data-orig-width="3000"/></figure><figure class="tmblr-full" data-orig-height="3000" data-orig-width="3000"><img src="https://\d+.media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_inline_os2zkg02xH1v11u29_540.jpg" data-orig-height="3000" data-orig-width="3000"/></figure>}
end )
should "get the commentary" do
desc = %r{<p>description</p><figure class="tmblr-full" data-orig-height="3000" data-orig-width="3000"><img src="https://\d+.media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os2zhkfhY01v11u29_540.png" data-orig-height="3000" data-orig-width="3000"/></figure><figure class="tmblr-full" data-orig-height="3000" data-orig-width="3000"><img src="https://\d+.media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_inline_os2zkg02xH1v11u29_540.jpg" data-orig-height="3000" data-orig-width="3000"/></figure>}
assert_equal("test post", @site.artist_commentary_title)
assert_match(desc, @site.artist_commentary_desc)
end
end end
context "A video post with inline images" do context "A video post with inline images" do
should "get the video and inline images" do strategy_should_work(
url = "https://noizave.tumblr.com/post/162222617101" "https://noizave.tumblr.com/post/162222617101",
site = Source::Extractor.find(url) image_urls: %w[
urls = %w[
https://va.media.tumblr.com/tumblr_os31dkexhK1wsfqep.mp4 https://va.media.tumblr.com/tumblr_os31dkexhK1wsfqep.mp4
https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os31dclyCR1v11u29_1280.png https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os31dclyCR1v11u29_1280.png
] ]
)
assert_equal(urls, site.image_urls)
assert_equal(url, site.page_url)
end
end end
context "The source for a 'http://*.tumblr.com/post/*' answer post with inline images" do context "The source for a 'http://*.tumblr.com/post/*' answer post with inline images" do
setup do strategy_should_work(
@site = Source::Extractor.find("https://noizave.tumblr.com/post/171237880542/test-ask") "https://noizave.tumblr.com/post/171237880542/test-ask",
end image_urls: ["https://media.tumblr.com/cb481f031010e8ddad564b2150149c9a/tumblr_inline_p4nxoyLrSh1v11u29_1280.png"],
artist_commentary_title: "Anonymous asked: test ask",
should "get the image urls" do dtext_artist_commentary_desc: "test answer",
urls = ["https://media.tumblr.com/cb481f031010e8ddad564b2150149c9a/tumblr_inline_p4nxoyLrSh1v11u29_1280.png"] page_url: "https://noizave.tumblr.com/post/171237880542"
assert_equal(urls, @site.image_urls) )
end
should "get the commentary" do
assert_equal("Anonymous asked: test ask", @site.artist_commentary_title)
assert_match("test answer", @site.artist_commentary_desc)
end
should "get the page url" do
assert_equal("https://noizave.tumblr.com/post/171237880542", @site.page_url)
end
end end
context "A Tumblr post with new image URLs" do context "A Tumblr post with new image URLs" do
should "return the correct image url" do strategy_should_work(
image_url = "https://64.media.tumblr.com/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s1280x1920/46f4af7ec94456f8fef380ee6311eb81178ce7e9.jpg" "https://64.media.tumblr.com/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s1280x1920/46f4af7ec94456f8fef380ee6311eb81178ce7e9.jpg",
page_url = "https://make-do5.tumblr.com/post/619663949657423872" referer: "https://make-do5.tumblr.com/post/619663949657423872",
strategy = Source::Extractor.find(image_url, page_url) page_url: "https://make-do5.tumblr.com/post/619663949657423872",
image_urls: [%r{/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s\d+x\d+/}i],
assert_match(%r{/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s\d+x\d+/}i, image_url) download_size: 7_428_704
assert_equal(page_url, strategy.page_url) )
assert_downloaded(7_428_704, strategy.image_urls.sole)
end
end end
context "A deleted tumblr post" do context "A deleted tumblr post" do
should "extract the info from the url" do strategy_should_work(
site = Source::Extractor.find("http://shimetsukage.tumblr.com/post/176805588268/20180809-ssb-coolboy") "http://shimetsukage.tumblr.com/post/176805588268/20180809-ssb-coolboy",
deleted: true,
assert_nothing_raised { site.to_h } artist_name: "shimetsukage",
assert_equal("shimetsukage", site.artist_name) profile_url: "https://shimetsukage.tumblr.com",
assert_equal("https://shimetsukage.tumblr.com", site.profile_url) page_url: "https://shimetsukage.tumblr.com/post/176805588268",
assert_equal("https://shimetsukage.tumblr.com/post/176805588268", site.page_url) image_urls: [],
assert_equal([], site.image_urls) tags: []
assert_equal([], site.tags) )
end
end end
context "A download for a 'http://*.media.tumblr.com/$hash/tumblr_$id_$size.png' image" do %w[100 250 400 500 500h 540 640 1280].each do |size|
should "find the largest image" do context "A download for a 'http://*.media.tumblr.com/$hash/tumblr_$id_#{size}.png' image" do
%w[100 250 400 500 500h 540 640 1280].each do |size| strategy_should_work(
page = "https://natsuki-teru.tumblr.com/post/178728919271" "https://66.media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_#{size}.png",
image = "https://66.media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_#{size}.png" referer: "https://natsuki-teru.tumblr.com/post/178728919271",
full = "https://media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_1280.png" image_urls: ["https://media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_1280.png"]
site = Source::Extractor.find(image, page) )
assert_equal([full], site.image_urls)
end
end end
end end
@@ -232,7 +143,7 @@ module Sources
"https://64.media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_400.png", "https://64.media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_400.png",
referer: "https://noizave.tumblr.com/post/162206271767", referer: "https://noizave.tumblr.com/post/162206271767",
image_urls: ["https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"], image_urls: ["https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"],
download_size: 3655, download_size: 3655
) )
end end
@@ -241,7 +152,7 @@ module Sources
"http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_250.jpg", "http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_250.jpg",
referer: "https://noizave.tumblr.com/post/162206271767", referer: "https://noizave.tumblr.com/post/162206271767",
image_urls: ["https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg"], image_urls: ["https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg"],
download_size: 105_963, download_size: 105_963
) )
end end
@@ -251,7 +162,7 @@ module Sources
image_urls: ["https://media.tumblr.com/701a535af224f89684d2cfcc097575ef/tumblr_pjsx70RakC1y0gqjko1_1280.pnj"], image_urls: ["https://media.tumblr.com/701a535af224f89684d2cfcc097575ef/tumblr_pjsx70RakC1y0gqjko1_1280.pnj"],
page_url: nil, page_url: nil,
artist_name: nil, artist_name: nil,
download_size: 296_595, download_size: 296_595
) )
end end
@@ -261,7 +172,7 @@ module Sources
image_urls: ["https://va.media.tumblr.com/tumblr_pgohk0TjhS1u7mrsl.mp4"], image_urls: ["https://va.media.tumblr.com/tumblr_pgohk0TjhS1u7mrsl.mp4"],
page_url: nil, page_url: nil,
artist_name: nil, artist_name: nil,
download_size: 7_960_082, download_size: 7_960_082
) )
end end
@@ -281,9 +192,9 @@ module Sources
end end
should "parse Tumblr URLs correctly" do should "parse Tumblr URLs correctly" do
refute(Source::URL.image_url?("https://tumblr.com")) assert_not(Source::URL.image_url?("https://tumblr.com"))
refute(Source::URL.image_url?("https://www.tumblr.com")) assert_not(Source::URL.image_url?("https://www.tumblr.com"))
refute(Source::URL.image_url?("https://yogurtmedia.tumblr.com/post/45732863347")) assert_not(Source::URL.image_url?("https://yogurtmedia.tumblr.com/post/45732863347"))
assert(Source::URL.image_url?("http://data.tumblr.com/07e7bba538046b2b586433976290ee1f/tumblr_o3gg44HcOg1r9pi29o1_raw.jpg")) assert(Source::URL.image_url?("http://data.tumblr.com/07e7bba538046b2b586433976290ee1f/tumblr_o3gg44HcOg1r9pi29o1_raw.jpg"))
assert(Source::URL.image_url?("https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg")) assert(Source::URL.image_url?("https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg"))
@@ -304,8 +215,8 @@ module Sources
assert(Source::URL.image_url?("https://66.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s500x750/4f92bbaaf95c0b4e7970e62b1d2e1415859dd659.png")) assert(Source::URL.image_url?("https://66.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s500x750/4f92bbaaf95c0b4e7970e62b1d2e1415859dd659.png"))
assert(Source::URL.image_url?("https://25.media.tumblr.com/91719d337b218681abc48cdc24e")) assert(Source::URL.image_url?("https://25.media.tumblr.com/91719d337b218681abc48cdc24e"))
refute(Source::URL.page_url?("https://25.media.tumblr.com/91719d337b218681abc48cdc24e")) assert_not(Source::URL.page_url?("https://25.media.tumblr.com/91719d337b218681abc48cdc24e"))
refute(Source::URL.profile_url?("https://25.media.tumblr.com/91719d337b218681abc48cdc24e")) assert_not(Source::URL.profile_url?("https://25.media.tumblr.com/91719d337b218681abc48cdc24e"))
end end
end end
end end