tumblr: fix exception when fetching data for video urls.
Fix an exception when trying to fetch source data for URLs like https://va.media.tumblr.com/tumblr_pgohk0TjhS1u7mrsl.mp4. For these URLs it's not possible to use the trick where we try to open the URL as an HTML page and scrape the post id from the HTML. Instead we get the raw video if we try to do this.
This commit is contained in:
@@ -101,13 +101,16 @@ class Source::Extractor
|
||||
end
|
||||
|
||||
def post_url_from_image_html
|
||||
return nil unless parsed_url.image_url? && parsed_url.file_ext&.in?(%w[jpg png pnj gif])
|
||||
|
||||
extracted = image_url_html(parsed_url)&.at("[href*='/post/']")&.[](:href)
|
||||
Source::URL.parse(extracted)
|
||||
end
|
||||
memoize :post_url_from_image_html
|
||||
|
||||
def image_url_html(image_url)
|
||||
resp = http.cache(1.minute).headers(accept: "text/html").get(image_url)
|
||||
return nil if resp.code != 200
|
||||
return nil if resp.code != 200 || resp.mime_type != "text/html"
|
||||
resp.parse
|
||||
end
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ class Source::URL::Tumblr < Source::URL
|
||||
# https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif
|
||||
# https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif
|
||||
# https://66.media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_500h.png
|
||||
# https://media.tumblr.com/701a535af224f89684d2cfcc097575ef/tumblr_pjsx70RakC1y0gqjko1_1280.pnj
|
||||
in _, directory, file if image_url?
|
||||
@directory = directory
|
||||
parse_filename
|
||||
@@ -87,6 +88,7 @@ class Source::URL::Tumblr < Source::URL
|
||||
# https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif
|
||||
# https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif
|
||||
# https://66.media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_500h.png
|
||||
# https://media.tumblr.com/701a535af224f89684d2cfcc097575ef/tumblr_pjsx70RakC1y0gqjko1_1280.pnj
|
||||
# https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
|
||||
# https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg
|
||||
in *words, /\A\d+h?|raw\z/ => size
|
||||
|
||||
@@ -245,6 +245,26 @@ module Sources
|
||||
)
|
||||
end
|
||||
|
||||
context "A *.media.tumblr.com/tumblr_$id_$size.pnj URL" do
|
||||
strategy_should_work(
|
||||
"https://media.tumblr.com/701a535af224f89684d2cfcc097575ef/tumblr_pjsx70RakC1y0gqjko1_1280.pnj",
|
||||
image_urls: ["https://media.tumblr.com/701a535af224f89684d2cfcc097575ef/tumblr_pjsx70RakC1y0gqjko1_1280.pnj"],
|
||||
page_url: nil,
|
||||
artist_name: nil,
|
||||
download_size: 296_595,
|
||||
)
|
||||
end
|
||||
|
||||
context "A va.media.tumblr.com/tumblr_$id.mp4 URL" do
|
||||
strategy_should_work(
|
||||
"https://va.media.tumblr.com/tumblr_pgohk0TjhS1u7mrsl.mp4",
|
||||
image_urls: ["https://va.media.tumblr.com/tumblr_pgohk0TjhS1u7mrsl.mp4"],
|
||||
page_url: nil,
|
||||
artist_name: nil,
|
||||
download_size: 7_960_082,
|
||||
)
|
||||
end
|
||||
|
||||
context "generating page urls" do
|
||||
should "work" do
|
||||
source1 = "https://octrain1020.tumblr.com/post/190713122589"
|
||||
@@ -272,6 +292,7 @@ module Sources
|
||||
assert(Source::URL.image_url?("https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif"))
|
||||
assert(Source::URL.image_url?("https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif"))
|
||||
assert(Source::URL.image_url?("https://66.media.tumblr.com/b9395771b2d0435fe4efee926a5a7d9c/tumblr_pg2wu1L9DM1trd056o2_500h.png"))
|
||||
assert(Source::URL.image_url?("https://media.tumblr.com/701a535af224f89684d2cfcc097575ef/tumblr_pjsx70RakC1y0gqjko1_1280.pnj"))
|
||||
|
||||
assert(Source::URL.image_url?("https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png"))
|
||||
assert(Source::URL.image_url?("https://media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png"))
|
||||
|
||||
Reference in New Issue
Block a user