artstation: add partial support for video clips (#5063).

Add partial support for fetching videos from ArtStation posts that
contain videos. Most of this code is disabled for now because actually
downloading these videos requires bypassing a Cloudflare captcha.
This commit is contained in:
evazion
2022-03-21 16:48:43 -05:00
parent 3fc01de19c
commit 452ce8d165
3 changed files with 54 additions and 11 deletions

View File

@@ -26,6 +26,10 @@ class Source::URL::ArtStation < Source::URL
@file = file
@timestamp = query if query&.match?(/^\d+$/)
# https://cdn-animation.artstation.com/p/video_sources/000/466/622/workout.mp4
in "cdn-animation.artstation.com", "p", "video_sources", *subdirs, file
# pass
# https://www.artstation.com/artwork/04XA4
# https://www.artstation.com/artwork/cody-from-sf (old; redirects to https://www.artstation.com/artwork/3JJA)
# https://sa-dui.artstation.com/projects/DVERn
@@ -66,10 +70,12 @@ class Source::URL::ArtStation < Source::URL
def full_image_url(size = "original")
return nil unless image_url?
if @timestamp.present?
if @asset_type.present? && @asset_subdir.present? && @file.present? && @timestamp.present?
"https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@file}?#{@timestamp}"
else
elsif @asset_type.present? && @asset_subdir.present? && @file.present?
"https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@file}"
else
to_s
end
end

View File

@@ -8,7 +8,11 @@ module Sources::Strategies
end
def image_urls
@image_urls ||= image_urls_sub.map { |asset| asset_url(asset, :largest) }
if parsed_url.image_url?
[asset_url(url)]
else
image_urls_from_api
end
end
def page_url
@@ -58,12 +62,22 @@ module Sources::Strategies
end
end
def image_urls_sub
if parsed_url.image_url?
[url]
else
api_response[:assets].to_a.select { |asset| asset[:asset_type] == "image" }.pluck(:image_url)
end
# Build the list of downloadable URLs from the assets in the API response.
#
# Assets of type "image" are resolved through `asset_url`. Assets of type
# "video_clip" are currently skipped (see the XXX note below). Assets of any
# other type contribute nothing to the result.
#
# @return [Array] one entry per asset that produced a URL; empty when the API
#   response has no assets
def image_urls_from_api
  api_response[:assets].to_a.filter_map do |asset|
    case asset[:asset_type]
    when "image"
      asset_url(asset[:image_url])
    when "video_clip"
      next # XXX Skip for now; actually downloading these videos requires bypassing a Cloudflare captcha.

      # Everything below is intentionally unreachable until the skip above is
      # removed. It reads the embedded player's iframe URL, fetches that page,
      # and takes the <video><source> URL from it. Safe navigation keeps a
      # missing <iframe> or <source> from raising; the asset is dropped instead.
      embed_url = Nokogiri::HTML5.parse(asset[:player_embedded]).at("iframe")&.attr("src")
      next if embed_url.nil?

      response = http.cache(1.minute).get(embed_url)
      next if response.status != 200

      response.parse.at("video source")&.attr("src")
    end
  end
end
def artist_name_from_url
@@ -84,12 +98,11 @@ module Sources::Strategies
end
memoize :api_response
def asset_url(url, size)
def asset_url(url)
parsed_url = Source::URL.parse(url)
image_sizes = %w[original 4k large medium small]
urls = image_sizes.map { |size| parsed_url.full_image_url(size) }
urls = urls.reverse if size == :smallest
chosen_url = urls.find { |url| http_exists?(url) }
chosen_url || url

View File

@@ -146,6 +146,30 @@ module Sources
end
end
# Covers ArtStation posts whose assets include video clips alongside images.
context "A work that includes video clips" do
  # NOTE(review): presumably `should_eventually` registers this as a pending test
  # rather than running it, since the strategy currently skips video_clip assets — confirm.
  should_eventually "include the video clips in the image urls" do
    expected_urls = %w[
      https://cdn.artstation.com/p/assets/images/images/040/979/418/original/yusuf-umar-workout-10mb.gif?1630425406
      https://cdn.artstation.com/p/assets/images/images/040/979/435/4k/yusuf-umar-1.jpg?1630425420
      https://cdn.artstation.com/p/assets/images/images/040/979/470/4k/yusuf-umar-2.jpg?1630425483
      https://cdn.artstation.com/p/assets/images/images/040/979/494/4k/yusuf-umar-3.jpg?1630425530
      https://cdn.artstation.com/p/assets/images/images/040/979/503/4k/yusuf-umar-4.jpg?1630425547
      https://cdn.artstation.com/p/assets/images/images/040/979/659/4k/yusuf-umar-5.jpg?1630425795
      https://cdn.artstation.com/p/assets/images/images/040/980/932/4k/yusuf-umar-tpose.jpg?1630427748
      https://cdn-animation.artstation.com/p/video_sources/000/466/622/workout.mp4
      https://cdn-animation.artstation.com/p/video_sources/000/466/623/workout-clay.mp4
    ]

    @source = Sources::Strategies.find("https://www.artstation.com/artwork/0nP1e8")
    assert_equal(expected_urls, @source.image_urls)
  end

  # A direct link to the video file should come back unchanged as the only image URL.
  should "work for the video itself" do
    video_url = "https://cdn-animation.artstation.com/p/video_sources/000/466/622/workout.mp4"

    @source = Sources::Strategies.find(video_url)
    assert_equal([video_url], @source.image_urls)
  end
end
context "A work that has been deleted" do
should "work" do
url = "https://fiship.artstation.com/projects/x8n8XT"