artstation: add partial support for video clips (#5063).
Add partial support for fetching video clips from ArtStation posts. Most of this code is disabled for now because downloading these videos requires bypassing a Cloudflare captcha.
This commit is contained in:
@@ -26,6 +26,10 @@ class Source::URL::ArtStation < Source::URL
|
|||||||
@file = file
|
@file = file
|
||||||
@timestamp = query if query&.match?(/^\d+$/)
|
@timestamp = query if query&.match?(/^\d+$/)
|
||||||
|
|
||||||
|
# https://cdn-animation.artstation.com/p/video_sources/000/466/622/workout.mp4
|
||||||
|
in "cdn-animation.artstation.com", "p", "video_sources", *subdirs, file
|
||||||
|
# pass
|
||||||
|
|
||||||
# https://www.artstation.com/artwork/04XA4
|
# https://www.artstation.com/artwork/04XA4
|
||||||
# https://www.artstation.com/artwork/cody-from-sf (old; redirects to https://www.artstation.com/artwork/3JJA)
|
# https://www.artstation.com/artwork/cody-from-sf (old; redirects to https://www.artstation.com/artwork/3JJA)
|
||||||
# https://sa-dui.artstation.com/projects/DVERn
|
# https://sa-dui.artstation.com/projects/DVERn
|
||||||
@@ -66,10 +70,12 @@ class Source::URL::ArtStation < Source::URL
|
|||||||
def full_image_url(size = "original")
|
def full_image_url(size = "original")
|
||||||
return nil unless image_url?
|
return nil unless image_url?
|
||||||
|
|
||||||
if @timestamp.present?
|
if @asset_type.present? && @asset_subdir.present? && @file.present? && @timestamp.present?
|
||||||
"https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@file}?#{@timestamp}"
|
"https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@file}?#{@timestamp}"
|
||||||
else
|
elsif @asset_type.present? && @asset_subdir.present? && @file.present?
|
||||||
"https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@file}"
|
"https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@file}"
|
||||||
|
else
|
||||||
|
to_s
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,11 @@ module Sources::Strategies
|
|||||||
end
|
end
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
@image_urls ||= image_urls_sub.map { |asset| asset_url(asset, :largest) }
|
if parsed_url.image_url?
|
||||||
|
[asset_url(url)]
|
||||||
|
else
|
||||||
|
image_urls_from_api
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def page_url
|
def page_url
|
||||||
@@ -58,12 +62,22 @@ module Sources::Strategies
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def image_urls_sub
|
def image_urls_from_api
|
||||||
if parsed_url.image_url?
|
api_response[:assets].to_a.map do |asset|
|
||||||
[url]
|
if asset[:asset_type] == "image"
|
||||||
else
|
asset_url(asset[:image_url])
|
||||||
api_response[:assets].to_a.select { |asset| asset[:asset_type] == "image" }.pluck(:image_url)
|
elsif asset[:asset_type] == "video_clip"
|
||||||
end
|
next # XXX Skip for now; actually downloading these videos requires bypassing a Cloudflare captcha.
|
||||||
|
|
||||||
|
url = Nokogiri::HTML5.parse(asset[:player_embedded]).at("iframe").attr("src")
|
||||||
|
next if url.nil?
|
||||||
|
|
||||||
|
response = http.cache(1.minute).get(url)
|
||||||
|
next if response.status != 200
|
||||||
|
|
||||||
|
response.parse.at("video source").attr("src")
|
||||||
|
end
|
||||||
|
end.compact
|
||||||
end
|
end
|
||||||
|
|
||||||
def artist_name_from_url
|
def artist_name_from_url
|
||||||
@@ -84,12 +98,11 @@ module Sources::Strategies
|
|||||||
end
|
end
|
||||||
memoize :api_response
|
memoize :api_response
|
||||||
|
|
||||||
def asset_url(url, size)
|
def asset_url(url)
|
||||||
parsed_url = Source::URL.parse(url)
|
parsed_url = Source::URL.parse(url)
|
||||||
|
|
||||||
image_sizes = %w[original 4k large medium small]
|
image_sizes = %w[original 4k large medium small]
|
||||||
urls = image_sizes.map { |size| parsed_url.full_image_url(size) }
|
urls = image_sizes.map { |size| parsed_url.full_image_url(size) }
|
||||||
urls = urls.reverse if size == :smallest
|
|
||||||
|
|
||||||
chosen_url = urls.find { |url| http_exists?(url) }
|
chosen_url = urls.find { |url| http_exists?(url) }
|
||||||
chosen_url || url
|
chosen_url || url
|
||||||
|
|||||||
@@ -146,6 +146,30 @@ module Sources
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context "A work that includes video clips" do
|
||||||
|
should_eventually "include the video clips in the image urls" do
|
||||||
|
@source = Sources::Strategies.find("https://www.artstation.com/artwork/0nP1e8")
|
||||||
|
|
||||||
|
assert_equal(%w[
|
||||||
|
https://cdn.artstation.com/p/assets/images/images/040/979/418/original/yusuf-umar-workout-10mb.gif?1630425406
|
||||||
|
https://cdn.artstation.com/p/assets/images/images/040/979/435/4k/yusuf-umar-1.jpg?1630425420
|
||||||
|
https://cdn.artstation.com/p/assets/images/images/040/979/470/4k/yusuf-umar-2.jpg?1630425483
|
||||||
|
https://cdn.artstation.com/p/assets/images/images/040/979/494/4k/yusuf-umar-3.jpg?1630425530
|
||||||
|
https://cdn.artstation.com/p/assets/images/images/040/979/503/4k/yusuf-umar-4.jpg?1630425547
|
||||||
|
https://cdn.artstation.com/p/assets/images/images/040/979/659/4k/yusuf-umar-5.jpg?1630425795
|
||||||
|
https://cdn.artstation.com/p/assets/images/images/040/980/932/4k/yusuf-umar-tpose.jpg?1630427748
|
||||||
|
https://cdn-animation.artstation.com/p/video_sources/000/466/622/workout.mp4
|
||||||
|
https://cdn-animation.artstation.com/p/video_sources/000/466/623/workout-clay.mp4
|
||||||
|
], @source.image_urls)
|
||||||
|
end
|
||||||
|
|
||||||
|
should "work for the video itself" do
|
||||||
|
@source = Sources::Strategies.find("https://cdn-animation.artstation.com/p/video_sources/000/466/622/workout.mp4")
|
||||||
|
|
||||||
|
assert_equal(["https://cdn-animation.artstation.com/p/video_sources/000/466/622/workout.mp4"], @source.image_urls)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
context "A work that has been deleted" do
|
context "A work that has been deleted" do
|
||||||
should "work" do
|
should "work" do
|
||||||
url = "https://fiship.artstation.com/projects/x8n8XT"
|
url = "https://fiship.artstation.com/projects/x8n8XT"
|
||||||
|
|||||||
Reference in New Issue
Block a user