From 452ce8d165e4d75c43d7cdca69fef99dfe36f194 Mon Sep 17 00:00:00 2001 From: evazion Date: Mon, 21 Mar 2022 16:48:43 -0500 Subject: [PATCH] artstation: add partial support for video clips (#5063). Add partial support for fetching videos from ArtStation posts that contain videos. Most of this code is disabled for now because actually downloading these videos requires bypassing a Cloudflare captcha. --- app/logical/source/url/art_station.rb | 10 ++++-- app/logical/sources/strategies/art_station.rb | 31 +++++++++++++------ test/unit/sources/art_station_test.rb | 24 ++++++++++++++ 3 files changed, 54 insertions(+), 11 deletions(-) diff --git a/app/logical/source/url/art_station.rb b/app/logical/source/url/art_station.rb index 296f062eb..c67134538 100644 --- a/app/logical/source/url/art_station.rb +++ b/app/logical/source/url/art_station.rb @@ -26,6 +26,10 @@ class Source::URL::ArtStation < Source::URL @file = file @timestamp = query if query&.match?(/^\d+$/) + # https://cdn-animation.artstation.com/p/video_sources/000/466/622/workout.mp4 + in "cdn-animation.artstation.com", "p", "video_sources", *subdirs, file + # pass + # https://www.artstation.com/artwork/04XA4 # https://www.artstation.com/artwork/cody-from-sf (old; redirects to https://www.artstation.com/artwork/3JJA) # https://sa-dui.artstation.com/projects/DVERn @@ -66,10 +70,12 @@ class Source::URL::ArtStation < Source::URL def full_image_url(size = "original") return nil unless image_url? - if @timestamp.present? + if @asset_type.present? && @asset_subdir.present? && @file.present? && @timestamp.present? "https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@file}?#{@timestamp}" - else + elsif @asset_type.present? && @asset_subdir.present? && @file.present? "https://cdn.artstation.com/p/assets/#{@asset_type}/images/#{@asset_subdir}/#{size}/#{@file}" + else + to_s end end diff --git a/app/logical/sources/strategies/art_station.rb b/app/logical/sources/strategies/art_station.rb index 4ff5f165e..bdbb99e83 100644 --- a/app/logical/sources/strategies/art_station.rb +++ b/app/logical/sources/strategies/art_station.rb @@ -8,7 +8,11 @@ module Sources::Strategies end def image_urls - @image_urls ||= image_urls_sub.map { |asset| asset_url(asset, :largest) } + if parsed_url.image_url? + [asset_url(url)] + else + image_urls_from_api + end end def page_url @@ -58,12 +62,22 @@ module Sources::Strategies end end - def image_urls_sub - if parsed_url.image_url? - [url] - else - api_response[:assets].to_a.select { |asset| asset[:asset_type] == "image" }.pluck(:image_url) - end + def image_urls_from_api + api_response[:assets].to_a.map do |asset| + if asset[:asset_type] == "image" + asset_url(asset[:image_url]) + elsif asset[:asset_type] == "video_clip" + next # XXX Skip for now; actually downloading these videos requires bypassing a Cloudflare captcha. + + url = Nokogiri::HTML5.parse(asset[:player_embedded]).at("iframe").attr("src") + next if url.nil? + + response = http.cache(1.minute).get(url) + next if response.status != 200 + + response.parse.at("video source").attr("src") + end + end.compact end def artist_name_from_url @@ -84,12 +98,11 @@ module Sources::Strategies end memoize :api_response - def asset_url(url, size) + def asset_url(url) parsed_url = Source::URL.parse(url) image_sizes = %w[original 4k large medium small] urls = image_sizes.map { |size| parsed_url.full_image_url(size) } - urls = urls.reverse if size == :smallest chosen_url = urls.find { |url| http_exists?(url) } chosen_url || url diff --git a/test/unit/sources/art_station_test.rb b/test/unit/sources/art_station_test.rb index 97b796928..aaf544874 100644 --- a/test/unit/sources/art_station_test.rb +++ b/test/unit/sources/art_station_test.rb @@ -146,6 +146,30 @@ module Sources end end + context "A work that includes video clips" do + should_eventually "include the video clips in the image urls" do + @source = Sources::Strategies.find("https://www.artstation.com/artwork/0nP1e8") + + assert_equal(%w[ + https://cdn.artstation.com/p/assets/images/images/040/979/418/original/yusuf-umar-workout-10mb.gif?1630425406 + https://cdn.artstation.com/p/assets/images/images/040/979/435/4k/yusuf-umar-1.jpg?1630425420 + https://cdn.artstation.com/p/assets/images/images/040/979/470/4k/yusuf-umar-2.jpg?1630425483 + https://cdn.artstation.com/p/assets/images/images/040/979/494/4k/yusuf-umar-3.jpg?1630425530 + https://cdn.artstation.com/p/assets/images/images/040/979/503/4k/yusuf-umar-4.jpg?1630425547 + https://cdn.artstation.com/p/assets/images/images/040/979/659/4k/yusuf-umar-5.jpg?1630425795 + https://cdn.artstation.com/p/assets/images/images/040/980/932/4k/yusuf-umar-tpose.jpg?1630427748 + https://cdn-animation.artstation.com/p/video_sources/000/466/622/workout.mp4 + https://cdn-animation.artstation.com/p/video_sources/000/466/623/workout-clay.mp4 + ], @source.image_urls) + end + + should "work for the video itself" do + @source = Sources::Strategies.find("https://cdn-animation.artstation.com/p/video_sources/000/466/622/workout.mp4") + + assert_equal(["https://cdn-animation.artstation.com/p/video_sources/000/466/622/workout.mp4"], @source.image_urls) + end + end + context "A work that has been deleted" do should "work" do url = "https://fiship.artstation.com/projects/x8n8XT"