From 2c36e0281033f450e85c1d7f9cf6e252ef485a62 Mon Sep 17 00:00:00 2001 From: evazion Date: Tue, 23 Aug 2022 21:26:47 -0500 Subject: [PATCH] foundation.app: fix scraping of image urls. Foundation changed their HTML page format and we can no longer scrape the image URL directly from the page. Instead we have to build it based on API data. --- app/logical/source/extractor/foundation.rb | 20 ++++++++++++++++++-- test/test_helpers/source_test_helper.rb | 2 +- test/unit/sources/foundation_test.rb | 11 +++++++++++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/app/logical/source/extractor/foundation.rb b/app/logical/source/extractor/foundation.rb index d09ba156a..69bf2e78f 100644 --- a/app/logical/source/extractor/foundation.rb +++ b/app/logical/source/extractor/foundation.rb @@ -11,8 +11,24 @@ module Source def image_urls if parsed_url.full_image_url.present? [parsed_url.full_image_url] - elsif (image_url = page&.at(".fullscreen img, .fullscreen video")&.attr(:src)) - [Source::URL.parse(image_url).full_image_url].compact + elsif api_response.dig("props", "pageProps", "artwork").present? + artwork = api_response.dig("props", "pageProps", "artwork") + asset_id = artwork["assetId"] + + # Reverse engineered from the Foundation.app Javascript; look for buildVideoUrl in utils/assets.ts. + if artwork["mimeType"].starts_with?("video/") + if artwork["assetVersion"] == 5 + url = "#{artwork["assetScheme"]}#{artwork["assetHost"]}#{artwork["assetPath"]}/nft.mp4" + elsif artwork["assetVersion"] == 3 + url = "https://assets.foundation.app/#{asset_id[-4..-3]}/#{asset_id[-2..-1]}/#{asset_id}/nft_q4.mp4" + else + url = "https://assets.foundation.app/#{asset_id[-4..-3]}/#{asset_id[-2..-1]}/#{asset_id}/nft.mp4" + end + else + url = "#{artwork["assetScheme"]}#{artwork["assetHost"]}/#{artwork["assetPath"]}" + end + + [Source::URL.parse(url).full_image_url].compact else [] end diff --git a/test/test_helpers/source_test_helper.rb b/test/test_helpers/source_test_helper.rb index c4bb83d88..f1ed8521d 100644 --- a/test/test_helpers/source_test_helper.rb +++ b/test/test_helpers/source_test_helper.rb @@ -96,7 +96,7 @@ module SourceTestHelper actual_values = actual_value.sort expected_value.sort.each_with_index { |each_value, index| assert_match(each_value, actual_values[index]) } else - assert_equal(expected_value.sort, actual_value) + assert_equal(expected_value.sort, actual_value.sort) end elsif expected_value.nil? assert_nil(actual_value) diff --git a/test/unit/sources/foundation_test.rb b/test/unit/sources/foundation_test.rb index 792a689ec..2787a2df0 100644 --- a/test/unit/sources/foundation_test.rb +++ b/test/unit/sources/foundation_test.rb @@ -23,6 +23,17 @@ module Sources ) end + context "A foundation gif" do + strategy_should_work( + "https://foundation.app/@patch_oxxo/shine/1", + artist_name: "patch_oxxo", + profile_url: "https://foundation.app/@patch_oxxo", + image_urls: ["https://f8n-ipfs-production.imgix.net/QmWQpt9opcue5F7Q2vTT5P5jPYo1xHhEs7RDxgXzWFHcfK/nft.gif"], + download_size: 52_352_138, + tags: %w[2d animation], + ) + end + context "A foundation.app/@username/foo-bar-1234 URL" do strategy_should_work( "https://foundation.app/@asuka111art/dinner-with-cats-82426",