Fix #3996: Some direct ArtStation URLs cause the bookmarklet to fail.

* Don't fail on URLs that don't contain the project id (direct image URLs).
* Don't fail when the work has been deleted.
* Parse the artist name from the URL when possible, so that the artist finder works on bad_artstation_id posts.
* Set the canonical source URL to `https://artist.artstation.com/projects/12345` instead of
  `https://www.artstation.com/artwork/12345` (this way we preserve the artist name).
* Cache the API call.
* Include the API call results in /source.json.
This commit is contained in:
evazion
2018-12-03 15:48:29 -06:00
parent 138a04ed8a
commit fd2d56a388
2 changed files with 102 additions and 85 deletions

View File

@@ -1,25 +1,30 @@
# Page URLs:
#
# * https://www.artstation.com/artwork/04XA4
# * https://www.artstation.com/artwork/cody-from-sf
# * https://sa-dui.artstation.com/projects/DVERn
#
# Profile URLs:
#
# * https://www.artstation.com/artist/sa-dui
# * https://www.artstation.com/sa-dui
# * https://sa-dui.artstation.com/
#
# Image URLs:
#
# * https://cdna.artstation.com/p/assets/images/images/005/804/224/large/titapa-khemakavat-sa-dui-srevere.jpg?1493887236
# * https://cdnb.artstation.com/p/assets/images/images/014/410/217/smaller_square/bart-osz-bartosz1812041.jpg?1543866276
module Sources::Strategies module Sources::Strategies
class ArtStation < Base class ArtStation < Base
PROJECT = %r!\Ahttps?://[a-z0-9-]+\.artstation\.com/(?:artwork|projects)/(?<project_id>[a-z0-9-]+)/?\z!i PROJECT1 = %r!\Ahttps?://www\.artstation\.com/artwork/(?<project_id>[a-z0-9-]+)/?\z!i
PROJECT2 = %r!\Ahttps?://(?<artist_name>[a-z0-9-]+)\.artstation\.com/projects/(?<project_id>[a-z0-9-]+)/?\z!i
PROJECT = Regexp.union(PROJECT1, PROJECT2)
ASSET = %r!\Ahttps?://cdn\w*\.artstation\.com/p/assets/images/images/\d+/\d+/\d+/(?:medium|small|large)/!i ASSET = %r!\Ahttps?://cdn\w*\.artstation\.com/p/assets/images/images/\d+/\d+/\d+/(?:medium|small|large)/!i
PROFILE1 = %r!\Ahttps?://(\w+)\.artstation\.com!i
PROFILE2 = %r!\Ahttps?://www.artstation.com/artist/(\w+)!i
PROFILE3 = %r!\Ahttps?://www.artstation.com/(\w+)!i
PROFILE = %r!#{PROFILE2}|#{PROFILE3}|#{PROFILE1}!
attr_reader :json, :image_urls attr_reader :json, :image_urls
# https://www.artstation.com/artwork/04XA4
# https://www.artstation.com/artwork/cody-from-sf
# https://sa-dui.artstation.com/projects/DVERn
def self.project_id(url)
if url =~ PROJECT
$~[:project_id]
else
nil
end
end
def domains def domains
["artstation.com"] ["artstation.com"]
end end
@@ -35,62 +40,44 @@ module Sources::Strategies
memoize :image_urls memoize :image_urls
def page_url def page_url
[url, referer_url].each do |x| return nil unless project_id.present?
if x =~ PROJECT
return "https://www.artstation.com/artwork/#{$~[:project_id]}"
end
end
return super if artist_name.present?
"https://#{artist_name}.artstation.com/projects/#{project_id}"
else
"https://www.artstation.com/artwork/#{project_id}"
end
end end
def profile_url def profile_url
if url =~ PROFILE1 && $1 != "www" return nil unless artist_name.present?
return "https://www.artstation.com/#{$1}" "https://www.artstation.com/#{artist_name}"
end
if url =~ PROFILE2
return "https://www.artstation.com/#{$1}"
end
if url =~ PROFILE3 && url !~ PROJECT
return url
end
api_json["user"]["permalink"]
end end
def artist_name def artist_name
api_json["user"]["username"] artist_name_from_url || api_response.dig(:user, :username)
end end
def artist_commentary_title def artist_commentary_title
api_json["title"] api_response[:title]
end end
def artist_commentary_desc def artist_commentary_desc
ActionView::Base.full_sanitizer.sanitize(api_json["description"]) api_response[:description]
end
def dtext_artist_commentary_desc
ActionView::Base.full_sanitizer.sanitize(artist_commentary_desc)
end end
memoize :artist_commentary_desc
def tags def tags
return nil if !api_json.has_key?("tags") api_response[:tags].to_a.map do |tag|
[tag.downcase.tr(" ", "_"), "https://www.artstation.com/search?q=" + CGI.escape(tag)]
api_json["tags"]. end
map { |tag| [tag.downcase.tr(" ", "_"), tag_url(tag)]}
end end
memoize :tags
def normalized_for_artist_finder? def normalized_for_artist_finder?
url =~ PROFILE3 && url !~ PROFILE2 && url !~ PROJECT profile_url.present? && url == profile_url
end
def normalizable_for_artist_finder?
url =~ PROFILE || url =~ PROJECT
end
def normalize_for_artist_finder
profile_url
end end
public public
@@ -100,39 +87,31 @@ module Sources::Strategies
return [url] return [url]
end end
api_json["assets"] api_response[:assets].to_a
.select { |asset| asset["asset_type"] == "image" } .select { |asset| asset[:asset_type] == "image" }
.map { |asset| asset["image_url"] } .map { |asset| asset[:image_url] }
end end
# these are de facto private methods but are public for testing # these are de facto private methods but are public for testing
# purposes # purposes
def artist_name_from_url
urls.map { |url| url[PROJECT, :artist_name] }.compact.first
end
def project_id def project_id
self.class.project_id(url) || self.class.project_id(referer_url) urls.map { |url| url[PROJECT, :project_id] }.compact.first
end
memoize :project_id
def api_url
"https://www.artstation.com/projects/#{project_id}.json"
end end
def api_json def api_response
if project_id.nil? return {} unless project_id.present?
raise ::Sources::Error.new("Project id could not be determined from (#{url}, #{referer_url})")
end
resp = HTTParty.get(api_url, Danbooru.config.httparty_options) resp, code = HttpartyCache.get("https://www.artstation.com/projects/#{project_id}.json")
return {} if code != 200
if resp.success? JSON.parse(resp, symbolize_names: true)
json = JSON.parse(resp.body)
else
raise HTTParty::ResponseError.new(resp)
end
return json
end end
memoize :api_json memoize :api_response
# Returns the original representation of the asset, if it exists. Otherwise # Returns the original representation of the asset, if it exists. Otherwise
# return the url. # return the url.
@@ -156,10 +135,5 @@ module Sources::Strategies
return x return x
end end
def tag_url(name)
"https://www.artstation.com/search?q=" + CGI.escape(name)
end
end end
end end

View File

@@ -11,6 +11,10 @@ module Sources
assert_equal("https://cdna.artstation.com/p/assets/images/images/000/705/368/large/jey-rain-one1.jpg", @site.image_url.sub(/\?\d+/, "")) assert_equal("https://cdna.artstation.com/p/assets/images/images/000/705/368/large/jey-rain-one1.jpg", @site.image_url.sub(/\?\d+/, ""))
end end
should "get the canonical url" do
assert_equal("https://jeyrain.artstation.com/projects/04XA4", @site.canonical_url)
end
should "get the profile" do should "get the profile" do
assert_equal("https://www.artstation.com/jeyrain", @site.profile_url) assert_equal("https://www.artstation.com/jeyrain", @site.profile_url)
end end
@@ -25,7 +29,7 @@ module Sources
should "get the artist commentary" do should "get the artist commentary" do
assert_equal("pink", @site.artist_commentary_title) assert_equal("pink", @site.artist_commentary_title)
assert_equal("", @site.artist_commentary_desc) assert_equal("", @site.dtext_artist_commentary_desc)
end end
end end
@@ -39,6 +43,10 @@ module Sources
assert_equal(url, @site.image_url) assert_equal(url, @site.image_url)
end end
should "get the canonical url" do
assert_equal("https://dantewontdie.artstation.com/projects/YZK5q", @site.canonical_url)
end
should "get the profile" do should "get the profile" do
assert_equal("https://www.artstation.com/dantewontdie", @site.profile_url) assert_equal("https://www.artstation.com/dantewontdie", @site.profile_url)
end end
@@ -53,7 +61,7 @@ module Sources
should "get the artist commentary" do should "get the artist commentary" do
assert_equal("Reika ", @site.artist_commentary_title) assert_equal("Reika ", @site.artist_commentary_title)
assert_equal("From Gantz.", @site.artist_commentary_desc) assert_equal("From Gantz.", @site.dtext_artist_commentary_desc)
end end
end end
@@ -72,11 +80,32 @@ module Sources
setup do setup do
@url = "https://cdna.artstation.com/p/assets/images/images/006/029/978/large/amama-l-z.jpg" @url = "https://cdna.artstation.com/p/assets/images/images/006/029/978/large/amama-l-z.jpg"
@ref = "https://www.artstation.com/artwork/4BWW2" @ref = "https://www.artstation.com/artwork/4BWW2"
@site = Sources::Strategies.find(@url, @ref)
end end
should "fetch the source data" do context "with a referer" do
assert_equal("amama", @site.artist_name) should "work" do
site = Sources::Strategies.find(@url, @ref)
assert_equal(@url, site.image_url)
assert_equal("https://amama.artstation.com/projects/4BWW2", site.page_url)
assert_equal("https://amama.artstation.com/projects/4BWW2", site.canonical_url)
assert_equal("https://www.artstation.com/amama", site.profile_url)
assert_equal("amama", site.artist_name)
assert_nothing_raised { site.to_h }
end
end
context "without a referer" do
should "work" do
site = Sources::Strategies.find(@url)
assert_equal(@url, site.image_url)
assert_nil(site.page_url)
assert_nil(site.profile_url)
assert_nil(site.artist_name)
assert_equal([], site.tags)
assert_nothing_raised { site.to_h }
end
end end
end end
@@ -90,5 +119,19 @@ module Sources
assert_equal(urls, @site.image_urls) assert_equal(urls, @site.image_urls)
end end
end end
context "A work that has been deleted" do
should "work" do
url = "https://fiship.artstation.com/projects/x8n8XT"
site = Sources::Strategies.find(url)
assert_equal("fiship", site.artist_name)
assert_equal("https://www.artstation.com/fiship", site.profile_url)
assert_equal(url, site.page_url)
assert_equal(url, site.canonical_url)
assert_nil(site.image_url)
assert_nothing_raised { site.to_h }
end
end
end end
end end