Support fetching artist commentary from DeviantArt

This commit is contained in:
Toks
2015-09-03 20:03:56 -04:00
parent d7b0d2a7e5
commit 12be880fa9
2 changed files with 38 additions and 4 deletions

View File

@@ -1,10 +1,18 @@
module Downloads module Downloads
module RewriteStrategies module RewriteStrategies
class DeviantArt < Base class DeviantArt < Base
# `url` is the URL this strategy instance was constructed with.
# NOTE(review): a `source` method is also defined later in this class (a
# memoized lookup built from `url`); that later definition appears to replace
# the reader generated here, leaving only the `source=` writer from this line.
attr_accessor :url, :source
#
# Stores the URL so the later `source` lookup can be built from it.
#
# @param url [String] presumably the DeviantArt URL being downloaded — TODO confirm
def initialize(url)
@url = url
end
def rewrite(url, headers, data = {}) def rewrite(url, headers, data = {})
if url =~ /https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/ if url =~ /https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/
url, headers = rewrite_html_pages(url, headers) url, headers = rewrite_html_pages(url, headers)
url, headers = rewrite_thumbnails(url, headers) url, headers = rewrite_thumbnails(url, headers)
data[:artist_commentary_title] = source.artist_commentary_title
data[:artist_commentary_desc] = source.artist_commentary_desc
end end
return [url, headers, data] return [url, headers, data]
@@ -13,8 +21,6 @@ module Downloads
protected protected
def rewrite_html_pages(url, headers) def rewrite_html_pages(url, headers)
if url =~ %r{^https?://.+?\.deviantart\.com/art/} if url =~ %r{^https?://.+?\.deviantart\.com/art/}
source = ::Sources::Strategies::DeviantArt.new(url)
source.get
return [source.image_url, headers] return [source.image_url, headers]
else else
return [url, headers] return [url, headers]
@@ -29,13 +35,21 @@ module Downloads
match = $1 match = $1
url.sub!(match + "PRE/", match) url.sub!(match + "PRE/", match)
elsif url =~ %r{^https?://(?:pre|img)\d{2}\.deviantart\.net/} elsif url =~ %r{^https?://(?:pre|img)\d{2}\.deviantart\.net/}
source = ::Sources::Strategies::DeviantArt.new(url)
source.get
return [source.image_url, headers] return [source.image_url, headers]
end end
return [url, headers] return [url, headers]
end end
# Builds the DeviantArt source strategy for +url+ on first use, calls +get+ on
# it (which fetches the source data), and memoizes the object so the fetch
# happens at most once.
#
# @return [::Sources::Strategies::DeviantArt] the already-fetched strategy
def source
  @source ||= ::Sources::Strategies::DeviantArt.new(url).tap { |strategy| strategy.get }
end
end end
end end
end end

View File

@@ -27,6 +27,8 @@ module Sources
@artist_name, @profile_url = get_profile_from_page(page) @artist_name, @profile_url = get_profile_from_page(page)
@image_url = get_image_url_from_page(page) @image_url = get_image_url_from_page(page)
@tags = get_tags_from_page(page) @tags = get_tags_from_page(page)
@artist_commentary_title = get_artist_commentary_title_from_page(page)
@artist_commentary_desc = get_artist_commentary_desc_from_page(page)
end end
end end
@@ -70,6 +72,24 @@ module Sources
end end
end end
# Extracts the commentary title from a parsed deviation page.
#
# Looks at the anchors inside the title container and keeps those whose
# "data-ga_click_event" attribute mentions "description_title".
#
# @param page an object responding to +search+ with a CSS selector
# @return [String, nil] text of the first matching anchor, or nil if none match
def get_artist_commentary_title_from_page(page)
  anchors = page.search("div.dev-title-container a")
  title_links = anchors.select { |anchor| anchor["data-ga_click_event"] =~ /description_title/ }
  title_links.first.inner_text unless title_links.empty?
end
# Extracts the commentary description from a parsed deviation page.
#
# @param page an object responding to +search+ with a CSS selector
# @return [String, nil] text of the first description block, or nil when the
#   page has none
def get_artist_commentary_desc_from_page(page)
  text_blocks = page.search("div.dev-description div.text.block")
  return unless text_blocks.any?

  text_blocks.first.inner_text
end
def normalized_url def normalized_url
@normalized_url ||= begin @normalized_url ||= begin
if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i