Support fetching artist commentary from DeviantArt

This commit is contained in:
Toks
2015-09-03 20:03:56 -04:00
parent d7b0d2a7e5
commit 12be880fa9
2 changed files with 38 additions and 4 deletions

View File

@@ -1,10 +1,18 @@
module Downloads
module RewriteStrategies
class DeviantArt < Base
attr_accessor :url, :source
# Creates a rewrite strategy bound to a single URL.
#
# The URL is stored in @url; the lazily built DeviantArt source strategy is
# constructed from it via the `url` accessor.
#
# @param url the URL this strategy instance was created for
def initialize(url)
@url = url
end
def rewrite(url, headers, data = {})
if url =~ /https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/
url, headers = rewrite_html_pages(url, headers)
url, headers = rewrite_thumbnails(url, headers)
data[:artist_commentary_title] = source.artist_commentary_title
data[:artist_commentary_desc] = source.artist_commentary_desc
end
return [url, headers, data]
@@ -13,8 +21,6 @@ module Downloads
protected
def rewrite_html_pages(url, headers)
if url =~ %r{^https?://.+?\.deviantart\.com/art/}
source = ::Sources::Strategies::DeviantArt.new(url)
source.get
return [source.image_url, headers]
else
return [url, headers]
@@ -29,13 +35,21 @@ module Downloads
match = $1
url.sub!(match + "PRE/", match)
elsif url =~ %r{^https?://(?:pre|img)\d{2}\.deviantart\.net/}
source = ::Sources::Strategies::DeviantArt.new(url)
source.get
return [source.image_url, headers]
end
return [url, headers]
end
# Returns the DeviantArt source strategy built from +url+, fetching it on
# first use. The fetched object is memoized in @source, so repeated calls
# reuse it and +get+ runs at most once per instance.
def source
  @source ||= ::Sources::Strategies::DeviantArt.new(url).tap(&:get)
end
end
end
end

View File

@@ -27,6 +27,8 @@ module Sources
@artist_name, @profile_url = get_profile_from_page(page)
@image_url = get_image_url_from_page(page)
@tags = get_tags_from_page(page)
@artist_commentary_title = get_artist_commentary_title_from_page(page)
@artist_commentary_desc = get_artist_commentary_desc_from_page(page)
end
end
@@ -70,6 +72,24 @@ module Sources
end
end
# Extracts the artist commentary title from a parsed deviation page.
#
# Looks at the links inside "div.dev-title-container" and picks the first one
# whose "data-ga_click_event" attribute mentions "description_title".
#
# @param page a parsed page object responding to +search+ with a CSS selector
# @return the link's inner text, or nil when no such link is present
def get_artist_commentary_title_from_page(page)
  title_link = page.search("div.dev-title-container a").find do |link|
    link["data-ga_click_event"] =~ /description_title/
  end

  return nil unless title_link

  title_link.inner_text
end
# Extracts the artist commentary description from a parsed deviation page.
#
# Uses the first element matching "div.dev-description div.text.block".
#
# @param page a parsed page object responding to +search+ with a CSS selector
# @return the element's inner text, or nil when the page has no such element
def get_artist_commentary_desc_from_page(page)
  description_blocks = page.search("div.dev-description div.text.block")
  return nil if description_blocks.none?

  description_blocks.first.inner_text
end
def normalized_url
@normalized_url ||= begin
if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i