203 lines
6.3 KiB
Ruby
203 lines
6.3 KiB
Ruby
module Sources
|
|
module Strategies
|
|
class DeviantArt < Base
|
|
DEVIANTART_SESSION_CACHE_KEY = "deviantart-session"
|
|
|
|
def self.url_match?(url)
|
|
url =~ /^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/
|
|
end
|
|
|
|
def referer_url
|
|
if @referer_url =~ /deviantart\.com\/art\// && @url =~ /https?:\/\/(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net\//
|
|
@referer_url
|
|
else
|
|
@url
|
|
end
|
|
end
|
|
|
|
def site_name
|
|
"Deviant Art"
|
|
end
|
|
|
|
def unique_id
|
|
profile_url =~ /https?:\/\/(.+?)\.deviantart\.com/
|
|
"deviantart" + $1
|
|
end
|
|
|
|
def get
|
|
agent.get(URI.parse(normalized_url)) do |page|
|
|
page.encoding = "utf-8"
|
|
@artist_name, @profile_url = get_profile_from_page(page)
|
|
@image_url = get_image_url_from_page(page)
|
|
@tags = get_tags_from_page(page)
|
|
@artist_commentary_title = get_artist_commentary_title_from_page(page)
|
|
@artist_commentary_desc = get_artist_commentary_desc_from_page(page)
|
|
end
|
|
end
|
|
|
|
def self.to_dtext(text)
|
|
html = Nokogiri::HTML.fragment(text)
|
|
|
|
dtext = html.children.map do |element|
|
|
case element.name
|
|
when "text"
|
|
element.content
|
|
when "br"
|
|
"\n"
|
|
when "blockquote"
|
|
"[quote]#{to_dtext(element.inner_html)}[/quote]" if element.inner_html.present?
|
|
when "small", "sub"
|
|
"[tn]#{to_dtext(element.inner_html)}[/tn]" if element.inner_html.present?
|
|
when "b"
|
|
"[b]#{to_dtext(element.inner_html)}[/b]" if element.inner_html.present?
|
|
when "i"
|
|
"[i]#{to_dtext(element.inner_html)}[/i]" if element.inner_html.present?
|
|
when "u"
|
|
"[u]#{to_dtext(element.inner_html)}[/u]" if element.inner_html.present?
|
|
when "strike"
|
|
"[s]#{to_dtext(element.inner_html)}[/s]" if element.inner_html.present?
|
|
when "li"
|
|
"* #{to_dtext(element.inner_html)}" if element.inner_html.present?
|
|
when "h1", "h2", "h3", "h4", "h5", "h6"
|
|
hN = element.name
|
|
title = to_dtext(element.inner_html)
|
|
"#{hN}. #{title}\n"
|
|
when "a"
|
|
title = to_dtext(element.inner_html)
|
|
url = element.attributes["href"].value
|
|
url = url.gsub(%r!\Ahttps?://www\.deviantart\.com/users/outgoing\?!i, "")
|
|
%("#{title}":[#{url}]) if title.present?
|
|
when "img"
|
|
element.attributes["title"] || element.attributes["alt"] || ""
|
|
when "comment"
|
|
# ignored
|
|
else
|
|
to_dtext(element.inner_html)
|
|
end
|
|
end.join
|
|
|
|
dtext
|
|
end
|
|
|
|
protected
|
|
|
|
def get_profile_from_page(page)
|
|
links = page.search("div.dev-title-container a.username")
|
|
|
|
if links.any?
|
|
profile_url = links[0]["href"]
|
|
artist_name = links[0].text
|
|
else
|
|
profile_url = nil
|
|
artist_name = nil
|
|
end
|
|
|
|
return [artist_name, profile_url].compact
|
|
end
|
|
|
|
def get_image_url_from_page(page)
|
|
download_link = page.link_with(:class => /dev-page-download/)
|
|
|
|
if download_link
|
|
download_link.click.uri.to_s # need to follow the redirect now to get the full size url, following it later seems to not work.
|
|
else
|
|
image = page.search("div.dev-view-deviation img.dev-content-full")
|
|
|
|
if image.any?
|
|
image[0]["src"]
|
|
else
|
|
nil
|
|
end
|
|
end
|
|
end
|
|
|
|
def get_tags_from_page(page)
|
|
links = page.search("a.discoverytag")
|
|
|
|
links.map do |node|
|
|
[node.attr("data-canonical-tag"), node.attr("href")]
|
|
end
|
|
end
|
|
|
|
def get_artist_commentary_title_from_page(page)
|
|
title = page.search("div.dev-title-container a").find_all do |node|
|
|
node["data-ga_click_event"] =~ /description_title/
|
|
end
|
|
|
|
if title.any?
|
|
title[0].inner_text
|
|
end
|
|
end
|
|
|
|
def get_artist_commentary_desc_from_page(page)
|
|
desc = page.search("div.dev-description div.text.block")
|
|
|
|
if desc.any?
|
|
desc[0].children.to_s
|
|
end
|
|
end
|
|
|
|
def normalized_url
|
|
@normalized_url ||= begin
|
|
if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i
|
|
"http://fav.me/d#{$1}"
|
|
elsif url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i
|
|
"http://fav.me/d#{$1}"
|
|
elsif url =~ %r{deviantart\.com/art/}
|
|
url
|
|
else
|
|
nil
|
|
end
|
|
end
|
|
end
|
|
|
|
def agent
|
|
@agent ||= begin
|
|
mech = Mechanize.new
|
|
auth, userinfo = session_cookies(mech)
|
|
|
|
# This cookie needs to be set to allow viewing of mature works
|
|
cookie = Mechanize::Cookie.new("agegate_state", "1")
|
|
cookie.domain = ".deviantart.com"
|
|
cookie.path = "/"
|
|
mech.cookie_jar.add(cookie)
|
|
|
|
cookie = Mechanize::Cookie.new("auth", auth)
|
|
cookie.domain = ".deviantart.com"
|
|
cookie.path = "/"
|
|
mech.cookie_jar.add(cookie)
|
|
|
|
cookie = Mechanize::Cookie.new("userinfo", userinfo)
|
|
cookie.domain = ".deviantart.com"
|
|
cookie.path = "/"
|
|
mech.cookie_jar.add(cookie)
|
|
|
|
mech
|
|
end
|
|
end
|
|
|
|
def session_cookies(mech)
|
|
Cache.get(DEVIANTART_SESSION_CACHE_KEY, 2.hours) do
|
|
page = mech.get("https://www.deviantart.com/users/login")
|
|
validate_key = page.search('input[name="validate_key"]').attribute("value").value
|
|
validate_token = page.search('input[name="validate_token"]').attribute("value").value
|
|
|
|
mech.post("https://www.deviantart.com/users/login", {
|
|
username: Danbooru.config.deviantart_login,
|
|
password: Danbooru.config.deviantart_password,
|
|
validate_key: validate_key,
|
|
validate_token: validate_token,
|
|
remember_me: 1,
|
|
})
|
|
|
|
auth = mech.cookies.find { |cookie| cookie.name == "auth" }.value
|
|
userinfo = mech.cookies.find { |cookie| cookie.name == "userinfo" }.value
|
|
mech.cookie_jar.clear
|
|
|
|
[auth, userinfo]
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|