Files
danbooru/app/logical/sources/strategies/deviant_art.rb
lllusion3469 4c3733ce5a Keep auth_secure cookie from deviantart
Unlike this project, all other DA scrapers I found[0] require that both
"auth" and "auth_secure" are set.
While the current approach, for some reason, seems to have worked fine in
the past, it has stopped functioning on some systems, sometimes
permanently (apparently sometimes only temporarily).
I don't have the means to find out the reason behind that, since all
configurations I've tried (mobile, VPN, and regular connection +
4 separate accounts) resulted in me being effectively logged out and
unable to view mature works after deleting the "auth_secure" cookie.
The "if auth_cookie" part might not actually be necessary, its only
purpose is to prevent unintended breakage due to lack of information
on my part.

[0]: b662b46a44 (diff-aab4e263d591d1c7f23d015a2e540a32R161)
     cbaecf5507 (diff-9f9821e7d744874a2fe42e0a69e4b14aR33)
2017-08-30 20:47:59 +02:00

185 lines
5.5 KiB
Ruby

module Sources
module Strategies
class DeviantArt < Base
DEVIANTART_SESSION_CACHE_KEY = "deviantart-session"
# Tests whether +url+ looks like a DeviantArt address.
#
# The pattern accepts http or https, an optional prefix ending in a dot
# (e.g. "www." or an artist subdomain), and either the deviantart.com or
# the deviantart.net host.
#
# @param url [String] the URL to test
# @return [Integer, nil] the match offset (truthy) on success, nil otherwise
def self.url_match?(url)
  deviantart_host = /^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/
  url =~ deviantart_host
end
# Chooses which URL to report as the referer.
#
# @return [String, nil] @referer_url when it is a deviantart.com/art/ page
#   and @url is a direct file on a numbered deviantart.net host
#   (fcNN, thNN, preNN, origNN or imgNN); otherwise @url
def referer_url
  from_art_page = @referer_url =~ /deviantart\.com\/art\//
  return @url unless from_art_page

  direct_file = @url =~ /https?:\/\/(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net\//
  direct_file ? @referer_url : @url
end
# Display name of the site handled by this strategy.
#
# @return [String]
def site_name
  'Deviant Art'
end
# Builds an identifier from the artist's subdomain in profile_url.
#
# NOTE: when profile_url does not match the pattern (or is nil) the capture
# is nil and the string concatenation raises TypeError.
#
# @return [String] "deviantart" followed by the captured subdomain
def unique_id
  profile_url =~ /https?:\/\/(.+?)\.deviantart\.com/
  subdomain = Regexp.last_match(1)
  "deviantart" + subdomain
end
# Fetches the normalized deviation page and fills in the scraped fields:
# @artist_name, @profile_url, @image_url, @tags, @artist_commentary_title
# and @artist_commentary_desc.
#
# The page encoding is forced to UTF-8 before any parsing happens.
def get
  client = agent
  page_uri = URI.parse(normalized_url)

  client.get(page_uri) do |doc|
    doc.encoding = "utf-8"

    profile = get_profile_from_page(doc)
    @artist_name, @profile_url = profile

    @image_url = get_image_url_from_page(doc)
    @tags = get_tags_from_page(doc)
    @artist_commentary_title = get_artist_commentary_title_from_page(doc)
    @artist_commentary_desc = get_artist_commentary_desc_from_page(doc)
  end
end
# Converts HTML to DText via DText.from_html, first unwrapping DeviantArt's
# "outgoing link" redirector: for every <a> element with a non-blank href,
# a leading http(s)://www.deviantart.com/users/outgoing? prefix
# (case-insensitive) is removed so the link points at its real target.
#
# @param text the HTML handed to DText.from_html
# @return whatever DText.from_html returns
def self.to_dtext(text)
  DText.from_html(text) do |node|
    next unless node.name == "a" && node["href"].present?

    node["href"] = node["href"].gsub(%r!\Ahttps?://www\.deviantart\.com/users/outgoing\?!i, "")
  end
end
protected
# Extracts the artist's name and profile link from the first
# "div.dev-title-container a.username" element on the page.
#
# @param page the fetched page; must respond to #search
# @return [Array] [artist_name, profile_url] with nil entries removed;
#   an empty array when no username link is present
def get_profile_from_page(page)
  username_links = page.search("div.dev-title-container a.username")
  return [] unless username_links.any?

  username_link = username_links[0]
  [username_link.text, username_link["href"]].compact
end
# Determines the URL of the deviation's image.
#
# Prefers the download link (a link whose class matches dev-page-download);
# otherwise falls back to the src of the first
# "div.dev-view-deviation img.dev-content-full" element.
#
# @param page the fetched page; must respond to #link_with and #search
# @return [String, nil] the image URL, or nil when neither source exists
def get_image_url_from_page(page)
  download_link = page.link_with(class: /dev-page-download/)

  # need to follow the redirect now to get the full size url, following it later seems to not work.
  return download_link.click.uri.to_s if download_link

  full_view = page.search("div.dev-view-deviation img.dev-content-full")
  full_view.any? ? full_view[0]["src"] : nil
end
# Collects the page's tags from its "a.discoverytag" links.
#
# @param page the fetched page; must respond to #search
# @return [Array<Array>] one [data-canonical-tag, href] pair per tag link
def get_tags_from_page(page)
  page.search("a.discoverytag").map do |tag_link|
    %w[data-canonical-tag href].map { |attribute| tag_link.attr(attribute) }
  end
end
# Finds the deviation title: the text of the first link inside
# "div.dev-title-container" whose data-ga_click_event attribute mentions
# description_title.
#
# @param page the fetched page; must respond to #search
# @return [String, nil] the title text, or nil when no such link exists
def get_artist_commentary_title_from_page(page)
  title_link = page.search("div.dev-title-container a").find do |link|
    link["data-ga_click_event"] =~ /description_title/
  end

  title_link.inner_text if title_link
end
# Extracts the artist's description: the serialized children of the first
# "div.dev-description div.text.block" element.
#
# @param page the fetched page; must respond to #search
# @return [String, nil] the description markup, or nil when absent
def get_artist_commentary_desc_from_page(page)
  description_blocks = page.search("div.dev-description div.text.block")
  return nil unless description_blocks.any?

  description_blocks[0].children.to_s
end
# Maps the source URL to the page that should be scraped, memoizing the
# result in @normalized_url (a nil result is recomputed on each call).
#
# * Direct file URLs on numbered deviantart.net hosts, named either
#   "<title>_by_<artist>-d<id>.<ext>" or "<hex>-d<id>.<ext>", become
#   "http://fav.me/d<id>".
# * URLs containing "deviantart.com/art/" are returned unchanged.
# * Anything else yields nil.
#
# @return [String, nil]
def normalized_url
  @normalized_url ||=
    case url
    when %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i,
         %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i
      # Whichever pattern matched last supplies the captured deviation id.
      "http://fav.me/d#{Regexp.last_match(1)}"
    when %r{deviantart\.com/art/}
      url
    end
end
# Lazily builds the Mechanize client used for scraping and memoizes it in
# @agent.
#
# When session_cookies yields an auth value, the client's cookie jar is
# seeded (domain ".deviantart.com", path "/") with agegate_state, auth and
# userinfo, plus auth_secure when one was obtained. Without an auth value
# the client is returned with no cookies added.
#
# @return [Mechanize]
def agent
  @agent ||= begin
    mech = Mechanize.new
    auth, userinfo, auth_secure = session_cookies(mech)

    if auth
      # agegate_state needs to be set to allow viewing of mature works.
      jar_entries = [["agegate_state", "1"], ["auth", auth], ["userinfo", userinfo]]
      jar_entries << ["auth_secure", auth_secure] if auth_secure

      jar_entries.each do |name, value|
        cookie = Mechanize::Cookie.new(name, value)
        cookie.domain = ".deviantart.com"
        cookie.path = "/"
        mech.cookie_jar.add(cookie)
      end
    end

    mech
  end
end
# Logs in to DeviantArt with the configured account and returns the session
# cookie values, going through Cache.get under DEVIANTART_SESSION_CACHE_KEY
# (the 2.hours argument is presumably the cache lifetime - confirm against
# Cache.get).
#
# The login form's hidden validate_key / validate_token fields are read from
# the login page and posted back together with the credentials. The cookie
# jar of +mech+ is cleared afterwards.
#
# @param mech [Mechanize] client used to perform the login requests
# @return [Array, nil] [auth, userinfo, auth_secure] (entries may be nil);
#   nil when the login page presents a reCAPTCHA. In that case the method
#   returns straight out of the Cache.get block.
def session_cookies(mech)
  login_url = "https://www.deviantart.com/users/login"

  Cache.get(DEVIANTART_SESSION_CACHE_KEY, 2.hours) do
    mech.request_headers = Danbooru.config.http_headers
    login_page = mech.get(login_url)

    # we got captcha'd, have to abort
    return nil if login_page.search('div[class="g-recaptcha"]').any?

    key = login_page.search('input[name="validate_key"]').attribute("value").value
    token = login_page.search('input[name="validate_token"]').attribute("value").value

    login_form = {
      username: Danbooru.config.deviantart_login,
      password: Danbooru.config.deviantart_password,
      validate_key: key,
      validate_token: token,
      remember_me: 1,
    }
    mech.post(login_url, login_form)

    # Read the values before the jar is wiped.
    session = %w[auth userinfo auth_secure].map do |name|
      mech.cookies.find { |cookie| cookie.name == name }.try(:value)
    end
    mech.cookie_jar.clear
    session
  end
end
end
end
end