refactored pixiv proxy

This commit is contained in:
albert
2011-09-24 11:39:37 -04:00
parent 5f6f2fa659
commit 0e0f2af6ff
6 changed files with 168 additions and 89 deletions

View File

@@ -0,0 +1,18 @@
module ArtSiteProxies
  # Entry point for looking up artwork metadata from a source URL.
  #
  # A strategy object is chosen from the URL at construction time, and the
  # metadata readers (artist_name, profile_url, image_url, tags) are
  # forwarded to it.
  class Proxy
    attr_reader :url, :strategy

    delegate :artist_name, :profile_url, :image_url, :tags, :to => :strategy

    # url - the address of the artwork page or image to inspect.
    def initialize(url)
      @url = url

      # URLs mentioning pixiv.net get the pixiv strategy; anything else
      # falls back to the default one.
      strategy_class = case url
                       when /pixiv\.net/ then Strategies::Pixiv
                       else Strategies::Default
                       end
      @strategy = strategy_class.new(url)
    end
  end
end

View File

@@ -0,0 +1,33 @@
module ArtSiteProxies
  module Strategies
    # Abstract parent for site-specific metadata strategies.
    #
    # Subclasses override the four metadata readers. Subclasses that need
    # to talk to a remote site also override #create_agent.
    class Base
      attr_reader :url

      # url - the source URL this strategy will inspect.
      def initialize(url)
        @url = url
      end

      # Returns the agent, building it with #create_agent on first use.
      #
      # The agent is created lazily rather than in #initialize so that a
      # strategy which never uses an agent can be constructed without
      # implementing #create_agent.
      #
      # Raises NotImplementedError if the subclass does not define
      # #create_agent.
      def agent
        @agent ||= create_agent
      end

      # Returns the artist's display name. Must be overridden.
      def artist_name
        raise NotImplementedError
      end

      # Returns the tags attached to the artwork. Must be overridden.
      def tags
        raise NotImplementedError
      end

      # Returns the URL of the artist's profile. Must be overridden.
      def profile_url
        raise NotImplementedError
      end

      # Returns the URL of the image itself. Must be overridden.
      def image_url
        raise NotImplementedError
      end

      protected

      # Builds the object returned by #agent. Must be overridden by any
      # subclass that calls #agent.
      def create_agent
        raise NotImplementedError
      end
    end
  end
end

View File

@@ -0,0 +1,21 @@
module ArtSiteProxies
  module Strategies
    # Fallback strategy that makes no requests and answers every reader
    # from the URL it was given.
    class Default < Base
      # Returns the placeholder "?" as the artist name.
      def artist_name
        "?"
      end

      # Returns the original URL as the profile URL.
      def profile_url
        url
      end

      # Returns the original URL as the image URL.
      def image_url
        url
      end

      # Returns an empty tag list.
      def tags
        []
      end

      protected

      # This strategy makes no requests, so it has no agent. The inherited
      # create_agent raises NotImplementedError; returning nil here keeps
      # Default constructible.
      def create_agent
        nil
      end
    end
  end
end

View File

@@ -0,0 +1,91 @@
module ArtSiteProxies
  module Strategies
    # Scrapes artist, image and tag metadata from a pixiv illustration page.
    #
    # The page is fetched during construction; the readers return whatever
    # was extracted at that point.
    class Pixiv < Base
      attr_reader :artist_name, :profile_url, :image_url, :tags

      # url - a pixiv image URL or member_illust.php page URL.
      def initialize(url)
        super
        get
      end

      # Returns a truthy value when the URL mentions pixiv.net, nil otherwise.
      def is_pixiv?
        url =~ /pixiv\.net/
      end

      # Fetches the illustration page and fills in the metadata readers.
      #
      # When the URL cannot be mapped to an illustration page nothing is
      # fetched: tags is left as an empty list and the other readers stay nil.
      def get
        unless normalized_url
          @tags = []
          return
        end

        # Only the path and query are requested; presumably the agent
        # resolves them against the page it loaded while logging in.
        request_path = URI.parse(normalized_url).request_uri
        agent.get(request_path) do |page|
          @artist_name, @profile_url = get_profile_from_page(page)
          @image_url = get_image_url_from_page(page)
          @tags = get_tags_from_page(page)
        end
      end

      protected

      # Returns [artist_name, profile_url] taken from the first member.php
      # link inside div.front-subContent, or [] when there is no such link.
      def get_profile_from_page(page)
        link = page.search("div.front-subContent a").find do |node|
          node["href"] =~ /member\.php/
        end
        return [] unless link

        # The artist name is the alt text of the link's first child; guard
        # against a link with no children.
        first_child = link.children.first
        [first_child && first_child["alt"], link["href"]]
      end

      # Returns the og:image URL with its first "_m." replaced by ".", or
      # nil when the page has no usable og:image meta tag.
      def get_image_url_from_page(page)
        meta = page.search("meta[property=\"og:image\"]").first
        content = meta && meta.attr("content")
        content && content.sub(/_m\./, ".")
      end

      # Returns an array of [tag text, tag href] pairs for every tags.php
      # link under div.pedia; empty when there are none.
      def get_tags_from_page(page)
        tag_links = page.search("div.pedia li a").select do |node|
          node["href"] =~ /tags\.php/
        end
        tag_links.map do |node|
          [node.inner_text, node.attr("href")]
        end
      end

      # Returns the illustration page URL for the source URL, or nil.
      #
      # Direct image URLs (numeric id, optional _m or _pN suffix, image
      # extension) are rewritten to the medium illustration page for that
      # id. member_illust.php URLs carrying illust_id are used as they are.
      def normalized_url
        @normalized_url ||= begin
          if url =~ /\/(\d+)(_m|_p\d+)?\.(jpg|jpeg|png|gif)/i
            "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{$1}"
          elsif url =~ /member_illust\.php/ && url =~ /illust_id=/
            url
          else
            nil
          end
        end
      end

      # Builds a Mechanize agent, loads the pixiv front page and submits
      # its login form.
      #
      # FIXME: the account name and password are hard-coded here; they
      # should be moved into configuration.
      def create_agent
        mech = Mechanize.new
        mech.get("http://www.pixiv.net") do |page|
          login_form = page.form_with(:action => "/login.php") do |form|
            form['mode'] = "login"
            form['login_pixiv_id'] = "uroobnad"
            form['pass'] = "uroobnad556"
          end
          login_form.click_button
        end
        mech
      end
    end
  end
end

View File

@@ -1,85 +0,0 @@
# Scrapes artist, image and tag details from pixiv illustration pages.
# Every method is a class-level helper.
class PixivProxy < ActiveRecord::Base
  class << self
    # Returns a truthy value when +url+ mentions pixiv.net, nil otherwise.
    def is_pixiv?(url)
      url =~ /pixiv\.net/
    end

    # Looks up the details for +url+.
    #
    # Direct image URLs are rewritten to the medium illustration page for
    # their numeric id, and member_illust.php URLs carrying illust_id are
    # used as given. Returns the hash from get_single, or {} for any
    # other URL.
    def get(url)
      page_url =
        if url =~ /\/(\d+)(_m|_p\d+)?\.(jpg|jpeg|png|gif)/i
          "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{$1}"
        elsif url =~ /member_illust\.php/ && url =~ /illust_id=/
          url
        end

      page_url ? get_single(page_url) : {}
    end

    # Returns [artist, profile_url] from the first member.php link inside
    # div.front-subContent, or [] when the page has no such link.
    def get_profile_from_page(page)
      link = page.search("div.front-subContent a").find do |node|
        node["href"] =~ /member\.php/
      end
      return [] unless link

      # The artist name is the alt text of the link's first child.
      [link.children[0]["alt"], link["href"]]
    end

    # Returns the og:image URL with its first "_m." replaced by ".", or
    # nil when the page has no og:image meta tag.
    def get_image_url_from_page(page)
      meta = page.search("meta[property=\"og:image\"]").first
      return nil unless meta

      meta.attr("content").sub(/_m\./, ".")
    end

    # Returns [tag text, tag href] pairs for each tags.php link under
    # div.pedia; an empty array when there are none.
    def get_jp_tags_from_page(page)
      tag_links = page.search("div.pedia li a").select do |node|
        node["href"] =~ /tags\.php/
      end
      tag_links.map { |node| [node.inner_text, node.attr("href")] }
    end

    # Fetches one illustration page with a logged-in agent and returns a
    # hash with :artist, :profile_url, :image_url and :jp_tags.
    def get_single(url)
      request_path = URI.parse(url).request_uri
      browser = create_mechanize
      details = {}
      browser.get(request_path) do |page|
        artist, profile_url = get_profile_from_page(page)
        details.update(
          :artist => artist,
          :profile_url => profile_url,
          :image_url => get_image_url_from_page(page),
          :jp_tags => get_jp_tags_from_page(page)
        )
      end
      details
    end

    # Builds a Mechanize agent, loads the pixiv front page and submits
    # its login form. Returns the agent.
    def create_mechanize
      browser = Mechanize.new
      browser.get("http://www.pixiv.net") do |page|
        login_form = page.form_with(:action => "/login.php") do |form|
          form['mode'] = "login"
          form['login_pixiv_id'] = "uroobnad"
          form['pass'] = "uroobnad556"
        end
        login_form.click_button
      end
      browser
    end
  end
end

View File

@@ -5,10 +5,11 @@ require 'test_helper'
class PixivProxyTest < ActiveSupport::TestCase
context "The proxy" do
should "get a single post" do
results = PixivProxy.get_single("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=9646484")
assert_equal("member.php?id=4015", results[:profile_url])
assert(results[:jp_tags].size > 0)
first_tag = results[:jp_tags][0]
proxy = ArtSiteProxies::Proxy.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=9646484")
assert_equal("member.php?id=4015", proxy.profile_url)
assert(proxy.tags.size > 0)
first_tag = proxy.tags.first
assert_equal(2, first_tag.size)
assert(first_tag[0] =~ /./)
assert(first_tag[1] =~ /tags\.php\?tag=/)