# Small client for the Nico Seiga XML API.
#
# Given an illustration id it requests the illustration record (image id,
# owner's user id, title, description text) and then the owner's nickname,
# exposing the results through the readers below. A reader stays nil when the
# corresponding request was skipped, was not an HTTP success, or returned a
# document without the expected root element.
class NicoSeigaApiClient
  BASE_URL = "http://seiga.nicovideo.jp/api"
  attr_reader :user_id, :moniker, :image_id, :title, :desc

  # illust_id - id of the illustration to look up. nil or an empty value is
  #             accepted and results in no request being made.
  def initialize(illust_id)
    get_illust(illust_id)
    # user_id is only set by a successful illust lookup; get_artist skips the
    # request when it is still nil.
    get_artist(user_id)
  end

  # Requests illust/info for +id+ and, on an HTTP success, stores the parsed
  # fields. Returns nil when nothing was fetched.
  def get_illust(id)
    body = fetch_xml("illust/info", id)
    parse_illust_xml_response(body) if body
  end

  # Requests user/info for +id+ and, on an HTTP success, stores the nickname.
  # Returns nil when nothing was fetched.
  def get_artist(id)
    body = fetch_xml("user/info", id)
    parse_artist_xml_response(body) if body
  end

  # Extracts the artist nickname from a user/info XML document.
  def parse_artist_xml_response(text)
    doc = Nokogiri::Slop(text)
    # Without this guard the Slop lookup below raises NoMethodError when the
    # element is absent (presumably the shape of an API error document --
    # TODO confirm).
    return if doc.at_xpath("/response/user/nickname").nil?

    @moniker = doc.response.user.nickname.content
  end

  # Extracts image id, owner id, title and description text from an
  # illust/info XML document.
  def parse_illust_xml_response(text)
    doc = Nokogiri::Slop(text)
    # Same guard as above: a document without <response><image> is ignored
    # instead of raising from the Slop lookup.
    return if doc.at_xpath("/response/image").nil?

    image = doc.response.image
    @image_id = image.id.content.to_i
    @user_id = image.user_id.content.to_i
    @title = image.title.content
    # NOTE(review): `image.description` looks like it resolves to Nokogiri's
    # own Node#description rather than a <description> child element, in which
    # case only <summary> ever contributes here -- confirm before relying on it.
    @desc = [image.description.try(:content), image.summary.try(:content)].compact.join("\n\n")
  end

  private

  # Performs GET "#{BASE_URL}/#{path}?id=#{id}" and returns the response body
  # for an HTTP success, nil otherwise. Returns nil without any network access
  # when +id+ is nil or empty, so a failed earlier lookup does not trigger a
  # request with an empty id parameter.
  def fetch_xml(path, id)
    return nil if id.nil? || id.to_s.empty?

    uri = URI.parse("#{BASE_URL}/#{path}?id=#{id}")
    Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.is_a?(URI::HTTPS)) do |http|
      resp = http.request_get(uri.request_uri)
      resp.body if resp.is_a?(Net::HTTPSuccess)
    end
  end
end
# Resolves the artist's profile URL through the API and returns it with a
# trailing slash. Also stores the illust id, artist name and profile URL in
# instance variables.
def normalize_for_artist_finder!
  # NOTE(review): the loaded page is not used below; the call is kept in case
  # callers rely on load_page's side effects -- confirm whether it can go.
  load_page
  @illust_id = get_illust_id_from_url
  @artist_name, @profile_url = get_profile_from_api
  @profile_url + "/"
end

# Builds an API client for the illustration referenced by normalized_url.
def api_client
  NicoSeigaApiClient.new(get_illust_id_from_url)
end

# Returns the numeric illustration id embedded in normalized_url
# (".../seiga/im<digits>") as an Integer, or nil when the URL has no such id.
def get_illust_id_from_url
  # Dots are escaped so they only match literal dots, and https is accepted
  # alongside http.
  match = %r!https?://seiga\.nicovideo\.jp/seiga/im(\d+)!.match(normalized_url)
  match && match[1].to_i
end

# Returns [artist_name, profile_url] using data from the API client.
def get_profile_from_api
  # Fetch the client once so both values come from the same instance.
  client = api_client
  [client.moniker, "http://seiga.nicovideo.jp/user/illust/#{client.user_id}"]
end

# Returns [commentary_title, commentary_description] from the API client.
def get_artist_commentary_from_api
  client = api_client
  [client.title, client.desc]
end
@@ -163,6 +173,8 @@ module Sources mech end end + + memoize :api_client end end end diff --git a/app/models/artist.rb b/app/models/artist.rb index bb3e558b4..7b37522a6 100644 --- a/app/models/artist.rb +++ b/app/models/artist.rb @@ -44,6 +44,7 @@ class Artist < ActiveRecord::Base url = File.dirname(url) + "/" break if url =~ /pixiv\.net\/(?:img\/)?$/i break if url =~ /lohas\.nicoseiga\.jp\/priv\/$/i + break if url =~ /nicovideo\.jp\/user\/illust/ break if url =~ /(?:data|media)\.tumblr\.com\/[a-z0-9]+\/$/i break if url =~ /deviantart\.net\//i break if url =~ %r!\Ahttps?://(?:mobile\.)?twitter\.com/\Z!i diff --git a/script/fixes/046_fix_nicovideo_artist_urls.rb b/script/fixes/046_fix_nicovideo_artist_urls.rb new file mode 100644 index 000000000..559c47574 --- /dev/null +++ b/script/fixes/046_fix_nicovideo_artist_urls.rb @@ -0,0 +1,11 @@ +#!/usr/bin/env ruby + +require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'config', 'environment')) + +ActiveRecord::Base.without_timeout do + ArtistUrl.where("normalized_url like ?", "\%nico\%").find_each do |url| + before = url.normalized_url + url.normalized_url = Sources::Site.new(before).normalize_for_artist_finder! 
+ puts "#{before} -> #{url.normalized_url}" if before != url.normalized_url + end +end diff --git a/test/unit/artist_test.rb b/test/unit/artist_test.rb index c2889fadc..35097977b 100644 --- a/test/unit/artist_test.rb +++ b/test/unit/artist_test.rb @@ -250,6 +250,16 @@ class ArtistTest < ActiveSupport::TestCase end end + context "when finding nico seiga artists" do + setup do + FactoryGirl.create(:artist, :name => "osamari", :url_string => "http://seiga.nicovideo.jp/user/illust/7017777") + end + + should "find the artist by the profile" do + assert_artist_found("osamari", "http://seiga.nicovideo.jp/seiga/im4937663") + end + end + context "when finding twitter artists" do setup do FactoryGirl.create(:artist, :name => "hammer_(sunset_beach)", :url_string => "http://twitter.com/hamaororon") diff --git a/test/unit/sources/nico_seiga_test.rb b/test/unit/sources/nico_seiga_test.rb index 975c494c3..a4d04243d 100644 --- a/test/unit/sources/nico_seiga_test.rb +++ b/test/unit/sources/nico_seiga_test.rb @@ -21,6 +21,11 @@ module Sources assert_equal("osamari", @site_2.artist_name) end + should "get the artist commentary" do + assert_equal("コジコジ", @site_2.artist_commentary_title) + assert_equal("懐かしいですよね。テ また懐かしいものを ", @site_2.artist_commentary_desc) + end + should "get the image url" do assert_match(/^http:\/\/lohas\.nicoseiga\.jp\/priv\//, @site_1.image_url) assert_match(/^http:\/\/lohas\.nicoseiga\.jp\/priv\//, @site_2.image_url)