diff --git a/app/logical/sources/strategies/nijie.rb b/app/logical/sources/strategies/nijie.rb index b5d5b15b5..71de9869d 100644 --- a/app/logical/sources/strategies/nijie.rb +++ b/app/logical/sources/strategies/nijie.rb @@ -1,6 +1,8 @@ module Sources module Strategies class Nijie < Base + attr_reader :image_urls + def self.url_match?(url) url =~ /^https?:\/\/(?:.+?\.)?nijie\.info/ end @@ -26,6 +28,10 @@ module Sources "nijie" + $1.to_s end + def image_url + image_urls.first + end + def get page = agent.get(referer_url) @@ -37,12 +43,42 @@ module Sources end @artist_name, @profile_url = get_profile_from_page(page) - @image_url = get_image_url_from_page(page) + @image_urls = get_image_urls_from_page(page) @tags = get_tags_from_page(page) + @artist_commentary_title, @artist_commentary_desc = get_commentary_from_page(page) end protected + # XXX: duplicated from strategies/deviant_art.rb. + def self.to_dtext(text) + html = Nokogiri::HTML.fragment(text) + + dtext = html.children.map do |element| + case element.name + when "text" + element.content + when "strong" + "[b]#{to_dtext(element.inner_html)}[/b]" if element.inner_html.present? + when "i" + "[i]#{to_dtext(element.inner_html)}[/i]" if element.inner_html.present? + when "s" + "[s]#{to_dtext(element.inner_html)}[/s]" if element.inner_html.present? + else + to_dtext(element.inner_html) + end + end.join + + dtext + end + + def get_commentary_from_page(page) + title = page.search("h2.illust_title").text + desc = page.search('meta[property="og:description"]').attr("content").value + + [title, desc] + end + def get_profile_from_page(page) links = page.search("a.name") @@ -57,13 +93,12 @@ module Sources return [artist_name, profile_url].compact end - def get_image_url_from_page(page) - image = page.search("div#gallery a img") - - if image.any? - image[0]["src"].try(:sub, %r!^//!, "http://") - else - nil + def get_image_urls_from_page(page) + page.search("div#gallery a > img").map do |img| + # //pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png + # => https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png + url = "https:" + img.attr("src") + normalize_image_url(url) end end @@ -91,6 +126,17 @@ module Sources end end + def normalize_image_url(image_url) + # http://pic03.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png + # => http://pic03.nijie.info/nijie_picture/diff/main/218856_3_236014_20170620101331.png + if image_url =~ %r!\Ahttps?://pic\d+\.nijie\.info/__rs_l120x120/nijie_picture/diff/main/[0-9_]+\.\w+\z!i + image_url = image_url.gsub(%r!__rs_l120x120/!i, "") + end + + image_url = image_url.gsub(%r!\Ahttp:!i, "https:") + image_url + end + def agent @agent ||= begin mech = Mechanize.new diff --git a/app/models/artist.rb b/app/models/artist.rb index 152ca48d7..de62be6f0 100644 --- a/app/models/artist.rb +++ b/app/models/artist.rb @@ -49,6 +49,7 @@ class Artist < ApplicationRecord break if url =~ /deviantart\.net\//i break if url =~ %r!\Ahttps?://(?:mobile\.)?twitter\.com/\Z!i break if url =~ %r!pawoo\.net/(?:web/)?$!i + break if url =~ %r!\Ahttps?://(pic\d+\.)?nijie\.info/!i end artists.inject({}) {|h, x| h[x.name] = x; h}.values.slice(0, 20) diff --git a/test/unit/artist_test.rb b/test/unit/artist_test.rb index 7cd5f06ba..b35adff80 100644 --- a/test/unit/artist_test.rb +++ b/test/unit/artist_test.rb @@ -310,6 +310,22 @@ class ArtistTest < ActiveSupport::TestCase end end + context "when finding nijie artists" do + setup do + FactoryGirl.create(:artist, :name => "evazion", :url_string => "http://nijie.info/members.php?id=236014") + FactoryGirl.create(:artist, :name => "728995", :url_string => "http://nijie.info/members.php?id=728995") + end + + should "find the artist" do + assert_artist_found("evazion", "http://nijie.info/view.php?id=218944") + assert_artist_found("728995", "http://nijie.info/view.php?id=213043") + end + + should "return nothing for unknown nijie artists" do + assert_artist_not_found("http://nijie.info/view.php?id=157953") + end + end + should "normalize its other names" do artist = FactoryGirl.create(:artist, :name => "a1", :other_names_comma => "aaa, bbb, ccc ddd") assert_equal("aaa, bbb, ccc_ddd", artist.other_names_comma) diff --git a/test/unit/sources/nijie_test.rb b/test/unit/sources/nijie_test.rb index 776495f99..39dafb344 100644 --- a/test/unit/sources/nijie_test.rb +++ b/test/unit/sources/nijie_test.rb @@ -9,7 +9,7 @@ module Sources end should "get the image url" do - assert_equal("http://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url) + assert_equal("https://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url) end should "get the profile" do @@ -23,6 +23,14 @@ module Sources should "get the tags" do assert_equal([["眼鏡", "http://nijie.info/search.php?word=%E7%9C%BC%E9%8F%A1"], ["リトルウィッチアカデミア", "http://nijie.info/search.php?word=%E3%83%AA%E3%83%88%E3%83%AB%E3%82%A6%E3%82%A3%E3%83%83%E3%83%81%E3%82%A2%E3%82%AB%E3%83%87%E3%83%9F%E3%82%A2"], ["アーシュラ先生", "http://nijie.info/search.php?word=%E3%82%A2%E3%83%BC%E3%82%B7%E3%83%A5%E3%83%A9%E5%85%88%E7%94%9F"]], @site.tags) end + + should "get the commentary" do + title = "ジャージの下は" + desc = "「リトルウィッチアカデミア」から無自覚サキュバスぶりを発揮するアーシュラ先生です" + + assert_equal(title, @site.artist_commentary_title) + assert_equal(desc, @site.artist_commentary_desc) + end end context "The source site for a nijie referer url" do @@ -32,7 +40,7 @@ module Sources end should "get the image url" do - assert_equal("http://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url) + assert_equal("https://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url) end should "get the profile" do @@ -51,7 +59,7 @@ module Sources end should "get the image url" do - assert_equal("http://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url) + assert_equal("https://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url) end should "get the profile" do @@ -62,5 +70,35 @@ module Sources assert_equal("莚", @site.artist_name) end end + + context "The source site for a nijie gallery" do + setup do + @site = Sources::Site.new("http://nijie.info/view.php?id=218856") + @site.get + end + + should "get the image urls" do + urls = %w[ + https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png + https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png + https://pic01.nijie.info/nijie_picture/diff/main/218856_1_236014_20170620101330.png + https://pic01.nijie.info/nijie_picture/diff/main/218856_2_236014_20170620101331.png + https://pic03.nijie.info/nijie_picture/diff/main/218856_3_236014_20170620101331.png + https://pic03.nijie.info/nijie_picture/diff/main/218856_4_236014_20170620101333.png + ] + + assert_equal(urls, @site.image_urls) + end + + should "get the dtext-ified commentary" do + desc = <<-EOS.strip_heredoc.chomp + foo [b]bold[/b] [i]italics[/i] [s]strike[/s] red\r + \r + http://nijie.info/view.php?id=218944 + EOS + + assert_equal(desc, @site.dtext_artist_commentary_desc) + end + end end end