From 25e7db860aacc511fa106c4dd694d92bf333e100 Mon Sep 17 00:00:00 2001 From: evazion Date: Tue, 20 Jun 2017 14:55:46 -0500 Subject: [PATCH 1/4] nijie: fetch artist commentary. --- app/logical/sources/strategies/nijie.rb | 8 ++++++++ test/unit/sources/nijie_test.rb | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/app/logical/sources/strategies/nijie.rb b/app/logical/sources/strategies/nijie.rb index b5d5b15b5..5bd9b0fb7 100644 --- a/app/logical/sources/strategies/nijie.rb +++ b/app/logical/sources/strategies/nijie.rb @@ -39,10 +39,18 @@ module Sources @artist_name, @profile_url = get_profile_from_page(page) @image_url = get_image_url_from_page(page) @tags = get_tags_from_page(page) + @artist_commentary_title, @artist_commentary_desc = get_commentary_from_page(page) end protected + def get_commentary_from_page(page) + title = page.search("h2.illust_title").text + desc = page.search('meta[property="og:description"]').attr("content").value + + [title, desc] + end + def get_profile_from_page(page) links = page.search("a.name") diff --git a/test/unit/sources/nijie_test.rb b/test/unit/sources/nijie_test.rb index 776495f99..7d161c660 100644 --- a/test/unit/sources/nijie_test.rb +++ b/test/unit/sources/nijie_test.rb @@ -23,6 +23,14 @@ module Sources should "get the tags" do assert_equal([["眼鏡", "http://nijie.info/search.php?word=%E7%9C%BC%E9%8F%A1"], ["リトルウィッチアカデミア", "http://nijie.info/search.php?word=%E3%83%AA%E3%83%88%E3%83%AB%E3%82%A6%E3%82%A3%E3%83%83%E3%83%81%E3%82%A2%E3%82%AB%E3%83%87%E3%83%9F%E3%82%A2"], ["アーシュラ先生", "http://nijie.info/search.php?word=%E3%82%A2%E3%83%BC%E3%82%B7%E3%83%A5%E3%83%A9%E5%85%88%E7%94%9F"]], @site.tags) end + + should "get the commentary" do + title = "ジャージの下は" + desc = "「リトルウィッチアカデミア」から無自覚サキュバスぶりを発揮するアーシュラ先生です" + + assert_equal(title, @site.artist_commentary_title) + assert_equal(desc, @site.artist_commentary_desc) + end end context "The source site for a nijie referer url" do From 2d5fc191dd5d3ed3ec3c6fe8431815648e2aacc1 Mon Sep 17 00:00:00 2001 From: evazion Date: Tue, 20 Jun 2017 14:56:20 -0500 Subject: [PATCH 2/4] nijie: convert commentary to dtext. --- app/logical/sources/strategies/nijie.rb | 22 ++++++++++++++++++++++ test/unit/sources/nijie_test.rb | 17 +++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/app/logical/sources/strategies/nijie.rb b/app/logical/sources/strategies/nijie.rb index 5bd9b0fb7..ab1556bb4 100644 --- a/app/logical/sources/strategies/nijie.rb +++ b/app/logical/sources/strategies/nijie.rb @@ -44,6 +44,28 @@ module Sources protected + # XXX: duplicated from strategies/deviant_art.rb. + def self.to_dtext(text) + html = Nokogiri::HTML.fragment(text) + + dtext = html.children.map do |element| + case element.name + when "text" + element.content + when "strong" + "[b]#{to_dtext(element.inner_html)}[/b]" if element.inner_html.present? + when "i" + "[i]#{to_dtext(element.inner_html)}[/i]" if element.inner_html.present? + when "s" + "[s]#{to_dtext(element.inner_html)}[/s]" if element.inner_html.present? + else + to_dtext(element.inner_html) + end + end.join + + dtext + end + def get_commentary_from_page(page) title = page.search("h2.illust_title").text desc = page.search('meta[property="og:description"]').attr("content").value diff --git a/test/unit/sources/nijie_test.rb b/test/unit/sources/nijie_test.rb index 7d161c660..e7e4c6452 100644 --- a/test/unit/sources/nijie_test.rb +++ b/test/unit/sources/nijie_test.rb @@ -70,5 +70,22 @@ module Sources assert_equal("莚", @site.artist_name) end end + + context "The source site for a nijie gallery" do + setup do + @site = Sources::Site.new("http://nijie.info/view.php?id=218856") + @site.get + end + + should "get the dtext-ified commentary" do + desc = <<-EOS.strip_heredoc.chomp + foo [b]bold[/b] [i]italics[/i] [s]strike[/s] red\r + \r + http://nijie.info/view.php?id=218944 + EOS + + assert_equal(desc, @site.dtext_artist_commentary_desc) + end + end end end From 294358b4a6c7dbbd68488516f6390016daf21942 Mon Sep 17 00:00:00 2001 From: evazion Date: Tue, 20 Jun 2017 16:09:57 -0500 Subject: [PATCH 3/4] nijie: fetch image_urls for batch bookmarklet. --- app/logical/sources/strategies/nijie.rb | 32 ++++++++++++++++++------- test/unit/sources/nijie_test.rb | 19 ++++++++++++--- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/app/logical/sources/strategies/nijie.rb b/app/logical/sources/strategies/nijie.rb index ab1556bb4..71de9869d 100644 --- a/app/logical/sources/strategies/nijie.rb +++ b/app/logical/sources/strategies/nijie.rb @@ -1,6 +1,8 @@ module Sources module Strategies class Nijie < Base + attr_reader :image_urls + def self.url_match?(url) url =~ /^https?:\/\/(?:.+?\.)?nijie\.info/ end @@ -26,6 +28,10 @@ module Sources "nijie" + $1.to_s end + def image_url + image_urls.first + end + def get page = agent.get(referer_url) @@ -37,7 +43,7 @@ module Sources end @artist_name, @profile_url = get_profile_from_page(page) - @image_url = get_image_url_from_page(page) + @image_urls = get_image_urls_from_page(page) @tags = get_tags_from_page(page) @artist_commentary_title, @artist_commentary_desc = get_commentary_from_page(page) end @@ -87,13 +93,12 @@ module Sources return [artist_name, profile_url].compact end - def get_image_url_from_page(page) - image = page.search("div#gallery a img") - - if image.any? - image[0]["src"].try(:sub, %r!^//!, "http://") - else - nil + def get_image_urls_from_page(page) + page.search("div#gallery a > img").map do |img| + # //pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png + # => https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png + url = "https:" + img.attr("src") + normalize_image_url(url) end end @@ -121,6 +126,17 @@ module Sources end end + def normalize_image_url(image_url) + # http://pic03.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png + # => http://pic03.nijie.info/nijie_picture/diff/main/218856_3_236014_20170620101331.png + if image_url =~ %r!\Ahttps?://pic\d+\.nijie\.info/__rs_l120x120/nijie_picture/diff/main/[0-9_]+\.\w+\z!i + image_url = image_url.gsub(%r!__rs_l120x120/!i, "") + end + + image_url = image_url.gsub(%r!\Ahttp:!i, "https:") + image_url + end + def agent @agent ||= begin mech = Mechanize.new diff --git a/test/unit/sources/nijie_test.rb b/test/unit/sources/nijie_test.rb index e7e4c6452..39dafb344 100644 --- a/test/unit/sources/nijie_test.rb +++ b/test/unit/sources/nijie_test.rb @@ -9,7 +9,7 @@ module Sources end should "get the image url" do - assert_equal("http://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url) + assert_equal("https://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url) end should "get the profile" do @@ -40,7 +40,7 @@ module Sources end should "get the image url" do - assert_equal("http://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url) + assert_equal("https://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url) end should "get the profile" do @@ -59,7 +59,7 @@ module Sources end should "get the image url" do - assert_equal("http://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url) + assert_equal("https://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url) end should "get the profile" do @@ -77,6 +77,19 @@ module Sources @site.get end + should "get the image urls" do + urls = %w[ + https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png + https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png + https://pic01.nijie.info/nijie_picture/diff/main/218856_1_236014_20170620101330.png + https://pic01.nijie.info/nijie_picture/diff/main/218856_2_236014_20170620101331.png + https://pic03.nijie.info/nijie_picture/diff/main/218856_3_236014_20170620101331.png + https://pic03.nijie.info/nijie_picture/diff/main/218856_4_236014_20170620101333.png + ] + + assert_equal(urls, @site.image_urls) + end + should "get the dtext-ified commentary" do desc = <<-EOS.strip_heredoc.chomp foo [b]bold[/b] [i]italics[/i] [s]strike[/s] red\r From dc36476d50b2dd43cec0a7b270ab8a5b19f39f11 Mon Sep 17 00:00:00 2001 From: evazion Date: Tue, 20 Jun 2017 17:09:57 -0500 Subject: [PATCH 4/4] nijie: don't return bogus artists when artist finder finds no match. --- app/models/artist.rb | 1 + test/unit/artist_test.rb | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/app/models/artist.rb b/app/models/artist.rb index 152ca48d7..de62be6f0 100644 --- a/app/models/artist.rb +++ b/app/models/artist.rb @@ -49,6 +49,7 @@ class Artist < ApplicationRecord break if url =~ /deviantart\.net\//i break if url =~ %r!\Ahttps?://(?:mobile\.)?twitter\.com/\Z!i break if url =~ %r!pawoo\.net/(?:web/)?$!i + break if url =~ %r!\Ahttps?://(pic\d+\.)?nijie\.info/!i end artists.inject({}) {|h, x| h[x.name] = x; h}.values.slice(0, 20) diff --git a/test/unit/artist_test.rb b/test/unit/artist_test.rb index 7cd5f06ba..b35adff80 100644 --- a/test/unit/artist_test.rb +++ b/test/unit/artist_test.rb @@ -310,6 +310,22 @@ class ArtistTest < ActiveSupport::TestCase end end + context "when finding nijie artists" do + setup do + FactoryGirl.create(:artist, :name => "evazion", :url_string => "http://nijie.info/members.php?id=236014") + FactoryGirl.create(:artist, :name => "728995", :url_string => "http://nijie.info/members.php?id=728995") + end + + should "find the artist" do + assert_artist_found("evazion", "http://nijie.info/view.php?id=218944") + assert_artist_found("728995", "http://nijie.info/view.php?id=213043") + end + + should "return nothing for unknown nijie artists" do + assert_artist_not_found("http://nijie.info/view.php?id=157953") + end + end + should "normalize its other names" do artist = FactoryGirl.create(:artist, :name => "a1", :other_names_comma => "aaa, bbb, ccc ddd") assert_equal("aaa, bbb, ccc_ddd", artist.other_names_comma)