# Image URLs: # # * https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg (page: https://www.nijie.info/view.php?id=64240) # * https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856) # * https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png (page: http://nijie.info/view.php?id=218856) # * https://pic01.nijie.info/nijie_picture/diff/main/218856_1_236014_20170620101330.png # * https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png (page: http://nijie.info/view_popup.php?id=265428#diff_2) # * https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png (page: http://nijie.info/view_popup.php?id=287736#diff_2) # # * https://pic.nijie.net/03/nijie_picture/236014_20170620101426_0.png (page: https://www.nijie.info/view.php?id=218856) # # Unhandled: # # * https://pic01.nijie.info/nijie_picture/20120211210359.jpg # * https://pic01.nijie.info/nijie_picture/2012021022424020120210.jpg # * https://pic01.nijie.info/nijie_picture/diff/main/2012061023480525712_0.jpg # * https://pic05.nijie.info/dojin_main/dojin_sam/1_2768_20180429004232.png # * https://pic04.nijie.info/horne_picture/diff/main/56095_20160403221810_0.jpg # * https://pic04.nijie.info/omata/4829_20161128012012.png (page: http://nijie.info/view_popup.php?id=33224#diff_3) # # Preview URLs: # # * https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png # * https://pic03.nijie.info/__rs_l120x120/nijie_picture/236014_20170620101426_0.png # * https://pic03.nijie.info/__rs_l170x170/nijie_picture/236014_20170620101426_0.png # * https://pic03.nijie.info/__rs_l650x650/nijie_picture/236014_20170620101426_0.png # * https://pic03.nijie.info/__rs_cns350x350/nijie_picture/236014_20170620101426_0.png # * https://pic03.nijie.info/small_light(dh=150,dw=150,q=100)/nijie_picture/236014_20170620101426_0.png # # Page URLs: # # * https://nijie.info/view.php?id=167755 (deleted post) # * https://nijie.info/view.php?id=218856 # * https://nijie.info/view_popup.php?id=218856 # * https://nijie.info/view_popup.php?id=218856#diff_1 # * https://www.nijie.info/view.php?id=218856 # * https://sp.nijie.info/view.php?id=218856 # # Profile URLs # # * https://nijie.info/members.php?id=236014 # * https://nijie.info/members_illust.php?id=236014 module Sources module Strategies class Nijie < Base BASE_URL = %r{\Ahttps?://(?:[^.]+\.)?nijie\.info}i PAGE_URL = %r{#{BASE_URL}/view(?:_popup)?\.php\?id=(?\d+)}i PROFILE_URL = %r{#{BASE_URL}/members(?:_illust)?\.php\?id=(?\d+)\z}i # https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg # https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png # http://pic.nijie.net/03/nijie_picture/829001_20190620004513_0.mp4 # https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png FILENAME1 = /(?\d+)_(?\d{14})(?:_\d+)?/i # https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png FILENAME2 = /(?\d+)_\d+_(?\d+)_(?\d{14})/i # https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png FILENAME3 = /(?\d+)_(?\d+)_(?\d{14})_\d+/i IMAGE_BASE_URL = %r{\Ahttps?://(?:pic\d+\.nijie\.info|pic\.nijie\.net)}i DIR = %r{(?:\d+/)?(?:__rs_\w+/)?nijie_picture(?:/diff/main)?} IMAGE_URL = %r{#{IMAGE_BASE_URL}/#{DIR}/#{Regexp.union(FILENAME1, FILENAME2, FILENAME3)}\.\w+\z}i def domains ["nijie.info", "nijie.net"] end def site_name "Nijie" end def image_url return to_full_image_url(url) if url.match?(IMAGE_URL) image_urls.first end def image_urls images = page&.search("div#gallery a > .mozamoza").to_a.map do |img| "https:#{img["src"]}" end images = [url] if url.match?(IMAGE_URL) && images.empty? images.map(&method(:to_full_image_url)).uniq end def preview_url return nil if image_url.blank? to_preview_url(image_url) end def preview_urls image_urls.map(&method(:to_preview_url)) end def page_url return nil if illust_id.blank? "https://nijie.info/view.php?id=#{illust_id}" end def profile_url return nil if artist_id.blank? "https://nijie.info/members.php?id=#{artist_id}" end def artist_name page&.search("a.name")&.first&.text end def artist_commentary_title page&.search("h2.illust_title")&.text end def artist_commentary_desc page&.search('#illust_text > p')&.to_html end def tags links = page&.search("div#view-tag a") || [] search_links = links.select do |node| node["href"] =~ /search\.php/ end search_links.map do |node| [node.inner_text, "https://nijie.info" + node.attr("href")] end end def tag_name "nijie" + artist_id.to_s end def self.to_dtext(text) text = text.to_s.gsub(/\r\n|\r/, "
") dtext = DText.from_html(text) do |element| if element.name == "a" && element["href"]&.start_with?("/jump.php") element["href"] = element.text end end dtext.strip end def to_full_image_url(x) x.gsub(%r{__rs_\w+/}i, "").gsub(/\Ahttp:/, "https:") end def to_preview_url(url) url.gsub(/nijie_picture/, "__rs_l170x170/nijie_picture").gsub(/\Ahttp:/, "https:") end def illust_id urls.map { |url| url[PAGE_URL, :illust_id] || url[IMAGE_URL, :illust_id] }.compact.first end def artist_id_from_url urls.map { |url| url[IMAGE_URL, :artist_id] || url[PROFILE_URL, :artist_id] }.compact.first end def artist_id_from_page page&.search("a.name")&.first&.attr("href")&.match(/members\.php\?id=(\d+)/) { $1.to_i } end def artist_id artist_id_from_url || artist_id_from_page end def normalize_for_source return if illust_id.blank? "https://nijie.info/view.php?id=#{illust_id}" end def page return nil if page_url.blank? http = Danbooru::Http.new form = { email: Danbooru.config.nijie_login, password: Danbooru.config.nijie_password } # XXX `retriable` must come after `cache` so that retries don't return cached error responses. response = http.cache(1.hour).use(retriable: { max_retries: 20 }).post("https://nijie.info/login_int.php", form: form) DanbooruLogger.info "Nijie login failed (#{url}, #{response.status})" if response.status != 200 return nil unless response.status == 200 response = http.cookies(R18: 1).cache(1.minute).get(page_url) return nil unless response.status == 200 response&.parse end memoize :page end end end