From 7f98b370ec312b0601e214b86117328385a3caa3 Mon Sep 17 00:00:00 2001
From: evazion
Date: Fri, 3 Oct 2014 20:53:01 -0500
Subject: [PATCH] Fix scraping the Pixiv artist username.

The artist's username is no longer contained in the image thumbnail URL
on the HTML page. Get it from the Feed link instead.
---
 app/logical/sources/strategies/pixiv.rb | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/app/logical/sources/strategies/pixiv.rb b/app/logical/sources/strategies/pixiv.rb
index 866e4781f..4df34efe6 100644
--- a/app/logical/sources/strategies/pixiv.rb
+++ b/app/logical/sources/strategies/pixiv.rb
@@ -20,13 +20,13 @@ module Sources
       end
 
       def unique_id
-        image_url =~ /\/img\/([^\/]+)/
-        $1
+        @pixiv_moniker
       end
 
       def get
         agent.get(URI.parse(normalized_url)) do |page|
           @artist_name, @profile_url = get_profile_from_page(page)
+          @pixiv_moniker = get_moniker_from_page(page)
           @image_url = get_image_url_from_page(page)
           @tags = get_tags_from_page(page)
           @page_count = get_page_count_from_page(page)
@@ -49,6 +49,17 @@ module Sources
         return [artist_name, profile_url]
       end
 
+      def get_moniker_from_page(page)
+        # Feed
+        stacc_link = page.search("a.tab-feed").first
+
+        if not stacc_link.nil?
+          stacc_link.attr("href").sub(%r!^/stacc/!i, '')
+        else
+          raise "Couldn't find Pixiv moniker in page: #{normalized_url}"
+        end
+      end
+
       def get_image_url_from_page(page)
         elements = page.search("div.works_display a img").find_all do |node|
           node["src"] !~ /source\.pixiv\.net/