diff --git a/.rubocop.yml b/.rubocop.yml index 932920c18..371b1967c 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -119,7 +119,9 @@ Style/NumericPredicate: Style/PercentLiteralDelimiters: PreferredDelimiters: "default": "[]" - "%r": "!!" + +Style/ParallelAssignment: + Enabled: false Style/PerlBackrefs: Enabled: false diff --git a/app/logical/sources/strategies.rb b/app/logical/sources/strategies.rb index 0e2f2f82c..92232e59e 100644 --- a/app/logical/sources/strategies.rb +++ b/app/logical/sources/strategies.rb @@ -1,7 +1,7 @@ module Sources module Strategies def self.all - return [ + [ Strategies::Pixiv, Strategies::NicoSeiga, Strategies::Twitter, diff --git a/app/logical/sources/strategies/art_station.rb b/app/logical/sources/strategies/art_station.rb index 2c78826fe..6ab7445ca 100644 --- a/app/logical/sources/strategies/art_station.rb +++ b/app/logical/sources/strategies/art_station.rb @@ -22,15 +22,15 @@ module Sources::Strategies class ArtStation < Base - PROJECT1 = %r!\Ahttps?://www\.artstation\.com/artwork/(?[a-z0-9-]+)/?\z!i - PROJECT2 = %r!\Ahttps?://(?[\w-]+)\.artstation\.com/projects/(?[a-z0-9-]+)(?:/|\?[\w=-]+)?\z!i + PROJECT1 = %r{\Ahttps?://www\.artstation\.com/artwork/(?[a-z0-9-]+)/?\z}i + PROJECT2 = %r{\Ahttps?://(?[\w-]+)\.artstation\.com/projects/(?[a-z0-9-]+)(?:/|\?[\w=-]+)?\z}i PROJECT = Regexp.union(PROJECT1, PROJECT2) ARTIST1 = %r{\Ahttps?://(?[\w-]+)(?[\w-]+)/?\z}i ARTIST3 = %r{\Ahttps?://www\.artstation\.com/(?[\w-]+)/?\z}i ARTIST = Regexp.union(ARTIST1, ARTIST2, ARTIST3) - ASSET = %r!\Ahttps?://cdn\w*\.artstation\.com/p/assets/(?images|covers)/images/(?\d+/\d+/\d+)/(?[^/]+)/(?.+)\z!i + ASSET = %r{\Ahttps?://cdn\w*\.artstation\.com/p/assets/(?images|covers)/images/(?\d+/\d+/\d+)/(?[^/]+)/(?.+)\z}i attr_reader :json @@ -144,7 +144,7 @@ module Sources::Strategies urls = image_url_sizes($~[:type], $~[:id], $~[:filename]) if size == :smallest - urls = urls.reverse() + urls = urls.reverse end chosen_url = urls.find { |url| http_exists?(url, 
headers) } diff --git a/app/logical/sources/strategies/base.rb b/app/logical/sources/strategies/base.rb index 55bf676ff..0ee78f365 100644 --- a/app/logical/sources/strategies/base.rb +++ b/app/logical/sources/strategies/base.rb @@ -58,8 +58,8 @@ module Sources end def site_name - Addressable::URI.heuristic_parse(url).host - rescue Addressable::URI::InvalidURIError => e + Addressable::URI.heuristic_parse(url)&.host + rescue Addressable::URI::InvalidURIError nil end @@ -90,9 +90,7 @@ module Sources # eventually be assigned as the source for the post, but it does not # represent what the downloader will fetch. def page_url - Rails.logger.warn "Valid page url for (#{url}, #{referer_url}) not found" - - return nil + nil end # This will be the url stored in posts. Typically this is the page @@ -141,7 +139,7 @@ module Sources # Subclasses should merge in any required headers needed to access resources # on the site. def headers - return Danbooru.config.http_headers + Danbooru.config.http_headers end # Returns the size of the image resource without actually downloading the file. 
@@ -189,7 +187,7 @@ module Sources end def normalized_tags - tags.map { |tag, url| normalize_tag(tag) }.sort.uniq + tags.map { |tag, _url| normalize_tag(tag) }.sort.uniq end def normalize_tag(tag) @@ -243,7 +241,7 @@ module Sources end def to_h - return { + { :artist => { :name => artist_name, :tag_name => tag_name, diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index 8bb193672..41c12382a 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -47,18 +47,18 @@ module Sources module Strategies class DeviantArt < Base - ASSET_SUBDOMAINS = %r{(?:fc|th|pre|img|orig|origin-orig)\d*}i + ASSET_SUBDOMAINS = /(?:fc|th|pre|img|orig|origin-orig)\d*/i RESERVED_SUBDOMAINS = %r{\Ahttps?://(?:#{ASSET_SUBDOMAINS}|www)\.}i MAIN_DOMAIN = %r{\Ahttps?://(?:www\.)?deviantart.com}i - TITLE = %r{(?[a-z0-9_-]+?)}i - ARTIST = %r{(?<artist>[a-z0-9_-]+?)}i - DEVIATION_ID = %r{(?<deviation_id>[0-9]+)}i + TITLE = /(?<title>[a-z0-9_-]+?)/i + ARTIST = /(?<artist>[a-z0-9_-]+?)/i + DEVIATION_ID = /(?<deviation_id>[0-9]+)/i - DA_FILENAME_1 = %r{[a-f0-9]{32}-d(?<base36_deviation_id>[a-z0-9]+)\.}i - DA_FILENAME_2 = %r{#{TITLE}(?:_by_#{ARTIST}(?:-d(?<base36_deviation_id>[a-z0-9]+))?)?\.}i + DA_FILENAME_1 = /[a-f0-9]{32}-d(?<base36_deviation_id>[a-z0-9]+)\./i + DA_FILENAME_2 = /#{TITLE}(?:_by_#{ARTIST}(?:-d(?<base36_deviation_id>[a-z0-9]+))?)?\./i DA_FILENAME = Regexp.union(DA_FILENAME_1, DA_FILENAME_2) - WIX_FILENAME = %r{d(?<base36_deviation_id>[a-z0-9]+)[0-9a-f-]+\.\w+(?:/\w+/\w+/[\w,]+/(?<title>[\w-]+)_by_(?<artist>[\w-]+)_d\w+-\w+\.\w+)?.+}i + WIX_FILENAME = %r{d(?<base36_deviation_id>[a-z0-9]+)[0-9a-f-]+\.\w+(?:/\w+/\w+/[\w,]+/(?<title>[\w-]+)_by_(?<artist>[\w-]+)_d\w+-\w+\.\w+)?.+}i NOT_NORMALIZABLE_ASSET = %r{\Ahttps?://#{ASSET_SUBDOMAINS}\.deviantart\.net/.+/[0-9a-f]{32}(?:-[^d]\w+)?\.}i @@ -75,7 +75,7 @@ module Sources PATH_PROFILE = %r{#{MAIN_DOMAIN}/#{ARTIST}/?\z}i 
SUBDOMAIN_PROFILE = %r{\Ahttps?://#{ARTIST}\.deviantart\.com/?\z}i - FAVME = %r{\Ahttps?://(www\.)?fav\.me/d(?<base36_deviation_id>[a-z0-9]+)\z}i + FAVME = %r{\Ahttps?://(?:www\.)?fav\.me/d(?<base36_deviation_id>[a-z0-9]+)\z}i def domains ["deviantart.net", "deviantart.com", "fav.me"] @@ -110,12 +110,12 @@ module Sources api_deviation[:videos].max_by { |x| x[:filesize] }[:src] else src = api_deviation.dig(:content, :src) - if deviation_id && deviation_id.to_i <= 790677560 && src =~ /^https:\/\/images-wixmp-/ && src !~ /\.gif\?/ - src = src.sub(%r!(/f/[a-f0-9-]+/[a-f0-9-]+)!, '/intermediary\1') - src = src.sub(%r!/v1/(fit|fill)/.*\z!i, "") + if deviation_id && deviation_id.to_i <= 790_677_560 && src =~ %r{\Ahttps://images-wixmp-} && src !~ /\.gif\?/ + src = src.sub(%r{(/f/[a-f0-9-]+/[a-f0-9-]+)}, '/intermediary\1') + src = src.sub(%r{/v1/(fit|fill)/.*\z}i, "") end - src = src.sub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net") - src = src.gsub(%r!q_\d+,strp!, "q_100") + src = src.sub(%r{\Ahttps?://orig\d+\.deviantart\.net}i, "http://origin-orig.deviantart.net") + src = src.gsub(/q_\d+,strp/, "q_100") src end end @@ -191,7 +191,7 @@ module Sources # <a href="https://sa-dui.deviantart.com/journal/About-Commissions-223178193" data-sigil="thumb" class="thumb lit" ...> if element["class"].split.include?("lit") - deviation_id = element["href"][%r!-(\d+)\z!, 1].to_i + deviation_id = element["href"][/-(\d+)\z/, 1].to_i element.content = "deviantart ##{deviation_id}" else element.content = "" @@ -199,7 +199,7 @@ module Sources end if element.name == "a" && element["href"].present? - element["href"] = element["href"].gsub(%r!\Ahttps?://www\.deviantart\.com/users/outgoing\?!i, "") + element["href"] = element["href"].gsub(%r{\Ahttps?://www\.deviantart\.com/users/outgoing\?}i, "") # href may be missing the `http://` bit (ex: `inprnt.com`, `//inprnt.com`). Add it if missing. 
uri = Addressable::URI.heuristic_parse(element["href"]) rescue nil @@ -283,7 +283,7 @@ module Sources return nil if meta.nil? appurl = meta["content"] - uuid = appurl[%r!\ADeviantArt://deviation/(.*)\z!, 1] + uuid = appurl[%r{\ADeviantArt://deviation/(.*)\z}, 1] uuid end memoize :uuid diff --git a/app/logical/sources/strategies/hentai_foundry.rb b/app/logical/sources/strategies/hentai_foundry.rb index 5ab3ca2ed..74f570288 100644 --- a/app/logical/sources/strategies/hentai_foundry.rb +++ b/app/logical/sources/strategies/hentai_foundry.rb @@ -23,11 +23,11 @@ module Sources module Strategies class HentaiFoundry < Base - BASE_URL = %r!\Ahttps?://(?:www\.)?hentai-foundry\.com!i - PAGE_URL = %r!#{BASE_URL}/pictures/user/(?<artist_name>[\w-]+)/(?<illust_id>\d+)(?:/[\w.-]*)?(\?[\w=]*)?\z!i - OLD_PAGE = %r!#{BASE_URL}/pic-(?<illust_id>\d+)(?:\.html)?\z!i - PROFILE_URL = %r!#{BASE_URL}/(?:pictures/)?user/(?<artist_name>[\w-]+)(?:/[a-z]*)?\z!i - IMAGE_URL = %r!\Ahttps?://pictures\.hentai-foundry\.com/+\w/(?<artist_name>[\w-]+)/(?<illust_id>\d+)(?:(?:/[\w.-]+)?\.\w+)?\z!i + BASE_URL = %r{\Ahttps?://(?:www\.)?hentai-foundry\.com}i + PAGE_URL = %r{#{BASE_URL}/pictures/user/(?<artist_name>[\w-]+)/(?<illust_id>\d+)(?:/[\w.-]*)?(\?[\w=]*)?\z}i + OLD_PAGE = %r{#{BASE_URL}/pic-(?<illust_id>\d+)(?:\.html)?\z}i + PROFILE_URL = %r{#{BASE_URL}/(?:pictures/)?user/(?<artist_name>[\w-]+)(?:/[a-z]*)?\z}i + IMAGE_URL = %r{\Ahttps?://pictures\.hentai-foundry\.com/+\w/(?<artist_name>[\w-]+)/(?<illust_id>\d+)(?:(?:/[\w.-]+)?\.\w+)?\z}i def domains ["hentai-foundry.com"] diff --git a/app/logical/sources/strategies/moebooru.rb b/app/logical/sources/strategies/moebooru.rb index af149682e..9c4cc1f8d 100644 --- a/app/logical/sources/strategies/moebooru.rb +++ b/app/logical/sources/strategies/moebooru.rb @@ -32,10 +32,10 @@ module Sources module Strategies class Moebooru < Base - BASE_URL = %r!\Ahttps?://(?:[^.]+\.)?(?<domain>yande\.re|konachan\.com)!i - POST_URL = 
%r!#{BASE_URL}/post/show/(?<id>\d+)!i - URL_SLUG = %r!/(?:yande\.re%20|Konachan\.com%20-%20)?(?<id>\d+)?.*!i - IMAGE_URL = %r!#{BASE_URL}/(?<type>image|jpeg|sample)/(?<md5>\h{32})#{URL_SLUG}?\.(?<ext>jpg|jpeg|png|gif)\z!i + BASE_URL = %r{\Ahttps?://(?:[^.]+\.)?(?<domain>yande\.re|konachan\.com)}i + POST_URL = %r{#{BASE_URL}/post/show/(?<id>\d+)}i + URL_SLUG = %r{/(?:yande\.re%20|Konachan\.com%20-%20)?(?<id>\d+)?.*}i + IMAGE_URL = %r{#{BASE_URL}/(?<type>image|jpeg|sample)/(?<md5>\h{32})#{URL_SLUG}?\.(?<ext>jpg|jpeg|png|gif)\z}i delegate :artist_name, :profile_url, :tag_name, :artist_commentary_title, :artist_commentary_desc, :dtext_artist_commentary_title, :dtext_artist_commentary_desc, to: :sub_strategy, allow_nil: true @@ -63,7 +63,7 @@ module Sources end def preview_urls - return image_urls unless post_md5.present? + return image_urls if post_md5.blank? ["https://#{file_host}/data/preview/#{post_md5[0..1]}/#{post_md5[2..3]}/#{post_md5}.jpg"] end diff --git a/app/logical/sources/strategies/nico_seiga.rb b/app/logical/sources/strategies/nico_seiga.rb index e63094bb9..ee4efd3d5 100644 --- a/app/logical/sources/strategies/nico_seiga.rb +++ b/app/logical/sources/strategies/nico_seiga.rb @@ -38,7 +38,7 @@ module Sources module Strategies class NicoSeiga < Base - DIRECT = %r{\Ahttps?://lohas\.nicoseiga\.jp/(priv|o)/(?:\w+/\d+/)?(?<image_id>\d+)(?:\?.+)?}i + DIRECT = %r{\Ahttps?://lohas\.nicoseiga\.jp/(?:priv|o)/(?:\w+/\d+/)?(?<image_id>\d+)(?:\?.+)?}i CDN_DIRECT = %r{\Ahttps?://dcdn\.cdn\.nimg\.jp/.+/\w+/\d+/(?<image_id>\d+)}i SOURCE = %r{\Ahttps?://seiga\.nicovideo\.jp/image/source(?:/|\?id=)(?<image_id>\d+)}i diff --git a/app/logical/sources/strategies/nijie.rb b/app/logical/sources/strategies/nijie.rb index e41d47f3d..b24605168 100644 --- a/app/logical/sources/strategies/nijie.rb +++ b/app/logical/sources/strategies/nijie.rb @@ -44,25 +44,25 @@ module Sources module Strategies class Nijie < Base - BASE_URL = %r!\Ahttps?://(?:[^.]+\.)?nijie\.info!i - PAGE_URL = 
%r!#{BASE_URL}/view(?:_popup)?\.php\?id=(?<illust_id>\d+)!i - PROFILE_URL = %r!#{BASE_URL}/members(?:_illust)?\.php\?id=(?<artist_id>\d+)\z!i + BASE_URL = %r{\Ahttps?://(?:[^.]+\.)?nijie\.info}i + PAGE_URL = %r{#{BASE_URL}/view(?:_popup)?\.php\?id=(?<illust_id>\d+)}i + PROFILE_URL = %r{#{BASE_URL}/members(?:_illust)?\.php\?id=(?<artist_id>\d+)\z}i # https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg # https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png # http://pic.nijie.net/03/nijie_picture/829001_20190620004513_0.mp4 # https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png - FILENAME1 = %r!(?<artist_id>\d+)_(?<timestamp>\d{14})(?:_\d+)?!i + FILENAME1 = /(?<artist_id>\d+)_(?<timestamp>\d{14})(?:_\d+)?/i # https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png - FILENAME2 = %r!(?<illust_id>\d+)_\d+_(?<artist_id>\d+)_(?<timestamp>\d{14})!i + FILENAME2 = /(?<illust_id>\d+)_\d+_(?<artist_id>\d+)_(?<timestamp>\d{14})/i # https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png - FILENAME3 = %r!(?<illust_id>\d+)_(?<artist_id>\d+)_(?<timestamp>\d{14})_\d+!i + FILENAME3 = /(?<illust_id>\d+)_(?<artist_id>\d+)_(?<timestamp>\d{14})_\d+/i - IMAGE_BASE_URL = %r!\Ahttps?://(?:pic\d+\.nijie\.info|pic\.nijie\.net)!i - DIR = %r!(?:\d+/)?(?:__rs_\w+/)?nijie_picture(?:/diff/main)?! 
- IMAGE_URL = %r!#{IMAGE_BASE_URL}/#{DIR}/#{Regexp.union(FILENAME1, FILENAME2, FILENAME3)}\.\w+\z!i + IMAGE_BASE_URL = %r{\Ahttps?://(?:pic\d+\.nijie\.info|pic\.nijie\.net)}i + DIR = %r{(?:\d+/)?(?:__rs_\w+/)?nijie_picture(?:/diff/main)?} + IMAGE_URL = %r{#{IMAGE_BASE_URL}/#{DIR}/#{Regexp.union(FILENAME1, FILENAME2, FILENAME3)}\.\w+\z}i def domains ["nijie.info", "nijie.net"] @@ -146,7 +146,7 @@ module Sources end def to_full_image_url(x) - x.gsub(%r!__rs_\w+/!i, "").gsub(/\Ahttp:/, "https:") + x.gsub(%r{__rs_\w+/}i, "").gsub(/\Ahttp:/, "https:") end def to_preview_url(url) @@ -186,7 +186,7 @@ module Sources doc = agent.get(page_url) end - return doc + doc rescue Mechanize::ResponseCodeError => e return nil if e.response_code.to_i == 404 raise @@ -220,13 +220,10 @@ module Sources mech.cookie_jar.add(cookie) mech - rescue Mechanize::ResponseCodeError => x - if x.response_code.to_i == 429 - sleep(5) - retry - else - raise - end + rescue Mechanize::ResponseCodeError => e + raise unless e.response_code.to_i == 429 + sleep(5) + retry end memoize :agent end diff --git a/app/logical/sources/strategies/null.rb b/app/logical/sources/strategies/null.rb index e17c5e5c4..4ce04c30c 100644 --- a/app/logical/sources/strategies/null.rb +++ b/app/logical/sources/strategies/null.rb @@ -28,7 +28,7 @@ module Sources when %r{\Ahttp://p\.twpl\.jp/show/(?:large|orig)/([a-z0-9]+)}i "http://p.twipple.jp/#{$1}" - when %r{\Ahttps?://blog(?:(?:-imgs-)?\d*(?:-origin)?)?\.fc2\.com/(?:(?:[^/]/){3}|(?:[^/]/))([^/]+)/(?:file/)?([^\.]+\.[^\?]+)}i + when %r{\Ahttps?://blog(?:(?:-imgs-)?\d*(?:-origin)?)?\.fc2\.com/(?:(?:[^/]/){3}|(?:[^/]/))([^/]+)/(?:file/)?([^.]+\.[^?]+)}i username = $1 filename = $2 "http://#{username}.blog.fc2.com/img/#{filename}/" @@ -105,7 +105,7 @@ module Sources # http://img.toranoana.jp/popup_img18/04/0010/22/87/040010228714-1p.jpg # http://img.toranoana.jp/popup_blimg/04/0030/08/30/040030083068-1p.jpg # https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg - 
when %r{\Ahttps?://(\w+\.)?toranoana\.jp/(?:popup_(?:bl)?img\d*|ec/img)/\d{2}/\d{4}/\d{2}/\d{2}/(?<work_id>\d+)}i + when %r{\Ahttps?://(?:\w+\.)?toranoana\.jp/(?:popup_(?:bl)?img\d*|ec/img)/\d{2}/\d{4}/\d{2}/\d{2}/(?<work_id>\d+)}i "https://ec.toranoana.jp/tora_r/ec/item/#{$~[:work_id]}/" # https://a.hitomi.la/galleries/907838/1.png diff --git a/app/logical/sources/strategies/pawoo.rb b/app/logical/sources/strategies/pawoo.rb index 49a04b5ae..3af7ef8a7 100644 --- a/app/logical/sources/strategies/pawoo.rb +++ b/app/logical/sources/strategies/pawoo.rb @@ -16,13 +16,13 @@ module Sources::Strategies class Pawoo < Base - HOST = %r!\Ahttps?://(www\.)?pawoo\.net!i - IMAGE = %r!\Ahttps?://img\.pawoo\.net/media_attachments/files/(\d+/\d+/\d+)! - NAMED_PROFILE = %r!#{HOST}/@(?<artist_name>\w+)!i - ID_PROFILE = %r!#{HOST}/web/accounts/(?<artist_id>\d+)! + HOST = %r{\Ahttps?://(www\.)?pawoo\.net}i + IMAGE = %r{\Ahttps?://img\.pawoo\.net/media_attachments/files/(\d+/\d+/\d+)} + NAMED_PROFILE = %r{#{HOST}/@(?<artist_name>\w+)}i + ID_PROFILE = %r{#{HOST}/web/accounts/(?<artist_id>\d+)} - STATUS1 = %r!\A#{HOST}/web/statuses/(?<status_id>\d+)! - STATUS2 = %r!\A#{NAMED_PROFILE}/(?<status_id>\d+)! 
+ STATUS1 = %r{\A#{HOST}/web/statuses/(?<status_id>\d+)} + STATUS2 = %r{\A#{NAMED_PROFILE}/(?<status_id>\d+)} def domains ["pawoo.net"] @@ -37,15 +37,13 @@ module Sources::Strategies end def image_urls - if url =~ %r!#{IMAGE}/small/([a-z0-9]+\.\w+)\z!i - return ["https://img.pawoo.net/media_attachments/files/#{$1}/original/#{$2}"] + if url =~ %r{#{IMAGE}/small/([a-z0-9]+\.\w+)\z}i + ["https://img.pawoo.net/media_attachments/files/#{$1}/original/#{$2}"] + elsif url =~ %r{#{IMAGE}/original/([a-z0-9]+\.\w+)\z}i + [url] + else + api_response.image_urls end - - if url =~ %r!#{IMAGE}/original/([a-z0-9]+\.\w+)\z!i - return [url] - end - - return api_response.image_urls end def page_url @@ -55,16 +53,17 @@ module Sources::Strategies end end - return super + super end def profile_url if url =~ PawooApiClient::PROFILE2 - return "https://pawoo.net/@#{$1}" + "https://pawoo.net/@#{$1}" + elsif api_response.profile_url.blank? + url + else + api_response.profile_url end - - return url if api_response.profile_url.blank? - api_response.profile_url end def artist_name @@ -87,10 +86,6 @@ module Sources::Strategies urls.map { |url| url[STATUS1, :status_id] || url[STATUS2, :status_id] }.compact.first end - def artist_commentary_title - nil - end - def artist_commentary_desc api_response.commentary end @@ -99,18 +94,10 @@ module Sources::Strategies api_response.tags end - def normalizable_for_artist_finder? - true - end - - def normalize_for_artist_finder - profile_url - end - def normalize_for_source artist_name = artist_name_from_url status_id = status_id_from_url - return unless status_id.present? + return if status_id.blank? if artist_name.present? 
"https://pawoo.net/@#{artist_name}/#{status_id}" @@ -131,7 +118,7 @@ module Sources::Strategies def api_response [url, referer_url].each do |x| - if client = PawooApiClient.new.get(x) + if (client = PawooApiClient.new.get(x)) return client end end diff --git a/app/logical/sources/strategies/pixiv.rb b/app/logical/sources/strategies/pixiv.rb index 5f2bbf669..ad7cd7eb1 100644 --- a/app/logical/sources/strategies/pixiv.rb +++ b/app/logical/sources/strategies/pixiv.rb @@ -50,35 +50,35 @@ module Sources module Strategies class Pixiv < Base - MONIKER = %r!(?:[a-zA-Z0-9_-]+)! - PROFILE = %r!\Ahttps?://www\.pixiv\.net/member\.php\?id=[0-9]+\z! - DATE = %r!(?<date>\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})!i - EXT = %r!(?:jpg|jpeg|png|gif)!i + MONIKER = /(?:[a-zA-Z0-9_-]+)/ + PROFILE = %r{\Ahttps?://www\.pixiv\.net/member\.php\?id=[0-9]+\z} + DATE = %r{(?<date>\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})}i + EXT = /(?:jpg|jpeg|png|gif)/i - WEB = %r!(?:\A(?:https?://)?www\.pixiv\.net)! - I12 = %r!(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)! - IMG = %r!(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)! - PXIMG = %r!(?:\A(?:https?://)?[^.]+\.pximg\.net)! - TOUCH = %r!(?:\A(?:https?://)?touch\.pixiv\.net)! - UGOIRA = %r!#{PXIMG}/img-zip-ugoira/img/#{DATE}/(?<illust_id>\d+)_ugoira1920x1080\.zip\z!i - ORIG_IMAGE = %r!#{PXIMG}/img-original/img/#{DATE}/(?<illust_id>\d+)_p(?<page>\d+)\.#{EXT}\z!i - STACC_PAGE = %r!\A#{WEB}/stacc/#{MONIKER}/?\z!i - NOVEL_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/novel/show\.php\?id=(\d+))! - FANBOX_ACCOUNT = %r!(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+\z)! - FANBOX_IMAGE = %r!(?:\Ahttps?://fanbox\.pixiv\.net/images/post/(\d+))! - FANBOX_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+/post/(\d+))! 
+ WEB = %r{(?:\A(?:https?://)?www\.pixiv\.net)} + I12 = %r{(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)} + IMG = %r{(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)} + PXIMG = %r{(?:\A(?:https?://)?[^.]+\.pximg\.net)} + TOUCH = %r{(?:\A(?:https?://)?touch\.pixiv\.net)} + UGOIRA = %r{#{PXIMG}/img-zip-ugoira/img/#{DATE}/(?<illust_id>\d+)_ugoira1920x1080\.zip\z}i + ORIG_IMAGE = %r{#{PXIMG}/img-original/img/#{DATE}/(?<illust_id>\d+)_p(?<page>\d+)\.#{EXT}\z}i + STACC_PAGE = %r{\A#{WEB}/stacc/#{MONIKER}/?\z}i + NOVEL_PAGE = %r{(?:\Ahttps?://www\.pixiv\.net/novel/show\.php\?id=(\d+))} + FANBOX_ACCOUNT = %r{(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+\z)} + FANBOX_IMAGE = %r{(?:\Ahttps?://fanbox\.pixiv\.net/images/post/(\d+))} + FANBOX_PAGE = %r{(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+/post/(\d+))} def self.to_dtext(text) if text.nil? return nil end - text = text.gsub(%r!https?://www\.pixiv\.net/member_illust\.php\?mode=medium&illust_id=([0-9]+)!i) do |match| + text = text.gsub(%r{https?://www\.pixiv\.net/member_illust\.php\?mode=medium&illust_id=([0-9]+)}i) do |_match| pixiv_id = $1 %(pixiv ##{pixiv_id} "»":[/posts?tags=pixiv:#{pixiv_id}]) end - text = text.gsub(%r!https?://www\.pixiv\.net/member\.php\?id=([0-9]+)!i) do |match| + text = text.gsub(%r{https?://www\.pixiv\.net/member\.php\?id=([0-9]+)}i) do |_match| member_id = $1 profile_url = "https://www.pixiv.net/member.php?id=#{member_id}" search_params = {"search[url_matches]" => profile_url}.to_param @@ -139,13 +139,13 @@ module Sources return "https://www.pixiv.net/artworks/#{illust_id}" end - return url + url rescue PixivApiClient::BadIDError nil end def canonical_url - return image_url + image_url end def profile_url @@ -200,7 +200,7 @@ module Sources } end - return { + { "Referer" => "https://www.pixiv.net" } end @@ -231,7 +231,7 @@ module Sources translated_tags = super(tag) if translated_tags.empty? 
&& tag.include?("/") - translated_tags = tag.split("/").flat_map { |tag| super(tag) } + translated_tags = tag.split("/").flat_map { |translated_tag| super(translated_tag) } end translated_tags @@ -257,7 +257,7 @@ module Sources return [ugoira_zip_url] end - return metadata.pages + metadata.pages end # in order to prevent recursive loops, this method should not make any @@ -276,11 +276,11 @@ module Sources return url.query_values["illust_id"].to_i # http://www.pixiv.net/en/artworks/46324488 - elsif url.host == "www.pixiv.net" && url.path =~ %r!\A/(?:en/)?artworks/(?<illust_id>\d+)!i + elsif url.host == "www.pixiv.net" && url.path =~ %r{\A/(?:en/)?artworks/(?<illust_id>\d+)}i return $~[:illust_id].to_i # http://www.pixiv.net/i/18557054 - elsif url.host == "www.pixiv.net" && url.path =~ %r!\A/i/(?<illust_id>\d+)\z!i + elsif url.host == "www.pixiv.net" && url.path =~ %r{\A/i/(?<illust_id>\d+)\z}i return $~[:illust_id].to_i # http://img18.pixiv.net/img/evazion/14901720.png @@ -289,8 +289,8 @@ module Sources # http://i2.pixiv.net/img18/img/evazion/14901720_s.png # http://i1.pixiv.net/img07/img/pasirism/18557054_p1.png # http://i1.pixiv.net/img07/img/pasirism/18557054_big_p1.png - elsif url.host =~ %r!\A(?:i\d+|img\d+)\.pixiv\.net\z!i && - url.path =~ %r!\A(?:/img\d+)?/img/#{MONIKER}/(?<illust_id>\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i + elsif url.host =~ /\A(?:i\d+|img\d+)\.pixiv\.net\z/i && + url.path =~ %r{\A(?:/img\d+)?/img/#{MONIKER}/(?<illust_id>\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)}i return $~[:illust_id].to_i # http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_64x64.jpg @@ -307,13 +307,13 @@ module Sources # # https://i.pximg.net/novel-cover-original/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42.jpg # https://img-sketch.pixiv.net/uploads/medium/file/4463372/8906921629213362989.jpg - elsif url.host =~ %r!\A(?:[^.]+\.pximg\.net|i\d+\.pixiv\.net|tc-pximg01\.techorus-cdn\.com)\z!i && - url.path =~ 
%r!\A(/c/\w+)?/img-[a-z-]+/img/#{DATE}/(?<illust_id>\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i + elsif url.host =~ /\A(?:[^.]+\.pximg\.net|i\d+\.pixiv\.net|tc-pximg01\.techorus-cdn\.com)\z/i && + url.path =~ %r{\A(/c/\w+)?/img-[a-z-]+/img/#{DATE}/(?<illust_id>\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)}i return $~[:illust_id].to_i end end - return nil + nil end memoize :illust_id @@ -324,7 +324,7 @@ module Sources end end - return nil + nil end memoize :novel_id @@ -339,7 +339,7 @@ module Sources end end - return nil + nil end memoize :fanbox_id @@ -350,7 +350,7 @@ module Sources end end - return nil + nil end memoize :fanbox_account_id @@ -368,45 +368,39 @@ module Sources return PixivApiClient.new.fanbox(fanbox_id) end - return PixivApiClient.new.work(illust_id) + PixivApiClient.new.work(illust_id) end memoize :metadata def moniker # we can sometimes get the moniker from the url - if url =~ %r!#{IMG}/img/(#{MONIKER})!i - return $1 + if url =~ %r{#{IMG}/img/(#{MONIKER})}i + $1 + elsif url =~ %r{#{I12}/img[0-9]+/img/(#{MONIKER})}i + $1 + elsif url =~ %r{#{WEB}/stacc/(#{MONIKER})/?$}i + $1 + else + metadata.moniker end - - if url =~ %r!#{I12}/img[0-9]+/img/(#{MONIKER})!i - return $1 - end - - if url =~ %r!#{WEB}/stacc/(#{MONIKER})/?$!i - return $1 - end - - return metadata.moniker rescue PixivApiClient::BadIDError nil end memoize :moniker def data - return { - ugoira_frame_data: ugoira_frame_data - } + { ugoira_frame_data: ugoira_frame_data } end def ugoira_zip_url if metadata.pages.is_a?(Hash) && metadata.pages["ugoira600x600"] - return metadata.pages["ugoira600x600"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip") + metadata.pages["ugoira600x600"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip") end end memoize :ugoira_zip_url def ugoira_frame_data - return metadata.json.dig("metadata", "frames") + metadata.json.dig("metadata", "frames") rescue PixivApiClient::BadIDError nil end @@ -415,16 +409,14 @@ module Sources def ugoira_content_type case 
metadata.json["image_urls"].to_s when /\.jpg/ - return "image/jpeg" - + "image/jpeg" when /\.png/ - return "image/png" - + "image/png" when /\.gif/ - return "image/gif" + "image/gif" + else + raise Sources::Error, "content type not found for (#{url}, #{referer_url})" end - - raise Sources::Error.new("content type not found for (#{url}, #{referer_url})") end memoize :ugoira_content_type @@ -434,7 +426,7 @@ module Sources # http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg # http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg # http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg - if url =~ %r!/\d+_p(\d+)(?:_\w+)?\.#{EXT}!i + if url =~ %r{/\d+_p(\d+)(?:_\w+)?\.#{EXT}}i return $1.to_i end @@ -445,7 +437,7 @@ module Sources end end - return nil + nil end memoize :manga_page end diff --git a/app/logical/sources/strategies/tumblr.rb b/app/logical/sources/strategies/tumblr.rb index c6d2fc5bc..6123f9f8b 100644 --- a/app/logical/sources/strategies/tumblr.rb +++ b/app/logical/sources/strategies/tumblr.rb @@ -12,19 +12,19 @@ module Sources::Strategies class Tumblr < Base SIZES = %w[1280 640 540 500h 500 400 250 100] - BASE_URL = %r!\Ahttps?://(?:[^/]+\.)*tumblr\.com!i - DOMAIN = %r{(data|(\d+\.)?media)\.tumblr\.com} - MD5 = %r{(?<md5>[0-9a-f]{32})}i - FILENAME = %r{(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)}i - EXT = %r{(?<ext>\w+)} + BASE_URL = %r{\Ahttps?://(?:[^/]+\.)*tumblr\.com}i + DOMAIN = /(data|(?:\d+\.)?media)\.tumblr\.com/i + MD5 = /(?<md5>[0-9a-f]{32})/i + FILENAME = /(?<filename>(?:tumblr_(?:inline_)?)?[a-z0-9]+(?:_r[0-9]+)?)/i + EXT = /(?<ext>\w+)/ # old: https://66.media.tumblr.com/2c6f55531618b4335c67e29157f5c1fc/tumblr_pz4a44xdVj1ssucdno1_1280.png # new: https://66.media.tumblr.com/168dabd09d5ad69eb5fedcf94c45c31a/3dbfaec9b9e0c2e3-72/s640x960/bf33a1324f3f36d2dc64f011bfeab4867da62bc8.png - OLD_IMAGE = %r!\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_(?<size>\w+)\.#{EXT}\z!i + 
OLD_IMAGE = %r{\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_(?<size>\w+)\.#{EXT}\z}i - IMAGE = %r!\Ahttps?://#{DOMAIN}/!i - VIDEO = %r!\Ahttps?://(?:vtt|ve\.media)\.tumblr\.com/!i - POST = %r!\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)!i + IMAGE = %r{\Ahttps?://#{DOMAIN}/}i + VIDEO = %r{\Ahttps?://(?:vtt|ve\.media)\.tumblr\.com/}i + POST = %r{\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)}i def self.enabled? Danbooru.config.tumblr_consumer_key.present? @@ -68,7 +68,7 @@ module Sources::Strategies def preview_urls image_urls.map do |x| - x.sub(%r!_1280\.(jpg|png|gif|jpeg)\z!, '_250.\1') + x.sub(/_1280\.(jpg|png|gif|jpeg)\z/, '_250.\1') end end diff --git a/app/logical/sources/strategies/twitter.rb b/app/logical/sources/strategies/twitter.rb index 69f363363..a51e162ea 100644 --- a/app/logical/sources/strategies/twitter.rb +++ b/app/logical/sources/strategies/twitter.rb @@ -1,20 +1,20 @@ module Sources::Strategies class Twitter < Base - PAGE = %r!\Ahttps?://(?:mobile\.)?twitter\.com!i - PROFILE = %r!\Ahttps?://(?:mobile\.)?twitter.com/(?<username>[a-z0-9_]+)!i + PAGE = %r{\Ahttps?://(?:mobile\.)?twitter\.com}i + PROFILE = %r{\Ahttps?://(?:mobile\.)?twitter.com/(?<username>[a-z0-9_]+)}i # https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg # https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb?format=jpg&name=900x900 # https://pbs.twimg.com/tweet_video_thumb/ETkN_L3X0AMy1aT.jpg # https://pbs.twimg.com/ext_tw_video_thumb/1243725361986375680/pu/img/JDA7g7lcw7wK-PIv.jpg # https://pbs.twimg.com/amplify_video_thumb/1215590775364259840/img/lolCkEEioFZTb5dl.jpg - BASE_IMAGE_URL = %r!\Ahttps?://pbs\.twimg\.com/(?<media_type>media|tweet_video_thumb|ext_tw_video_thumb|amplify_video_thumb)!i - FILENAME1 = %r!(?<file_name>[a-zA-Z0-9_-]+)\.(?<file_ext>\w+)!i - FILENAME2 = %r!(?<file_name>[a-zA-Z0-9_-]+)\?.*format=(?<file_ext>\w+)!i - FILEPATH1 = %r!(?<file_path>\d+/[\w_-]+/img)!i - FILEPATH2 = %r!(?<file_path>\d+/img)!i - IMAGE_URL1 
= %r!#{BASE_IMAGE_URL}/#{Regexp.union(FILENAME1, FILENAME2)}!i - IMAGE_URL2 = %r!#{BASE_IMAGE_URL}/#{Regexp.union(FILEPATH1, FILEPATH2)}/#{FILENAME1}!i + BASE_IMAGE_URL = %r{\Ahttps?://pbs\.twimg\.com/(?<media_type>media|tweet_video_thumb|ext_tw_video_thumb|amplify_video_thumb)}i + FILENAME1 = /(?<file_name>[a-zA-Z0-9_-]+)\.(?<file_ext>\w+)/i + FILENAME2 = /(?<file_name>[a-zA-Z0-9_-]+)\?.*format=(?<file_ext>\w+)/i + FILEPATH1 = %r{(?<file_path>\d+/[\w_-]+/img)}i + FILEPATH2 = %r{(?<file_path>\d+/img)}i + IMAGE_URL1 = %r{#{BASE_IMAGE_URL}/#{Regexp.union(FILENAME1, FILENAME2)}}i + IMAGE_URL2 = %r{#{BASE_IMAGE_URL}/#{Regexp.union(FILEPATH1, FILEPATH2)}/#{FILENAME1}}i # Twitter provides a list but it's inaccurate; some names ('intent') aren't # included and other names in the list aren't actually reserved. @@ -47,7 +47,7 @@ module Sources::Strategies return $1 end - return nil + nil end def self.artist_name_from_url(url) @@ -78,7 +78,7 @@ module Sources::Strategies elsif media[:type].in?(["video", "animated_gif"]) variants = media.dig(:video_info, :variants) videos = variants.select { |variant| variant[:content_type] == "video/mp4" } - video = videos.max_by { |video| video[:bitrate].to_i } + video = videos.max_by { |v| v[:bitrate].to_i } video[:url] end end @@ -137,10 +137,6 @@ module Sources::Strategies api_response[:full_text].to_s end - def normalizable_for_artist_finder? 
- url =~ PAGE - end - def normalize_for_artist_finder profile_url.try(:downcase).presence || url end @@ -193,9 +189,9 @@ module Sources::Strategies desc = artist_commentary_desc.unicode_normalize(:nfkc) desc = CGI.unescapeHTML(desc) - desc = desc.gsub(%r!https?://t\.co/[a-zA-Z0-9]+!i, url_replacements) - desc = desc.gsub(%r!#([^[:space:]]+)!, '"#\\1":[https://twitter.com/hashtag/\\1]') - desc = desc.gsub(%r!@([a-zA-Z0-9_]+)!, '"@\\1":[https://twitter.com/\\1]') + desc = desc.gsub(%r{https?://t\.co/[a-zA-Z0-9]+}i, url_replacements) + desc = desc.gsub(/#([^[:space:]]+)/, '"#\\1":[https://twitter.com/hashtag/\\1]') + desc = desc.gsub(/@([a-zA-Z0-9_]+)/, '"@\\1":[https://twitter.com/\\1]') desc.strip end @@ -204,7 +200,7 @@ module Sources::Strategies end def api_response - return {} if !self.class.enabled? + return {} unless self.class.enabled? api_client.status(status_id) end diff --git a/app/logical/sources/strategies/weibo.rb b/app/logical/sources/strategies/weibo.rb index 8cad13cd1..eaac6e40f 100644 --- a/app/logical/sources/strategies/weibo.rb +++ b/app/logical/sources/strategies/weibo.rb @@ -38,7 +38,7 @@ module Sources PAGE_URL_1 = %r{\Ahttps?://(?:www\.)?weibo\.com/(?<artist_short_id>\d+)/(?<illust_base62_id>\w+)(?:\?.*)?\z}i PAGE_URL_2 = %r{#{PROFILE_URL_2}/(?:wbphotos/large/mid|talbum/detail/photo_id)/(?<illust_long_id>\d+)(?:/pid/(?<image_id>\w{32}))?}i - PAGE_URL_3 = %r{\Ahttps?://m\.weibo\.cn/(detail/(?<illust_long_id>\d+)|status/(?<illust_base62_id>\w+))}i + PAGE_URL_3 = %r{\Ahttps?://m\.weibo\.cn/(?:detail/(?<illust_long_id>\d+)|status/(?<illust_base62_id>\w+))}i PAGE_URL_4 = %r{\Ahttps?://tw\.weibo\.com/(?:(?<artist_short_id>\d+)|\w+)/(?<illust_long_id>\d+)}i IMAGE_URL = %r{\Ahttps?://\w{3}\.sinaimg\.cn/\w+/(?<image_id>\w{32})\.}i @@ -203,12 +203,12 @@ module Sources end def api_response - return nil if mobile_url.blank? + return {} if mobile_url.blank? 
resp = Danbooru::Http.cache(1.minute).get(mobile_url) json_string = resp.to_s[/var \$render_data = \[(.*)\]\[0\]/m, 1] - return nil if json_string.blank? + return {} if json_string.blank? JSON.parse(json_string)["status"] end