sources: fix rubocop warnings.

This commit is contained in:
evazion
2020-06-15 23:30:29 -05:00
parent 049f33916b
commit 1aa0f65187
15 changed files with 158 additions and 186 deletions

View File

@@ -119,7 +119,9 @@ Style/NumericPredicate:
Style/PercentLiteralDelimiters: Style/PercentLiteralDelimiters:
PreferredDelimiters: PreferredDelimiters:
"default": "[]" "default": "[]"
"%r": "!!"
Style/ParallelAssignment:
Enabled: false
Style/PerlBackrefs: Style/PerlBackrefs:
Enabled: false Enabled: false

View File

@@ -1,7 +1,7 @@
module Sources module Sources
module Strategies module Strategies
def self.all def self.all
return [ [
Strategies::Pixiv, Strategies::Pixiv,
Strategies::NicoSeiga, Strategies::NicoSeiga,
Strategies::Twitter, Strategies::Twitter,

View File

@@ -22,15 +22,15 @@
module Sources::Strategies module Sources::Strategies
class ArtStation < Base class ArtStation < Base
PROJECT1 = %r!\Ahttps?://www\.artstation\.com/artwork/(?<project_id>[a-z0-9-]+)/?\z!i PROJECT1 = %r{\Ahttps?://www\.artstation\.com/artwork/(?<project_id>[a-z0-9-]+)/?\z}i
PROJECT2 = %r!\Ahttps?://(?<artist_name>[\w-]+)\.artstation\.com/projects/(?<project_id>[a-z0-9-]+)(?:/|\?[\w=-]+)?\z!i PROJECT2 = %r{\Ahttps?://(?<artist_name>[\w-]+)\.artstation\.com/projects/(?<project_id>[a-z0-9-]+)(?:/|\?[\w=-]+)?\z}i
PROJECT = Regexp.union(PROJECT1, PROJECT2) PROJECT = Regexp.union(PROJECT1, PROJECT2)
ARTIST1 = %r{\Ahttps?://(?<artist_name>[\w-]+)(?<!www)\.artstation\.com/?\z}i ARTIST1 = %r{\Ahttps?://(?<artist_name>[\w-]+)(?<!www)\.artstation\.com/?\z}i
ARTIST2 = %r{\Ahttps?://www\.artstation\.com/artist/(?<artist_name>[\w-]+)/?\z}i ARTIST2 = %r{\Ahttps?://www\.artstation\.com/artist/(?<artist_name>[\w-]+)/?\z}i
ARTIST3 = %r{\Ahttps?://www\.artstation\.com/(?<artist_name>[\w-]+)/?\z}i ARTIST3 = %r{\Ahttps?://www\.artstation\.com/(?<artist_name>[\w-]+)/?\z}i
ARTIST = Regexp.union(ARTIST1, ARTIST2, ARTIST3) ARTIST = Regexp.union(ARTIST1, ARTIST2, ARTIST3)
ASSET = %r!\Ahttps?://cdn\w*\.artstation\.com/p/assets/(?<type>images|covers)/images/(?<id>\d+/\d+/\d+)/(?<size>[^/]+)/(?<filename>.+)\z!i ASSET = %r{\Ahttps?://cdn\w*\.artstation\.com/p/assets/(?<type>images|covers)/images/(?<id>\d+/\d+/\d+)/(?<size>[^/]+)/(?<filename>.+)\z}i
attr_reader :json attr_reader :json
@@ -144,7 +144,7 @@ module Sources::Strategies
urls = image_url_sizes($~[:type], $~[:id], $~[:filename]) urls = image_url_sizes($~[:type], $~[:id], $~[:filename])
if size == :smallest if size == :smallest
urls = urls.reverse() urls = urls.reverse
end end
chosen_url = urls.find { |url| http_exists?(url, headers) } chosen_url = urls.find { |url| http_exists?(url, headers) }

View File

@@ -58,8 +58,8 @@ module Sources
end end
def site_name def site_name
Addressable::URI.heuristic_parse(url).host Addressable::URI.heuristic_parse(url)&.host
rescue Addressable::URI::InvalidURIError => e rescue Addressable::URI::InvalidURIError
nil nil
end end
@@ -90,9 +90,7 @@ module Sources
# eventually be assigned as the source for the post, but it does not # eventually be assigned as the source for the post, but it does not
# represent what the downloader will fetch. # represent what the downloader will fetch.
def page_url def page_url
Rails.logger.warn "Valid page url for (#{url}, #{referer_url}) not found" nil
return nil
end end
# This will be the url stored in posts. Typically this is the page # This will be the url stored in posts. Typically this is the page
@@ -141,7 +139,7 @@ module Sources
# Subclasses should merge in any required headers needed to access resources # Subclasses should merge in any required headers needed to access resources
# on the site. # on the site.
def headers def headers
return Danbooru.config.http_headers Danbooru.config.http_headers
end end
# Returns the size of the image resource without actually downloading the file. # Returns the size of the image resource without actually downloading the file.
@@ -189,7 +187,7 @@ module Sources
end end
def normalized_tags def normalized_tags
tags.map { |tag, url| normalize_tag(tag) }.sort.uniq tags.map { |tag, _url| normalize_tag(tag) }.sort.uniq
end end
def normalize_tag(tag) def normalize_tag(tag)
@@ -243,7 +241,7 @@ module Sources
end end
def to_h def to_h
return { {
:artist => { :artist => {
:name => artist_name, :name => artist_name,
:tag_name => tag_name, :tag_name => tag_name,

View File

@@ -47,18 +47,18 @@
module Sources module Sources
module Strategies module Strategies
class DeviantArt < Base class DeviantArt < Base
ASSET_SUBDOMAINS = %r{(?:fc|th|pre|img|orig|origin-orig)\d*}i ASSET_SUBDOMAINS = /(?:fc|th|pre|img|orig|origin-orig)\d*/i
RESERVED_SUBDOMAINS = %r{\Ahttps?://(?:#{ASSET_SUBDOMAINS}|www)\.}i RESERVED_SUBDOMAINS = %r{\Ahttps?://(?:#{ASSET_SUBDOMAINS}|www)\.}i
MAIN_DOMAIN = %r{\Ahttps?://(?:www\.)?deviantart.com}i MAIN_DOMAIN = %r{\Ahttps?://(?:www\.)?deviantart.com}i
TITLE = %r{(?<title>[a-z0-9_-]+?)}i TITLE = /(?<title>[a-z0-9_-]+?)/i
ARTIST = %r{(?<artist>[a-z0-9_-]+?)}i ARTIST = /(?<artist>[a-z0-9_-]+?)/i
DEVIATION_ID = %r{(?<deviation_id>[0-9]+)}i DEVIATION_ID = /(?<deviation_id>[0-9]+)/i
DA_FILENAME_1 = %r{[a-f0-9]{32}-d(?<base36_deviation_id>[a-z0-9]+)\.}i DA_FILENAME_1 = /[a-f0-9]{32}-d(?<base36_deviation_id>[a-z0-9]+)\./i
DA_FILENAME_2 = %r{#{TITLE}(?:_by_#{ARTIST}(?:-d(?<base36_deviation_id>[a-z0-9]+))?)?\.}i DA_FILENAME_2 = /#{TITLE}(?:_by_#{ARTIST}(?:-d(?<base36_deviation_id>[a-z0-9]+))?)?\./i
DA_FILENAME = Regexp.union(DA_FILENAME_1, DA_FILENAME_2) DA_FILENAME = Regexp.union(DA_FILENAME_1, DA_FILENAME_2)
WIX_FILENAME = %r{d(?<base36_deviation_id>[a-z0-9]+)[0-9a-f-]+\.\w+(?:/\w+/\w+/[\w,]+/(?<title>[\w-]+)_by_(?<artist>[\w-]+)_d\w+-\w+\.\w+)?.+}i WIX_FILENAME = %r{d(?<base36_deviation_id>[a-z0-9]+)[0-9a-f-]+\.\w+(?:/\w+/\w+/[\w,]+/(?<title>[\w-]+)_by_(?<artist>[\w-]+)_d\w+-\w+\.\w+)?.+}i
NOT_NORMALIZABLE_ASSET = %r{\Ahttps?://#{ASSET_SUBDOMAINS}\.deviantart\.net/.+/[0-9a-f]{32}(?:-[^d]\w+)?\.}i NOT_NORMALIZABLE_ASSET = %r{\Ahttps?://#{ASSET_SUBDOMAINS}\.deviantart\.net/.+/[0-9a-f]{32}(?:-[^d]\w+)?\.}i
@@ -75,7 +75,7 @@ module Sources
PATH_PROFILE = %r{#{MAIN_DOMAIN}/#{ARTIST}/?\z}i PATH_PROFILE = %r{#{MAIN_DOMAIN}/#{ARTIST}/?\z}i
SUBDOMAIN_PROFILE = %r{\Ahttps?://#{ARTIST}\.deviantart\.com/?\z}i SUBDOMAIN_PROFILE = %r{\Ahttps?://#{ARTIST}\.deviantart\.com/?\z}i
FAVME = %r{\Ahttps?://(www\.)?fav\.me/d(?<base36_deviation_id>[a-z0-9]+)\z}i FAVME = %r{\Ahttps?://(?:www\.)?fav\.me/d(?<base36_deviation_id>[a-z0-9]+)\z}i
def domains def domains
["deviantart.net", "deviantart.com", "fav.me"] ["deviantart.net", "deviantart.com", "fav.me"]
@@ -110,12 +110,12 @@ module Sources
api_deviation[:videos].max_by { |x| x[:filesize] }[:src] api_deviation[:videos].max_by { |x| x[:filesize] }[:src]
else else
src = api_deviation.dig(:content, :src) src = api_deviation.dig(:content, :src)
if deviation_id && deviation_id.to_i <= 790677560 && src =~ /^https:\/\/images-wixmp-/ && src !~ /\.gif\?/ if deviation_id && deviation_id.to_i <= 790_677_560 && src =~ %r{\Ahttps://images-wixmp-} && src !~ /\.gif\?/
src = src.sub(%r!(/f/[a-f0-9-]+/[a-f0-9-]+)!, '/intermediary\1') src = src.sub(%r{(/f/[a-f0-9-]+/[a-f0-9-]+)}, '/intermediary\1')
src = src.sub(%r!/v1/(fit|fill)/.*\z!i, "") src = src.sub(%r{/v1/(fit|fill)/.*\z}i, "")
end end
src = src.sub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net") src = src.sub(%r{\Ahttps?://orig\d+\.deviantart\.net}i, "http://origin-orig.deviantart.net")
src = src.gsub(%r!q_\d+,strp!, "q_100") src = src.gsub(/q_\d+,strp/, "q_100")
src src
end end
end end
@@ -191,7 +191,7 @@ module Sources
# <a href="https://sa-dui.deviantart.com/journal/About-Commissions-223178193" data-sigil="thumb" class="thumb lit" ...> # <a href="https://sa-dui.deviantart.com/journal/About-Commissions-223178193" data-sigil="thumb" class="thumb lit" ...>
if element["class"].split.include?("lit") if element["class"].split.include?("lit")
deviation_id = element["href"][%r!-(\d+)\z!, 1].to_i deviation_id = element["href"][/-(\d+)\z/, 1].to_i
element.content = "deviantart ##{deviation_id}" element.content = "deviantart ##{deviation_id}"
else else
element.content = "" element.content = ""
@@ -199,7 +199,7 @@ module Sources
end end
if element.name == "a" && element["href"].present? if element.name == "a" && element["href"].present?
element["href"] = element["href"].gsub(%r!\Ahttps?://www\.deviantart\.com/users/outgoing\?!i, "") element["href"] = element["href"].gsub(%r{\Ahttps?://www\.deviantart\.com/users/outgoing\?}i, "")
# href may be missing the `http://` bit (ex: `inprnt.com`, `//inprnt.com`). Add it if missing. # href may be missing the `http://` bit (ex: `inprnt.com`, `//inprnt.com`). Add it if missing.
uri = Addressable::URI.heuristic_parse(element["href"]) rescue nil uri = Addressable::URI.heuristic_parse(element["href"]) rescue nil
@@ -283,7 +283,7 @@ module Sources
return nil if meta.nil? return nil if meta.nil?
appurl = meta["content"] appurl = meta["content"]
uuid = appurl[%r!\ADeviantArt://deviation/(.*)\z!, 1] uuid = appurl[%r{\ADeviantArt://deviation/(.*)\z}, 1]
uuid uuid
end end
memoize :uuid memoize :uuid

View File

@@ -23,11 +23,11 @@
module Sources module Sources
module Strategies module Strategies
class HentaiFoundry < Base class HentaiFoundry < Base
BASE_URL = %r!\Ahttps?://(?:www\.)?hentai-foundry\.com!i BASE_URL = %r{\Ahttps?://(?:www\.)?hentai-foundry\.com}i
PAGE_URL = %r!#{BASE_URL}/pictures/user/(?<artist_name>[\w-]+)/(?<illust_id>\d+)(?:/[\w.-]*)?(\?[\w=]*)?\z!i PAGE_URL = %r{#{BASE_URL}/pictures/user/(?<artist_name>[\w-]+)/(?<illust_id>\d+)(?:/[\w.-]*)?(\?[\w=]*)?\z}i
OLD_PAGE = %r!#{BASE_URL}/pic-(?<illust_id>\d+)(?:\.html)?\z!i OLD_PAGE = %r{#{BASE_URL}/pic-(?<illust_id>\d+)(?:\.html)?\z}i
PROFILE_URL = %r!#{BASE_URL}/(?:pictures/)?user/(?<artist_name>[\w-]+)(?:/[a-z]*)?\z!i PROFILE_URL = %r{#{BASE_URL}/(?:pictures/)?user/(?<artist_name>[\w-]+)(?:/[a-z]*)?\z}i
IMAGE_URL = %r!\Ahttps?://pictures\.hentai-foundry\.com/+\w/(?<artist_name>[\w-]+)/(?<illust_id>\d+)(?:(?:/[\w.-]+)?\.\w+)?\z!i IMAGE_URL = %r{\Ahttps?://pictures\.hentai-foundry\.com/+\w/(?<artist_name>[\w-]+)/(?<illust_id>\d+)(?:(?:/[\w.-]+)?\.\w+)?\z}i
def domains def domains
["hentai-foundry.com"] ["hentai-foundry.com"]

View File

@@ -32,10 +32,10 @@
module Sources module Sources
module Strategies module Strategies
class Moebooru < Base class Moebooru < Base
BASE_URL = %r!\Ahttps?://(?:[^.]+\.)?(?<domain>yande\.re|konachan\.com)!i BASE_URL = %r{\Ahttps?://(?:[^.]+\.)?(?<domain>yande\.re|konachan\.com)}i
POST_URL = %r!#{BASE_URL}/post/show/(?<id>\d+)!i POST_URL = %r{#{BASE_URL}/post/show/(?<id>\d+)}i
URL_SLUG = %r!/(?:yande\.re%20|Konachan\.com%20-%20)?(?<id>\d+)?.*!i URL_SLUG = %r{/(?:yande\.re%20|Konachan\.com%20-%20)?(?<id>\d+)?.*}i
IMAGE_URL = %r!#{BASE_URL}/(?<type>image|jpeg|sample)/(?<md5>\h{32})#{URL_SLUG}?\.(?<ext>jpg|jpeg|png|gif)\z!i IMAGE_URL = %r{#{BASE_URL}/(?<type>image|jpeg|sample)/(?<md5>\h{32})#{URL_SLUG}?\.(?<ext>jpg|jpeg|png|gif)\z}i
delegate :artist_name, :profile_url, :tag_name, :artist_commentary_title, :artist_commentary_desc, :dtext_artist_commentary_title, :dtext_artist_commentary_desc, to: :sub_strategy, allow_nil: true delegate :artist_name, :profile_url, :tag_name, :artist_commentary_title, :artist_commentary_desc, :dtext_artist_commentary_title, :dtext_artist_commentary_desc, to: :sub_strategy, allow_nil: true
@@ -63,7 +63,7 @@ module Sources
end end
def preview_urls def preview_urls
return image_urls unless post_md5.present? return image_urls if post_md5.blank?
["https://#{file_host}/data/preview/#{post_md5[0..1]}/#{post_md5[2..3]}/#{post_md5}.jpg"] ["https://#{file_host}/data/preview/#{post_md5[0..1]}/#{post_md5[2..3]}/#{post_md5}.jpg"]
end end

View File

@@ -38,7 +38,7 @@
module Sources module Sources
module Strategies module Strategies
class NicoSeiga < Base class NicoSeiga < Base
DIRECT = %r{\Ahttps?://lohas\.nicoseiga\.jp/(priv|o)/(?:\w+/\d+/)?(?<image_id>\d+)(?:\?.+)?}i DIRECT = %r{\Ahttps?://lohas\.nicoseiga\.jp/(?:priv|o)/(?:\w+/\d+/)?(?<image_id>\d+)(?:\?.+)?}i
CDN_DIRECT = %r{\Ahttps?://dcdn\.cdn\.nimg\.jp/.+/\w+/\d+/(?<image_id>\d+)}i CDN_DIRECT = %r{\Ahttps?://dcdn\.cdn\.nimg\.jp/.+/\w+/\d+/(?<image_id>\d+)}i
SOURCE = %r{\Ahttps?://seiga\.nicovideo\.jp/image/source(?:/|\?id=)(?<image_id>\d+)}i SOURCE = %r{\Ahttps?://seiga\.nicovideo\.jp/image/source(?:/|\?id=)(?<image_id>\d+)}i

View File

@@ -44,25 +44,25 @@
module Sources module Sources
module Strategies module Strategies
class Nijie < Base class Nijie < Base
BASE_URL = %r!\Ahttps?://(?:[^.]+\.)?nijie\.info!i BASE_URL = %r{\Ahttps?://(?:[^.]+\.)?nijie\.info}i
PAGE_URL = %r!#{BASE_URL}/view(?:_popup)?\.php\?id=(?<illust_id>\d+)!i PAGE_URL = %r{#{BASE_URL}/view(?:_popup)?\.php\?id=(?<illust_id>\d+)}i
PROFILE_URL = %r!#{BASE_URL}/members(?:_illust)?\.php\?id=(?<artist_id>\d+)\z!i PROFILE_URL = %r{#{BASE_URL}/members(?:_illust)?\.php\?id=(?<artist_id>\d+)\z}i
# https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg # https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg
# https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png # https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png
# http://pic.nijie.net/03/nijie_picture/829001_20190620004513_0.mp4 # http://pic.nijie.net/03/nijie_picture/829001_20190620004513_0.mp4
# https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png # https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png
FILENAME1 = %r!(?<artist_id>\d+)_(?<timestamp>\d{14})(?:_\d+)?!i FILENAME1 = /(?<artist_id>\d+)_(?<timestamp>\d{14})(?:_\d+)?/i
# https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png # https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png
FILENAME2 = %r!(?<illust_id>\d+)_\d+_(?<artist_id>\d+)_(?<timestamp>\d{14})!i FILENAME2 = /(?<illust_id>\d+)_\d+_(?<artist_id>\d+)_(?<timestamp>\d{14})/i
# https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png # https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png
FILENAME3 = %r!(?<illust_id>\d+)_(?<artist_id>\d+)_(?<timestamp>\d{14})_\d+!i FILENAME3 = /(?<illust_id>\d+)_(?<artist_id>\d+)_(?<timestamp>\d{14})_\d+/i
IMAGE_BASE_URL = %r!\Ahttps?://(?:pic\d+\.nijie\.info|pic\.nijie\.net)!i IMAGE_BASE_URL = %r{\Ahttps?://(?:pic\d+\.nijie\.info|pic\.nijie\.net)}i
DIR = %r!(?:\d+/)?(?:__rs_\w+/)?nijie_picture(?:/diff/main)?! DIR = %r{(?:\d+/)?(?:__rs_\w+/)?nijie_picture(?:/diff/main)?}
IMAGE_URL = %r!#{IMAGE_BASE_URL}/#{DIR}/#{Regexp.union(FILENAME1, FILENAME2, FILENAME3)}\.\w+\z!i IMAGE_URL = %r{#{IMAGE_BASE_URL}/#{DIR}/#{Regexp.union(FILENAME1, FILENAME2, FILENAME3)}\.\w+\z}i
def domains def domains
["nijie.info", "nijie.net"] ["nijie.info", "nijie.net"]
@@ -146,7 +146,7 @@ module Sources
end end
def to_full_image_url(x) def to_full_image_url(x)
x.gsub(%r!__rs_\w+/!i, "").gsub(/\Ahttp:/, "https:") x.gsub(%r{__rs_\w+/}i, "").gsub(/\Ahttp:/, "https:")
end end
def to_preview_url(url) def to_preview_url(url)
@@ -186,7 +186,7 @@ module Sources
doc = agent.get(page_url) doc = agent.get(page_url)
end end
return doc doc
rescue Mechanize::ResponseCodeError => e rescue Mechanize::ResponseCodeError => e
return nil if e.response_code.to_i == 404 return nil if e.response_code.to_i == 404
raise raise
@@ -220,13 +220,10 @@ module Sources
mech.cookie_jar.add(cookie) mech.cookie_jar.add(cookie)
mech mech
rescue Mechanize::ResponseCodeError => x rescue Mechanize::ResponseCodeError => e
if x.response_code.to_i == 429 raise unless e.response_code.to_i == 429
sleep(5) sleep(5)
retry retry
else
raise
end
end end
memoize :agent memoize :agent
end end

View File

@@ -28,7 +28,7 @@ module Sources
when %r{\Ahttp://p\.twpl\.jp/show/(?:large|orig)/([a-z0-9]+)}i when %r{\Ahttp://p\.twpl\.jp/show/(?:large|orig)/([a-z0-9]+)}i
"http://p.twipple.jp/#{$1}" "http://p.twipple.jp/#{$1}"
when %r{\Ahttps?://blog(?:(?:-imgs-)?\d*(?:-origin)?)?\.fc2\.com/(?:(?:[^/]/){3}|(?:[^/]/))([^/]+)/(?:file/)?([^\.]+\.[^\?]+)}i when %r{\Ahttps?://blog(?:(?:-imgs-)?\d*(?:-origin)?)?\.fc2\.com/(?:(?:[^/]/){3}|(?:[^/]/))([^/]+)/(?:file/)?([^.]+\.[^?]+)}i
username = $1 username = $1
filename = $2 filename = $2
"http://#{username}.blog.fc2.com/img/#{filename}/" "http://#{username}.blog.fc2.com/img/#{filename}/"
@@ -105,7 +105,7 @@ module Sources
# http://img.toranoana.jp/popup_img18/04/0010/22/87/040010228714-1p.jpg # http://img.toranoana.jp/popup_img18/04/0010/22/87/040010228714-1p.jpg
# http://img.toranoana.jp/popup_blimg/04/0030/08/30/040030083068-1p.jpg # http://img.toranoana.jp/popup_blimg/04/0030/08/30/040030083068-1p.jpg
# https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg # https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg
when %r{\Ahttps?://(\w+\.)?toranoana\.jp/(?:popup_(?:bl)?img\d*|ec/img)/\d{2}/\d{4}/\d{2}/\d{2}/(?<work_id>\d+)}i when %r{\Ahttps?://(?:\w+\.)?toranoana\.jp/(?:popup_(?:bl)?img\d*|ec/img)/\d{2}/\d{4}/\d{2}/\d{2}/(?<work_id>\d+)}i
"https://ec.toranoana.jp/tora_r/ec/item/#{$~[:work_id]}/" "https://ec.toranoana.jp/tora_r/ec/item/#{$~[:work_id]}/"
# https://a.hitomi.la/galleries/907838/1.png # https://a.hitomi.la/galleries/907838/1.png

View File

@@ -16,13 +16,13 @@
module Sources::Strategies module Sources::Strategies
class Pawoo < Base class Pawoo < Base
HOST = %r!\Ahttps?://(www\.)?pawoo\.net!i HOST = %r{\Ahttps?://(www\.)?pawoo\.net}i
IMAGE = %r!\Ahttps?://img\.pawoo\.net/media_attachments/files/(\d+/\d+/\d+)! IMAGE = %r{\Ahttps?://img\.pawoo\.net/media_attachments/files/(\d+/\d+/\d+)}
NAMED_PROFILE = %r!#{HOST}/@(?<artist_name>\w+)!i NAMED_PROFILE = %r{#{HOST}/@(?<artist_name>\w+)}i
ID_PROFILE = %r!#{HOST}/web/accounts/(?<artist_id>\d+)! ID_PROFILE = %r{#{HOST}/web/accounts/(?<artist_id>\d+)}
STATUS1 = %r!\A#{HOST}/web/statuses/(?<status_id>\d+)! STATUS1 = %r{\A#{HOST}/web/statuses/(?<status_id>\d+)}
STATUS2 = %r!\A#{NAMED_PROFILE}/(?<status_id>\d+)! STATUS2 = %r{\A#{NAMED_PROFILE}/(?<status_id>\d+)}
def domains def domains
["pawoo.net"] ["pawoo.net"]
@@ -37,15 +37,13 @@ module Sources::Strategies
end end
def image_urls def image_urls
if url =~ %r!#{IMAGE}/small/([a-z0-9]+\.\w+)\z!i if url =~ %r{#{IMAGE}/small/([a-z0-9]+\.\w+)\z}i
return ["https://img.pawoo.net/media_attachments/files/#{$1}/original/#{$2}"] ["https://img.pawoo.net/media_attachments/files/#{$1}/original/#{$2}"]
elsif url =~ %r{#{IMAGE}/original/([a-z0-9]+\.\w+)\z}i
[url]
else
api_response.image_urls
end end
if url =~ %r!#{IMAGE}/original/([a-z0-9]+\.\w+)\z!i
return [url]
end
return api_response.image_urls
end end
def page_url def page_url
@@ -55,16 +53,17 @@ module Sources::Strategies
end end
end end
return super super
end end
def profile_url def profile_url
if url =~ PawooApiClient::PROFILE2 if url =~ PawooApiClient::PROFILE2
return "https://pawoo.net/@#{$1}" "https://pawoo.net/@#{$1}"
elsif api_response.profile_url.blank?
url
else
api_response.profile_url
end end
return url if api_response.profile_url.blank?
api_response.profile_url
end end
def artist_name def artist_name
@@ -87,10 +86,6 @@ module Sources::Strategies
urls.map { |url| url[STATUS1, :status_id] || url[STATUS2, :status_id] }.compact.first urls.map { |url| url[STATUS1, :status_id] || url[STATUS2, :status_id] }.compact.first
end end
def artist_commentary_title
nil
end
def artist_commentary_desc def artist_commentary_desc
api_response.commentary api_response.commentary
end end
@@ -99,18 +94,10 @@ module Sources::Strategies
api_response.tags api_response.tags
end end
def normalizable_for_artist_finder?
true
end
def normalize_for_artist_finder
profile_url
end
def normalize_for_source def normalize_for_source
artist_name = artist_name_from_url artist_name = artist_name_from_url
status_id = status_id_from_url status_id = status_id_from_url
return unless status_id.present? return if status_id.blank?
if artist_name.present? if artist_name.present?
"https://pawoo.net/@#{artist_name}/#{status_id}" "https://pawoo.net/@#{artist_name}/#{status_id}"
@@ -131,7 +118,7 @@ module Sources::Strategies
def api_response def api_response
[url, referer_url].each do |x| [url, referer_url].each do |x|
if client = PawooApiClient.new.get(x) if (client = PawooApiClient.new.get(x))
return client return client
end end
end end

View File

@@ -50,35 +50,35 @@
module Sources module Sources
module Strategies module Strategies
class Pixiv < Base class Pixiv < Base
MONIKER = %r!(?:[a-zA-Z0-9_-]+)! MONIKER = /(?:[a-zA-Z0-9_-]+)/
PROFILE = %r!\Ahttps?://www\.pixiv\.net/member\.php\?id=[0-9]+\z! PROFILE = %r{\Ahttps?://www\.pixiv\.net/member\.php\?id=[0-9]+\z}
DATE = %r!(?<date>\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})!i DATE = %r{(?<date>\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})}i
EXT = %r!(?:jpg|jpeg|png|gif)!i EXT = /(?:jpg|jpeg|png|gif)/i
WEB = %r!(?:\A(?:https?://)?www\.pixiv\.net)! WEB = %r{(?:\A(?:https?://)?www\.pixiv\.net)}
I12 = %r!(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)! I12 = %r{(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)}
IMG = %r!(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)! IMG = %r{(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)}
PXIMG = %r!(?:\A(?:https?://)?[^.]+\.pximg\.net)! PXIMG = %r{(?:\A(?:https?://)?[^.]+\.pximg\.net)}
TOUCH = %r!(?:\A(?:https?://)?touch\.pixiv\.net)! TOUCH = %r{(?:\A(?:https?://)?touch\.pixiv\.net)}
UGOIRA = %r!#{PXIMG}/img-zip-ugoira/img/#{DATE}/(?<illust_id>\d+)_ugoira1920x1080\.zip\z!i UGOIRA = %r{#{PXIMG}/img-zip-ugoira/img/#{DATE}/(?<illust_id>\d+)_ugoira1920x1080\.zip\z}i
ORIG_IMAGE = %r!#{PXIMG}/img-original/img/#{DATE}/(?<illust_id>\d+)_p(?<page>\d+)\.#{EXT}\z!i ORIG_IMAGE = %r{#{PXIMG}/img-original/img/#{DATE}/(?<illust_id>\d+)_p(?<page>\d+)\.#{EXT}\z}i
STACC_PAGE = %r!\A#{WEB}/stacc/#{MONIKER}/?\z!i STACC_PAGE = %r{\A#{WEB}/stacc/#{MONIKER}/?\z}i
NOVEL_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/novel/show\.php\?id=(\d+))! NOVEL_PAGE = %r{(?:\Ahttps?://www\.pixiv\.net/novel/show\.php\?id=(\d+))}
FANBOX_ACCOUNT = %r!(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+\z)! FANBOX_ACCOUNT = %r{(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+\z)}
FANBOX_IMAGE = %r!(?:\Ahttps?://fanbox\.pixiv\.net/images/post/(\d+))! FANBOX_IMAGE = %r{(?:\Ahttps?://fanbox\.pixiv\.net/images/post/(\d+))}
FANBOX_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+/post/(\d+))! FANBOX_PAGE = %r{(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+/post/(\d+))}
def self.to_dtext(text) def self.to_dtext(text)
if text.nil? if text.nil?
return nil return nil
end end
text = text.gsub(%r!https?://www\.pixiv\.net/member_illust\.php\?mode=medium&illust_id=([0-9]+)!i) do |match| text = text.gsub(%r{https?://www\.pixiv\.net/member_illust\.php\?mode=medium&illust_id=([0-9]+)}i) do |_match|
pixiv_id = $1 pixiv_id = $1
%(pixiv ##{pixiv_id} "»":[/posts?tags=pixiv:#{pixiv_id}]) %(pixiv ##{pixiv_id} "»":[/posts?tags=pixiv:#{pixiv_id}])
end end
text = text.gsub(%r!https?://www\.pixiv\.net/member\.php\?id=([0-9]+)!i) do |match| text = text.gsub(%r{https?://www\.pixiv\.net/member\.php\?id=([0-9]+)}i) do |_match|
member_id = $1 member_id = $1
profile_url = "https://www.pixiv.net/member.php?id=#{member_id}" profile_url = "https://www.pixiv.net/member.php?id=#{member_id}"
search_params = {"search[url_matches]" => profile_url}.to_param search_params = {"search[url_matches]" => profile_url}.to_param
@@ -139,13 +139,13 @@ module Sources
return "https://www.pixiv.net/artworks/#{illust_id}" return "https://www.pixiv.net/artworks/#{illust_id}"
end end
return url url
rescue PixivApiClient::BadIDError rescue PixivApiClient::BadIDError
nil nil
end end
def canonical_url def canonical_url
return image_url image_url
end end
def profile_url def profile_url
@@ -200,7 +200,7 @@ module Sources
} }
end end
return { {
"Referer" => "https://www.pixiv.net" "Referer" => "https://www.pixiv.net"
} }
end end
@@ -231,7 +231,7 @@ module Sources
translated_tags = super(tag) translated_tags = super(tag)
if translated_tags.empty? && tag.include?("/") if translated_tags.empty? && tag.include?("/")
translated_tags = tag.split("/").flat_map { |tag| super(tag) } translated_tags = tag.split("/").flat_map { |translated_tag| super(translated_tag) }
end end
translated_tags translated_tags
@@ -257,7 +257,7 @@ module Sources
return [ugoira_zip_url] return [ugoira_zip_url]
end end
return metadata.pages metadata.pages
end end
# in order to prevent recursive loops, this method should not make any # in order to prevent recursive loops, this method should not make any
@@ -276,11 +276,11 @@ module Sources
return url.query_values["illust_id"].to_i return url.query_values["illust_id"].to_i
# http://www.pixiv.net/en/artworks/46324488 # http://www.pixiv.net/en/artworks/46324488
elsif url.host == "www.pixiv.net" && url.path =~ %r!\A/(?:en/)?artworks/(?<illust_id>\d+)!i elsif url.host == "www.pixiv.net" && url.path =~ %r{\A/(?:en/)?artworks/(?<illust_id>\d+)}i
return $~[:illust_id].to_i return $~[:illust_id].to_i
# http://www.pixiv.net/i/18557054 # http://www.pixiv.net/i/18557054
elsif url.host == "www.pixiv.net" && url.path =~ %r!\A/i/(?<illust_id>\d+)\z!i elsif url.host == "www.pixiv.net" && url.path =~ %r{\A/i/(?<illust_id>\d+)\z}i
return $~[:illust_id].to_i return $~[:illust_id].to_i
# http://img18.pixiv.net/img/evazion/14901720.png # http://img18.pixiv.net/img/evazion/14901720.png
@@ -289,8 +289,8 @@ module Sources
# http://i2.pixiv.net/img18/img/evazion/14901720_s.png # http://i2.pixiv.net/img18/img/evazion/14901720_s.png
# http://i1.pixiv.net/img07/img/pasirism/18557054_p1.png # http://i1.pixiv.net/img07/img/pasirism/18557054_p1.png
# http://i1.pixiv.net/img07/img/pasirism/18557054_big_p1.png # http://i1.pixiv.net/img07/img/pasirism/18557054_big_p1.png
elsif url.host =~ %r!\A(?:i\d+|img\d+)\.pixiv\.net\z!i && elsif url.host =~ /\A(?:i\d+|img\d+)\.pixiv\.net\z/i &&
url.path =~ %r!\A(?:/img\d+)?/img/#{MONIKER}/(?<illust_id>\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i url.path =~ %r{\A(?:/img\d+)?/img/#{MONIKER}/(?<illust_id>\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)}i
return $~[:illust_id].to_i return $~[:illust_id].to_i
# http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_64x64.jpg # http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_64x64.jpg
@@ -307,13 +307,13 @@ module Sources
# #
# https://i.pximg.net/novel-cover-original/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42.jpg # https://i.pximg.net/novel-cover-original/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42.jpg
# https://img-sketch.pixiv.net/uploads/medium/file/4463372/8906921629213362989.jpg # https://img-sketch.pixiv.net/uploads/medium/file/4463372/8906921629213362989.jpg
elsif url.host =~ %r!\A(?:[^.]+\.pximg\.net|i\d+\.pixiv\.net|tc-pximg01\.techorus-cdn\.com)\z!i && elsif url.host =~ /\A(?:[^.]+\.pximg\.net|i\d+\.pixiv\.net|tc-pximg01\.techorus-cdn\.com)\z/i &&
url.path =~ %r!\A(/c/\w+)?/img-[a-z-]+/img/#{DATE}/(?<illust_id>\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i url.path =~ %r{\A(/c/\w+)?/img-[a-z-]+/img/#{DATE}/(?<illust_id>\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)}i
return $~[:illust_id].to_i return $~[:illust_id].to_i
end end
end end
return nil nil
end end
memoize :illust_id memoize :illust_id
@@ -324,7 +324,7 @@ module Sources
end end
end end
return nil nil
end end
memoize :novel_id memoize :novel_id
@@ -339,7 +339,7 @@ module Sources
end end
end end
return nil nil
end end
memoize :fanbox_id memoize :fanbox_id
@@ -350,7 +350,7 @@ module Sources
end end
end end
return nil nil
end end
memoize :fanbox_account_id memoize :fanbox_account_id
@@ -368,45 +368,39 @@ module Sources
return PixivApiClient.new.fanbox(fanbox_id) return PixivApiClient.new.fanbox(fanbox_id)
end end
return PixivApiClient.new.work(illust_id) PixivApiClient.new.work(illust_id)
end end
memoize :metadata memoize :metadata
def moniker def moniker
# we can sometimes get the moniker from the url # we can sometimes get the moniker from the url
if url =~ %r!#{IMG}/img/(#{MONIKER})!i if url =~ %r{#{IMG}/img/(#{MONIKER})}i
return $1 $1
elsif url =~ %r{#{I12}/img[0-9]+/img/(#{MONIKER})}i
$1
elsif url =~ %r{#{WEB}/stacc/(#{MONIKER})/?$}i
$1
else
metadata.moniker
end end
if url =~ %r!#{I12}/img[0-9]+/img/(#{MONIKER})!i
return $1
end
if url =~ %r!#{WEB}/stacc/(#{MONIKER})/?$!i
return $1
end
return metadata.moniker
rescue PixivApiClient::BadIDError rescue PixivApiClient::BadIDError
nil nil
end end
memoize :moniker memoize :moniker
def data def data
return { { ugoira_frame_data: ugoira_frame_data }
ugoira_frame_data: ugoira_frame_data
}
end end
def ugoira_zip_url def ugoira_zip_url
if metadata.pages.is_a?(Hash) && metadata.pages["ugoira600x600"] if metadata.pages.is_a?(Hash) && metadata.pages["ugoira600x600"]
return metadata.pages["ugoira600x600"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip") metadata.pages["ugoira600x600"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
end end
end end
memoize :ugoira_zip_url memoize :ugoira_zip_url
def ugoira_frame_data def ugoira_frame_data
return metadata.json.dig("metadata", "frames") metadata.json.dig("metadata", "frames")
rescue PixivApiClient::BadIDError rescue PixivApiClient::BadIDError
nil nil
end end
@@ -415,16 +409,14 @@ module Sources
def ugoira_content_type def ugoira_content_type
case metadata.json["image_urls"].to_s case metadata.json["image_urls"].to_s
when /\.jpg/ when /\.jpg/
return "image/jpeg" "image/jpeg"
when /\.png/ when /\.png/
return "image/png" "image/png"
when /\.gif/ when /\.gif/
return "image/gif" "image/gif"
else
raise Sources::Error, "content type not found for (#{url}, #{referer_url})"
end end
raise Sources::Error.new("content type not found for (#{url}, #{referer_url})")
end end
memoize :ugoira_content_type memoize :ugoira_content_type
@@ -434,7 +426,7 @@ module Sources
# http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg # http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg
# http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg # http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
# http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg # http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg
if url =~ %r!/\d+_p(\d+)(?:_\w+)?\.#{EXT}!i if url =~ %r{/\d+_p(\d+)(?:_\w+)?\.#{EXT}}i
return $1.to_i return $1.to_i
end end
@@ -445,7 +437,7 @@ module Sources
end end
end end
return nil nil
end end
memoize :manga_page memoize :manga_page
end end

View File

@@ -12,19 +12,19 @@ module Sources::Strategies
class Tumblr < Base class Tumblr < Base
SIZES = %w[1280 640 540 500h 500 400 250 100] SIZES = %w[1280 640 540 500h 500 400 250 100]
BASE_URL = %r!\Ahttps?://(?:[^/]+\.)*tumblr\.com!i BASE_URL = %r{\Ahttps?://(?:[^/]+\.)*tumblr\.com}i
DOMAIN = %r{(data|(\d+\.)?media)\.tumblr\.com} DOMAIN = /(data|(?:\d+\.)?media)\.tumblr\.com/i
MD5 = %r{(?<md5>[0-9a-f]{32})}i MD5 = /(?<md5>[0-9a-f]{32})/i
FILENAME = %r{(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)}i FILENAME = /(?<filename>(?:tumblr_(?:inline_)?)?[a-z0-9]+(?:_r[0-9]+)?)/i
EXT = %r{(?<ext>\w+)} EXT = /(?<ext>\w+)/
# old: https://66.media.tumblr.com/2c6f55531618b4335c67e29157f5c1fc/tumblr_pz4a44xdVj1ssucdno1_1280.png # old: https://66.media.tumblr.com/2c6f55531618b4335c67e29157f5c1fc/tumblr_pz4a44xdVj1ssucdno1_1280.png
# new: https://66.media.tumblr.com/168dabd09d5ad69eb5fedcf94c45c31a/3dbfaec9b9e0c2e3-72/s640x960/bf33a1324f3f36d2dc64f011bfeab4867da62bc8.png # new: https://66.media.tumblr.com/168dabd09d5ad69eb5fedcf94c45c31a/3dbfaec9b9e0c2e3-72/s640x960/bf33a1324f3f36d2dc64f011bfeab4867da62bc8.png
OLD_IMAGE = %r!\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_(?<size>\w+)\.#{EXT}\z!i OLD_IMAGE = %r{\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_(?<size>\w+)\.#{EXT}\z}i
IMAGE = %r!\Ahttps?://#{DOMAIN}/!i IMAGE = %r{\Ahttps?://#{DOMAIN}/}i
VIDEO = %r!\Ahttps?://(?:vtt|ve\.media)\.tumblr\.com/!i VIDEO = %r{\Ahttps?://(?:vtt|ve\.media)\.tumblr\.com/}i
POST = %r!\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)!i POST = %r{\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)}i
def self.enabled? def self.enabled?
Danbooru.config.tumblr_consumer_key.present? Danbooru.config.tumblr_consumer_key.present?
@@ -68,7 +68,7 @@ module Sources::Strategies
def preview_urls def preview_urls
image_urls.map do |x| image_urls.map do |x|
x.sub(%r!_1280\.(jpg|png|gif|jpeg)\z!, '_250.\1') x.sub(/_1280\.(jpg|png|gif|jpeg)\z/, '_250.\1')
end end
end end

View File

@@ -1,20 +1,20 @@
module Sources::Strategies module Sources::Strategies
class Twitter < Base class Twitter < Base
PAGE = %r!\Ahttps?://(?:mobile\.)?twitter\.com!i PAGE = %r{\Ahttps?://(?:mobile\.)?twitter\.com}i
PROFILE = %r!\Ahttps?://(?:mobile\.)?twitter.com/(?<username>[a-z0-9_]+)!i PROFILE = %r{\Ahttps?://(?:mobile\.)?twitter.com/(?<username>[a-z0-9_]+)}i
# https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg # https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg
# https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb?format=jpg&name=900x900 # https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb?format=jpg&name=900x900
# https://pbs.twimg.com/tweet_video_thumb/ETkN_L3X0AMy1aT.jpg # https://pbs.twimg.com/tweet_video_thumb/ETkN_L3X0AMy1aT.jpg
# https://pbs.twimg.com/ext_tw_video_thumb/1243725361986375680/pu/img/JDA7g7lcw7wK-PIv.jpg # https://pbs.twimg.com/ext_tw_video_thumb/1243725361986375680/pu/img/JDA7g7lcw7wK-PIv.jpg
# https://pbs.twimg.com/amplify_video_thumb/1215590775364259840/img/lolCkEEioFZTb5dl.jpg # https://pbs.twimg.com/amplify_video_thumb/1215590775364259840/img/lolCkEEioFZTb5dl.jpg
BASE_IMAGE_URL = %r!\Ahttps?://pbs\.twimg\.com/(?<media_type>media|tweet_video_thumb|ext_tw_video_thumb|amplify_video_thumb)!i BASE_IMAGE_URL = %r{\Ahttps?://pbs\.twimg\.com/(?<media_type>media|tweet_video_thumb|ext_tw_video_thumb|amplify_video_thumb)}i
FILENAME1 = %r!(?<file_name>[a-zA-Z0-9_-]+)\.(?<file_ext>\w+)!i FILENAME1 = /(?<file_name>[a-zA-Z0-9_-]+)\.(?<file_ext>\w+)/i
FILENAME2 = %r!(?<file_name>[a-zA-Z0-9_-]+)\?.*format=(?<file_ext>\w+)!i FILENAME2 = /(?<file_name>[a-zA-Z0-9_-]+)\?.*format=(?<file_ext>\w+)/i
FILEPATH1 = %r!(?<file_path>\d+/[\w_-]+/img)!i FILEPATH1 = %r{(?<file_path>\d+/[\w_-]+/img)}i
FILEPATH2 = %r!(?<file_path>\d+/img)!i FILEPATH2 = %r{(?<file_path>\d+/img)}i
IMAGE_URL1 = %r!#{BASE_IMAGE_URL}/#{Regexp.union(FILENAME1, FILENAME2)}!i IMAGE_URL1 = %r{#{BASE_IMAGE_URL}/#{Regexp.union(FILENAME1, FILENAME2)}}i
IMAGE_URL2 = %r!#{BASE_IMAGE_URL}/#{Regexp.union(FILEPATH1, FILEPATH2)}/#{FILENAME1}!i IMAGE_URL2 = %r{#{BASE_IMAGE_URL}/#{Regexp.union(FILEPATH1, FILEPATH2)}/#{FILENAME1}}i
# Twitter provides a list but it's inaccurate; some names ('intent') aren't # Twitter provides a list but it's inaccurate; some names ('intent') aren't
# included and other names in the list aren't actually reserved. # included and other names in the list aren't actually reserved.
@@ -47,7 +47,7 @@ module Sources::Strategies
return $1 return $1
end end
return nil nil
end end
def self.artist_name_from_url(url) def self.artist_name_from_url(url)
@@ -78,7 +78,7 @@ module Sources::Strategies
elsif media[:type].in?(["video", "animated_gif"]) elsif media[:type].in?(["video", "animated_gif"])
variants = media.dig(:video_info, :variants) variants = media.dig(:video_info, :variants)
videos = variants.select { |variant| variant[:content_type] == "video/mp4" } videos = variants.select { |variant| variant[:content_type] == "video/mp4" }
video = videos.max_by { |video| video[:bitrate].to_i } video = videos.max_by { |v| v[:bitrate].to_i }
video[:url] video[:url]
end end
end end
@@ -137,10 +137,6 @@ module Sources::Strategies
api_response[:full_text].to_s api_response[:full_text].to_s
end end
def normalizable_for_artist_finder?
url =~ PAGE
end
def normalize_for_artist_finder def normalize_for_artist_finder
profile_url.try(:downcase).presence || url profile_url.try(:downcase).presence || url
end end
@@ -193,9 +189,9 @@ module Sources::Strategies
desc = artist_commentary_desc.unicode_normalize(:nfkc) desc = artist_commentary_desc.unicode_normalize(:nfkc)
desc = CGI.unescapeHTML(desc) desc = CGI.unescapeHTML(desc)
desc = desc.gsub(%r!https?://t\.co/[a-zA-Z0-9]+!i, url_replacements) desc = desc.gsub(%r{https?://t\.co/[a-zA-Z0-9]+}i, url_replacements)
desc = desc.gsub(%r!#([^[:space:]]+)!, '"#\\1":[https://twitter.com/hashtag/\\1]') desc = desc.gsub(/#([^[:space:]]+)/, '"#\\1":[https://twitter.com/hashtag/\\1]')
desc = desc.gsub(%r!@([a-zA-Z0-9_]+)!, '"@\\1":[https://twitter.com/\\1]') desc = desc.gsub(/@([a-zA-Z0-9_]+)/, '"@\\1":[https://twitter.com/\\1]')
desc.strip desc.strip
end end
@@ -204,7 +200,7 @@ module Sources::Strategies
end end
def api_response def api_response
return {} if !self.class.enabled? return {} unless self.class.enabled?
api_client.status(status_id) api_client.status(status_id)
end end

View File

@@ -38,7 +38,7 @@ module Sources
PAGE_URL_1 = %r{\Ahttps?://(?:www\.)?weibo\.com/(?<artist_short_id>\d+)/(?<illust_base62_id>\w+)(?:\?.*)?\z}i PAGE_URL_1 = %r{\Ahttps?://(?:www\.)?weibo\.com/(?<artist_short_id>\d+)/(?<illust_base62_id>\w+)(?:\?.*)?\z}i
PAGE_URL_2 = %r{#{PROFILE_URL_2}/(?:wbphotos/large/mid|talbum/detail/photo_id)/(?<illust_long_id>\d+)(?:/pid/(?<image_id>\w{32}))?}i PAGE_URL_2 = %r{#{PROFILE_URL_2}/(?:wbphotos/large/mid|talbum/detail/photo_id)/(?<illust_long_id>\d+)(?:/pid/(?<image_id>\w{32}))?}i
PAGE_URL_3 = %r{\Ahttps?://m\.weibo\.cn/(detail/(?<illust_long_id>\d+)|status/(?<illust_base62_id>\w+))}i PAGE_URL_3 = %r{\Ahttps?://m\.weibo\.cn/(?:detail/(?<illust_long_id>\d+)|status/(?<illust_base62_id>\w+))}i
PAGE_URL_4 = %r{\Ahttps?://tw\.weibo\.com/(?:(?<artist_short_id>\d+)|\w+)/(?<illust_long_id>\d+)}i PAGE_URL_4 = %r{\Ahttps?://tw\.weibo\.com/(?:(?<artist_short_id>\d+)|\w+)/(?<illust_long_id>\d+)}i
IMAGE_URL = %r{\Ahttps?://\w{3}\.sinaimg\.cn/\w+/(?<image_id>\w{32})\.}i IMAGE_URL = %r{\Ahttps?://\w{3}\.sinaimg\.cn/\w+/(?<image_id>\w{32})\.}i
@@ -203,12 +203,12 @@ module Sources
end end
def api_response def api_response
return nil if mobile_url.blank? return {} if mobile_url.blank?
resp = Danbooru::Http.cache(1.minute).get(mobile_url) resp = Danbooru::Http.cache(1.minute).get(mobile_url)
json_string = resp.to_s[/var \$render_data = \[(.*)\]\[0\]/m, 1] json_string = resp.to_s[/var \$render_data = \[(.*)\]\[0\]/m, 1]
return nil if json_string.blank? return {} if json_string.blank?
JSON.parse(json_string)["status"] JSON.parse(json_string)["status"]
end end