sources: refactor normalize_for_source.
`normalize_for_source` was used to convert image URLs to page URLs when displaying sources on the post show page.

Move all the code for converting image URLs to page URLs from `Sources::Strategies#normalize_for_source` to `Source::URL#page_url`.

Previously, we had to be very careful in source strategies not to make any network calls in `normalize_for_source`, since it was used in the view for the post show page. Now all the code for generating page URLs is isolated in `Source::URL`, which makes source strategies simpler. It also makes it easier to check whether a source is an image URL or a page URL, and whether an image URL is convertible to a page URL, which will make autotagging bad_link or bad_source feasible.

Finally, this change generates better page URLs in a handful of cases:

* https://www.artstation.com/artwork/qPVGP instead of https://anubis1982918.artstation.com/projects/qPVGP
* https://yande.re/post/show?md5=b4b1d11facd1700544554e4805d47bb6 instead of https://yande.re/post?tags=md5:b4b1d11facd1700544554e4805d47bb6
* http://gallery.minitokyo.net/view/365677 instead of http://gallery.minitokyo.net/download/365677
* https://valkyriecrusade.fandom.com/wiki/File:Crimson_Hatsune_H.png instead of https://valkyriecrusade.wikia.com/wiki/File:Crimson_Hatsune_H.png
* https://rule34.paheal.net/post/view/852405 instead of https://rule34.paheal.net/post/list/md5:854806addcd3b1246424e7cea49afe31/1
This commit is contained in:
@@ -23,7 +23,6 @@ module Sources
|
||||
Strategies::Foundation,
|
||||
Strategies::Plurk,
|
||||
Strategies::Tinami,
|
||||
Strategies::TwitPic,
|
||||
Strategies::Fantia,
|
||||
]
|
||||
end
|
||||
@@ -36,9 +35,5 @@ module Sources
|
||||
def self.canonical(url, referer)
|
||||
find(url, referer).canonical_url
|
||||
end
|
||||
|
||||
def self.normalize_source(url)
|
||||
find(url).normalize_for_source || url
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -52,16 +52,6 @@ module Sources::Strategies
|
||||
end
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
return if project_id.blank?
|
||||
|
||||
if artist_name_from_url.present?
|
||||
"https://#{artist_name_from_url}.artstation.com/projects/#{project_id}"
|
||||
else
|
||||
"https://www.artstation.com/artwork/#{project_id}"
|
||||
end
|
||||
end
|
||||
|
||||
def image_urls_from_api
|
||||
api_response[:assets].to_a.map do |asset|
|
||||
if asset[:asset_type] == "image"
|
||||
|
||||
@@ -138,12 +138,6 @@ module Sources
|
||||
end
|
||||
memoize :http_downloader
|
||||
|
||||
# Given a post/image url, this is the normalized url that will be displayed in a post's page in its stead.
|
||||
# This function should never make any network call, even indirectly. Return nil to never normalize.
|
||||
def normalize_for_source
|
||||
nil
|
||||
end
|
||||
|
||||
def artists
|
||||
ArtistFinder.find_artists(profile_url)
|
||||
end
|
||||
|
||||
@@ -65,10 +65,6 @@ module Sources
|
||||
end
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
page_url_from_image_url
|
||||
end
|
||||
|
||||
def profile_url
|
||||
return nil if artist_name.blank?
|
||||
"https://www.deviantart.com/#{artist_name.downcase}"
|
||||
|
||||
@@ -34,19 +34,6 @@ module Sources
|
||||
end
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
if illust_id.present?
|
||||
if artist_name_from_url.present?
|
||||
"https://#{artist_name_from_url}.fanbox.cc/posts/#{illust_id}"
|
||||
elsif artist_id_from_url.present?
|
||||
"https://www.pixiv.net/fanbox/creator/#{artist_id_from_url}/post/#{illust_id}"
|
||||
end
|
||||
elsif artist_id_from_url.present?
|
||||
# Cover images
|
||||
"https://www.pixiv.net/fanbox/creator/#{artist_id_from_url}"
|
||||
end
|
||||
end
|
||||
|
||||
def profile_url
|
||||
return if artist_name.blank?
|
||||
|
||||
|
||||
@@ -128,10 +128,6 @@ module Sources::Strategies
|
||||
DText.from_html(artist_commentary_desc)
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
page_url
|
||||
end
|
||||
|
||||
def work_type
|
||||
parsed_url.work_type || parsed_referer&.work_type
|
||||
end
|
||||
|
||||
@@ -76,10 +76,6 @@ module Sources
|
||||
DText.from_html(artist_commentary_desc)
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
page_url
|
||||
end
|
||||
|
||||
def api_response
|
||||
return {} if page.nil?
|
||||
|
||||
|
||||
@@ -68,10 +68,6 @@ module Sources
|
||||
DText.from_html(artist_commentary_desc).gsub(/\A[[:space:]]+|[[:space:]]+\z/, "").gsub(/\n+/, "\n")
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
page_url
|
||||
end
|
||||
|
||||
def illust_id
|
||||
parsed_url.work_id || parsed_referer&.work_id
|
||||
end
|
||||
|
||||
@@ -47,10 +47,6 @@ module Sources
|
||||
page&.search(".ct .text, .content .text, .posts .photo .text").to_a.compact.first&.to_html
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
page_url
|
||||
end
|
||||
|
||||
def illust_id
|
||||
parsed_url.work_id || parsed_referer&.work_id
|
||||
end
|
||||
|
||||
@@ -79,10 +79,6 @@ module Sources::Strategies
|
||||
api_response.tags
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
page_url
|
||||
end
|
||||
|
||||
def dtext_artist_commentary_desc
|
||||
DText.from_html(artist_commentary_desc) do |element|
|
||||
if element.name == "a"
|
||||
|
||||
@@ -25,17 +25,6 @@ module Sources
|
||||
image_urls.first
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
id = post_id_from_url
|
||||
md5 = post_md5_from_url
|
||||
|
||||
if id.present?
|
||||
"https://#{domain}/post/show/#{id}"
|
||||
elsif md5.present?
|
||||
"https://#{domain}/post?tags=md5:#{md5}"
|
||||
end
|
||||
end
|
||||
|
||||
def tags
|
||||
api_response[:tags].to_s.split.map do |tag|
|
||||
[tag, "https://#{domain}/post?tags=#{CGI.escape(tag)}"]
|
||||
|
||||
@@ -84,10 +84,6 @@ module Sources
|
||||
end
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
page_url
|
||||
end
|
||||
|
||||
def user_name
|
||||
parsed_url.username || parsed_referer&.username
|
||||
end
|
||||
|
||||
@@ -66,10 +66,6 @@ module Sources
|
||||
end.gsub(/[^\w]im(\d+)/, ' seiga #\1 ').chomp
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
page_url
|
||||
end
|
||||
|
||||
def tag_name
|
||||
return if api_client&.user_id.blank?
|
||||
"nicoseiga#{api_client.user_id}"
|
||||
|
||||
@@ -109,12 +109,6 @@ module Sources
|
||||
artist_id_from_url || artist_id_from_page
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
return if illust_id.blank?
|
||||
|
||||
"https://nijie.info/view.php?id=#{illust_id}"
|
||||
end
|
||||
|
||||
def doujin?
|
||||
page&.at("#dojin_left").present?
|
||||
end
|
||||
|
||||
@@ -18,112 +18,6 @@ module Sources
|
||||
def artists
|
||||
ArtistFinder.find_artists(url)
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
case url
|
||||
when %r{\Ahttp://www\.karabako\.net/images(?:ub)?/karabako_(\d+)(?:_\d+)?\.}i
|
||||
"http://www.karabako.net/post/view/#{$1}"
|
||||
|
||||
# XXX http://twipple.jp is defunct
|
||||
# http://p.twpl.jp/show/orig/myRVs
|
||||
when %r{\Ahttp://p\.twpl\.jp/show/(?:large|orig)/([a-z0-9]+)}i
|
||||
"http://p.twipple.jp/#{$1}"
|
||||
|
||||
when %r{\Ahttps?://blog(?:(?:-imgs-)?\d*(?:-origin)?)?\.fc2\.com/(?:(?:[^/]/){3}|(?:[^/]/))([^/]+)/(?:file/)?([^.]+\.[^?]+)}i
|
||||
username = $1
|
||||
filename = $2
|
||||
"http://#{username}.blog.fc2.com/img/#{filename}/"
|
||||
|
||||
when %r{\Ahttps?://diary(\d)?\.fc2\.com/user/([^/]+)/img/(\d+)_(\d+)/(\d+)\.}i
|
||||
server_id = $1
|
||||
username = $2
|
||||
year = $3
|
||||
month = $4
|
||||
day = $5
|
||||
"http://diary#{server_id}.fc2.com/cgi-sys/ed.cgi/#{username}?Y=#{year}&M=#{month}&D=#{day}"
|
||||
|
||||
when %r{\Ahttps?://(?:fbcdn-)?s(?:content|photos)-[^/]+\.(?:fbcdn|akamaihd)\.net/hphotos-.+/\d+_(\d+)_(?:\d+_){1,3}[no]\.}i
|
||||
"https://www.facebook.com/photo.php?fbid=#{$1}"
|
||||
|
||||
when %r{\Ahttps?://c(?:s|han|[1-4])\.sankakucomplex\.com/data(?:/sample)?/(?:[a-f0-9]{2}/){2}(?:sample-|preview)?([a-f0-9]{32})}i
|
||||
"https://chan.sankakucomplex.com/en/post/show?md5=#{$1}"
|
||||
|
||||
when %r{\Ahttps?://(?:www|s(?:tatic|[1-4]))\.zerochan\.net/.+(?:\.|\/)(\d+)(?:\.(?:jpe?g?|png))?\z}i
|
||||
"https://www.zerochan.net/#{$1}#full"
|
||||
|
||||
when %r{\Ahttps?://static[1-6]?\.minitokyo\.net/(?:downloads|view)/(?:\d{2}/){2}(\d+)}i
|
||||
"http://gallery.minitokyo.net/download/#{$1}"
|
||||
|
||||
# https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg
|
||||
# http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
|
||||
# https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
|
||||
when %r{\Ahttps?://(?:\w+\.)?gelbooru\.com//?(?:images|samples)/(?:\d+|\h\h/\h\h)/(?:sample_)?(?<md5>\h{32})\.}i
|
||||
"https://gelbooru.com/index.php?page=post&s=list&tags=md5:#{$~[:md5]}"
|
||||
|
||||
when %r{\Ahttps?://(?:slot\d*\.)?im(?:g|ages)\d*\.wikia\.(?:nocookie\.net|com)/(?:_{2}cb\d{14}/)?([^/]+)(?:/[a-z]{2})?/images/(?:(?:thumb|archive)?/)?[a-f0-9]/[a-f0-9]{2}/(?:\d{14}(?:!|%21))?([^/]+)}i
|
||||
subdomain = $1
|
||||
filename = $2
|
||||
"https://#{subdomain}.wikia.com/wiki/File:#{filename}"
|
||||
|
||||
when %r{\Ahttps?://vignette(?:\d*)\.wikia\.nocookie\.net/([^/]+)/images/[a-f0-9]/[a-f0-9]{2}/([^/]+)}i
|
||||
subdomain = $1
|
||||
filename = $2
|
||||
"https://#{subdomain}.wikia.com/wiki/File:#{filename}"
|
||||
|
||||
when %r{\Ahttps?://e-shuushuu.net/images/\d{4}-(?:\d{2}-){2}(\d+)}i
|
||||
"https://e-shuushuu.net/image/#{$1}"
|
||||
|
||||
when %r{\Ahttps?://jpg\.nijigen-daiaru\.com/(\d+)}i
|
||||
"http://nijigen-daiaru.com/book.php?idb=#{$1}"
|
||||
|
||||
when %r{\Ahttps?://sozai\.doujinantena\.com/contents_jpg/([a-f0-9]{32})/}i
|
||||
"http://doujinantena.com/page.php?id=#{$1}"
|
||||
|
||||
when %r{\Ahttps?://rule34-(?:data-\d{3}|images)\.paheal\.net/(?:_images/)?([a-f0-9]{32})}i
|
||||
"https://rule34.paheal.net/post/list/md5:#{$1}/1"
|
||||
|
||||
when %r{\Ahttps?://shimmie\.katawa-shoujo\.com/image/(\d+)}i
|
||||
"https://shimmie.katawa-shoujo.com/post/view/#{$1}"
|
||||
|
||||
when %r{\Ahttps://(?:(?:\w+\.)?rule34\.xxx|img\.booru\.org/(?:rule34|r34))(?:/(?:img/rule34|r34))?/{1,2}images/\d+/([a-f0-9]{32})\.}i
|
||||
"https://rule34.xxx/index.php?page=post&s=list&md5=#{$1}"
|
||||
|
||||
when %r{(\Ahttps?://.+)/diarypro/d(?:ata/upfile/|iary\.cgi\?mode=image&upfile=)(\d+)}i
|
||||
base_url = $1
|
||||
entry_no = $2
|
||||
"#{base_url}/diarypro/diary.cgi?no=#{entry_no}"
|
||||
|
||||
# XXX site is defunct
|
||||
when %r{\Ahttps?://i(?:\d)?\.minus\.com/(?:i|j)([^\.]{12,})}i
|
||||
"http://minus.com/i/#{$1}"
|
||||
|
||||
# http://art59.photozou.jp/pub/212/1986212/photo/118493247_org.v1534644005.jpg
|
||||
# http://kura3.photozou.jp/pub/794/1481794/photo/161537258_org.v1364829097.jpg
|
||||
when %r{\Ahttps?://\w+\.photozou\.jp/pub/\d+/(?<artist_id>\d+)/photo/(?<photo_id>\d+)_.*$}i
|
||||
"https://photozou.jp/photo/show/#{$~[:artist_id]}/#{$~[:photo_id]}"
|
||||
|
||||
# http://img.toranoana.jp/popup_img/04/0030/09/76/040030097695-2p.jpg
|
||||
# http://img.toranoana.jp/popup_img18/04/0010/22/87/040010228714-1p.jpg
|
||||
# http://img.toranoana.jp/popup_blimg/04/0030/08/30/040030083068-1p.jpg
|
||||
# https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg
|
||||
when %r{\Ahttps?://(?:\w+\.)?toranoana\.jp/(?:popup_(?:bl)?img\d*|ec/img)/\d{2}/\d{4}/\d{2}/\d{2}/(?<work_id>\d+)}i
|
||||
"https://ec.toranoana.jp/tora_r/ec/item/#{$~[:work_id]}/"
|
||||
|
||||
# https://a.hitomi.la/galleries/907838/1.png
|
||||
# https://0a.hitomi.la/galleries/1169701/23.png
|
||||
# https://aa.hitomi.la/galleries/990722/003_01_002.jpg
|
||||
# https://la.hitomi.la/galleries/1054851/001_main_image.jpg
|
||||
when %r{\Ahttps?://\w+\.hitomi\.la/galleries/(?<gallery_id>\d+)/(?<image_id>\d+)\w*\.[a-z]+\z}i
|
||||
"https://hitomi.la/reader/#{$~[:gallery_id]}.html##{$~[:image_id].to_i}"
|
||||
|
||||
# https://aa.hitomi.la/galleries/883451/t_rena1g.png
|
||||
when %r{\Ahttps?://\w+\.hitomi\.la/galleries/(?<gallery_id>\d+)/\w*\.[a-z]+\z}i
|
||||
"https://hitomi.la/galleries/#{$~[:gallery_id]}.html"
|
||||
|
||||
else
|
||||
nil
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -96,11 +96,6 @@ module Sources
|
||||
api_illust[:description]
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
return nil if illust_id.blank?
|
||||
"https://www.pixiv.net/artworks/#{illust_id}"
|
||||
end
|
||||
|
||||
def tag_name
|
||||
moniker
|
||||
end
|
||||
|
||||
@@ -110,10 +110,6 @@ module Sources
|
||||
end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
page_url
|
||||
end
|
||||
|
||||
memoize :page, :page_json, :api_replies
|
||||
end
|
||||
end
|
||||
|
||||
@@ -48,10 +48,6 @@ module Sources
|
||||
"https://skeb.jp/@#{artist_name}/works/#{illust_id}"
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
page_url
|
||||
end
|
||||
|
||||
def api_url
|
||||
return nil unless artist_name.present? && illust_id.present?
|
||||
"https://skeb.jp/api/users/#{artist_name}/works/#{illust_id}"
|
||||
|
||||
@@ -83,10 +83,6 @@ module Sources::Strategies
|
||||
super(tag)
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
parsed_url.page_url
|
||||
end
|
||||
|
||||
def dtext_artist_commentary_desc
|
||||
DText.from_html(artist_commentary_desc).strip
|
||||
end
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::TwitPic
|
||||
module Sources::Strategies
|
||||
class TwitPic < Base
|
||||
def match?
|
||||
Source::URL::TwitPic === parsed_url
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
parsed_url.page_url || url
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -93,14 +93,6 @@ module Sources::Strategies
|
||||
api_response[:full_text].to_s
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
if tag_name_from_url.present? && status_id.present?
|
||||
"https://twitter.com/#{tag_name_from_url}/status/#{status_id}"
|
||||
elsif status_id.present?
|
||||
"https://twitter.com/i/web/status/#{status_id}"
|
||||
end
|
||||
end
|
||||
|
||||
def tags
|
||||
api_response.dig(:entities, :hashtags).to_a.map do |hashtag|
|
||||
[hashtag[:text], "https://twitter.com/hashtag/#{hashtag[:text]}"]
|
||||
@@ -150,7 +142,7 @@ module Sources::Strategies
|
||||
end
|
||||
|
||||
def tag_name_from_url
|
||||
parsed_url.twitter_username || parsed_referer&.twitter_username
|
||||
parsed_url.username || parsed_referer&.username
|
||||
end
|
||||
|
||||
memoize :api_response
|
||||
|
||||
@@ -87,10 +87,6 @@ module Sources
|
||||
end
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
parsed_url.normalized_url
|
||||
end
|
||||
|
||||
def api_response
|
||||
return {} if (mobile_url = parsed_url.mobile_url || parsed_referer&.mobile_url).blank?
|
||||
|
||||
|
||||
Reference in New Issue
Block a user