Files
danbooru/app/logical/sources/strategies/hentai_foundry.rb
evazion 88d9fc4e5e sources: simplify artist finder url normalization.
Get rid of `normalized_for_artist_finder?` and `normalizable_for_artist_finder?`.
This was legacy bullshit that was originally designed to avoid API calls
when saving artist entries containing old Pixiv direct image urls that
had already been normalized, or that couldn't be normalized because they
were bad id.

Nowadays we store profile urls in artist entries instead of direct image
urls, so we don't normally need to do any API calls to normalize the
profile url. Strategies should take care to avoid triggering API calls
inside `profile_url` when possible.
2020-05-29 15:35:15 -05:00

117 lines
3.3 KiB
Ruby

# Image URLs
#
# * https://pictures.hentai-foundry.com/a/Afrobull/795025/Afrobull-795025-kuroeda.png
#
# Page URLs
#
# * https://www.hentai-foundry.com/pictures/user/Afrobull/795025/kuroeda
# * https://www.hentai-foundry.com/pictures/user/Afrobull/795025
# * http://www.hentai-foundry.com/pic-795025
# * http://www.hentai-foundry.com/pictures/user/Ganassa/457176/LOL-Swimsuit---Caitlyn-reworked-nude-ver.
#
# Preview URLs
#
# * https://thumbs.hentai-foundry.com/thumb.php?pid=795025&size=350
#
# Profile URLs
#
# * https://www.hentai-foundry.com/user/kajinman/profile
# * https://www.hentai-foundry.com/pictures/user/kajinman
# * https://www.hentai-foundry.com/pictures/user/kajinman/scraps
# * https://www.hentai-foundry.com/user/J-likes-to-draw/profile
module Sources
  module Strategies
    # Source strategy for Hentai Foundry.
    #
    # Recognizes the site's image, page and profile URLs, derives the
    # canonical picture page from them, and reads image links, tags and the
    # artist commentary out of that page's HTML.
    #
    # NOTE(review): `url`, `urls` and `image_url` are not defined in this
    # file; they are presumably inherited from Base -- confirm there.
    class HentaiFoundry < Base
      # Scheme and host (with optional "www.") shared by page and profile URLs.
      BASE_URL = %r!\Ahttps?://(?:www\.)?hentai-foundry\.com!i

      # Picture page, e.g. /pictures/user/Afrobull/795025/kuroeda.
      # Captures `artist_name` and `illust_id`; the title slug and a query
      # string are optional.
      PAGE_URL = %r!#{BASE_URL}/pictures/user/(?<artist_name>[\w-]+)/(?<illust_id>\d+)(?:/[\w.-]*)?(\?[\w=]*)?\z!i

      # Short picture page, e.g. /pic-795025 (optionally ending in ".html").
      # Captures `illust_id` only.
      OLD_PAGE = %r!#{BASE_URL}/pic-(?<illust_id>\d+)(?:\.html)?\z!i

      # Profile or gallery page, e.g. /user/kajinman/profile or
      # /pictures/user/kajinman/scraps. Captures `artist_name`.
      PROFILE_URL = %r!#{BASE_URL}/(?:pictures/)?user/(?<artist_name>[\w-]+)(?:/[a-z]*)?\z!i

      # Direct image on pictures.hentai-foundry.com, e.g.
      # /a/Afrobull/795025/Afrobull-795025-kuroeda.png.
      # Captures `artist_name` and `illust_id`.
      IMAGE_URL = %r!\Ahttps?://pictures\.hentai-foundry\.com/+\w/(?<artist_name>[\w-]+)/(?<illust_id>\d+)(?:(?:/[\w.-]+)?\.\w+)?\z!i

      # @return [Array<String>] the domain names this strategy is declared for
      def domains
        ["hentai-foundry.com"]
      end

      # @return [String] the human-readable site name
      def site_name
        "Hentai Foundry"
      end

      # Absolute URLs of every `<img>` found inside `#picBox` on the picture
      # page.
      #
      # @return [Array<String>] empty when no page could be determined
      def image_urls
        images = page&.search("#picBox img")
        return [] unless images

        # `src` may be relative, so resolve it against the page URL.
        images.to_a.map { |img| URI.join(page_url, img["src"]).to_s }
      end

      # One thumbnail URL per entry in `image_urls`. Every entry is the same
      # URL, since it is built only from `illust_id`.
      #
      # @return [Array<String>]
      def preview_urls
        image_urls.map do
          "https://thumbs.hentai-foundry.com/thumb.php?pid=#{illust_id}&size=250"
        end
      end

      # The picture page URL rebuilt from the parsed ID and artist name.
      # Uses the short /pic-ID form when the artist name is unknown.
      #
      # @return [String, nil] nil when no illustration ID could be parsed
      def page_url
        return nil if illust_id.blank?

        if artist_name.blank?
          "https://www.hentai-foundry.com/pic-#{illust_id}"
        else
          "https://www.hentai-foundry.com/pictures/user/#{artist_name}/#{illust_id}"
        end
      end

      # The picture page parsed into a Nokogiri HTML document.
      #
      # The response body is fetched through `Cache.get` with a `1.minute`
      # argument (presumably the expiry -- confirm against Cache). The HTML is
      # re-parsed on every call.
      #
      # @return [Nokogiri::HTML::Document, nil] nil when `page_url` is blank
      def page
        return nil if page_url.blank?

        # NOTE(review): `enterAgree=1` presumably skips the site's
        # age-confirmation interstitial -- confirm.
        body = Cache.get("hentai-foundry:#{page_url}", 1.minute) do
          HTTParty.get("#{page_url}?enterAgree=1").body
        end

        Nokogiri::HTML(body)
      end

      # Tags listed on the picture page, taken from `[rel='tag']` elements
      # under `.boxbody`.
      #
      # @return [Array<Array(String, String)>] `[tag text, absolute tag URL]`
      #   pairs; empty when there is no page
      def tags
        links = page&.search(".boxbody [rel='tag']") || []

        links.map do |link|
          [link.text, URI.join(page_url, link.attr("href")).to_s]
        end
      end

      # The artist name captured from the first URL in `urls` that matches a
      # profile, page or image pattern.
      #
      # @return [String, nil]
      def artist_name
        urls.map do |candidate|
          candidate[PROFILE_URL, :artist_name] ||
            candidate[PAGE_URL, :artist_name] ||
            candidate[IMAGE_URL, :artist_name]
        end.compact.first
      end

      # @return [String, nil] whatever `image_url` returns
      def canonical_url
        image_url
      end

      # @return [String, nil] the artist's profile URL, or nil when the artist
      #   name is unknown
      def profile_url
        return nil if artist_name.blank?

        "https://www.hentai-foundry.com/user/#{artist_name}"
      end

      # @return [String, nil] text of `#picBox .imageTitle`; nil without a page
      def artist_commentary_title
        page&.search("#picBox .imageTitle")&.text
      end

      # @return [String, nil] HTML of `#descriptionBox .picDescript`; nil
      #   without a page
      def artist_commentary_desc
        page&.search("#descriptionBox .picDescript")&.to_html
      end

      # The commentary description converted by `DText.from_html`, with
      # leading/trailing whitespace removed and runs of newlines collapsed to
      # a single newline.
      #
      # @return [String]
      def dtext_artist_commentary_desc
        DText.from_html(artist_commentary_desc).gsub(/\A[[:space:]]+|[[:space:]]+\z/, "").gsub(/\n+/, "\n")
      end

      # @return [String, nil] the picture page URL (same as `page_url`)
      def normalize_for_source
        page_url
      end

      # The illustration ID captured from the first URL in `urls` that matches
      # a page, image or old-style page pattern.
      #
      # Every URL in `urls` is inspected, mirroring `artist_name`, so the ID
      # is still found when only a secondary URL carries it. If the two
      # lookups disagreed, `page_url` would return nil even though the artist
      # name had been resolved.
      #
      # @return [String, nil]
      def illust_id
        urls.map do |candidate|
          candidate[PAGE_URL, :illust_id] ||
            candidate[IMAGE_URL, :illust_id] ||
            candidate[OLD_PAGE, :illust_id]
        end.compact.first
      end
    end
  end
end