Refactor source normalization

* Move the source normalization logic out of the post model
  and into individual sources' strategies.
* Rewrite the normalization tests so they live in each source's own test,
  and expand them significantly. Previously we were only testing
  a very small subset of domains and variants.
* Fix up normalization for several sites.
* Normalize fav.me urls into normal deviantart urls.
This commit is contained in:
nonamethanks
2020-05-16 23:03:09 +02:00
parent 364343453c
commit 307df3b3e4
26 changed files with 674 additions and 315 deletions

View File

@@ -1,6 +1,28 @@
# Image URLS:
# * https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png
# * https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png
# * https://pawoo.net/media/lU2uV7C1MMQSb1czwvg
#
# Page URLS:
# * https://pawoo.net/@evazion/19451018
# * https://pawoo.net/web/statuses/19451018
#
# Account URLS:
# * https://pawoo.net/@evazion
# * https://pawoo.net/web/accounts/47806
#
# OAUTH URLS: (NOTE: ID IS DIFFERENT FROM ACCOUNT URL ID)
# * https://pawoo.net/oauth_authentications/17230064
module Sources::Strategies
class Pawoo < Base
# Scheme and host of a pawoo.net URL (optional "www." prefix), anchored at the
# start of the string. Case-insensitive.
HOST = %r!\Ahttps?://(www\.)?pawoo\.net!i
# Common prefix of an img.pawoo.net media attachment URL. The capture group
# holds the three numeric path segments (e.g. "001/297/997"). Case-sensitive.
IMAGE = %r!\Ahttps?://img\.pawoo\.net/media_attachments/files/(\d+/\d+/\d+)!
# Profile URL of the form https://pawoo.net/@name; captures `artist_name`.
NAMED_PROFILE = %r!#{HOST}/@(?<artist_name>\w+)!i
# Profile URL of the form https://pawoo.net/web/accounts/<id>; captures `artist_id`.
ID_PROFILE = %r!#{HOST}/web/accounts/(?<artist_id>\d+)!
# Status URL of the form https://pawoo.net/web/statuses/<id>; captures `status_id`.
STATUS1 = %r!\A#{HOST}/web/statuses/(?<status_id>\d+)!
# Status URL of the form https://pawoo.net/@name/<id>; captures `artist_name`
# (via NAMED_PROFILE) and `status_id`.
STATUS2 = %r!\A#{NAMED_PROFILE}/(?<status_id>\d+)!
def domains
["pawoo.net"]
@@ -14,9 +36,6 @@ module Sources::Strategies
image_urls.first
end
# https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png
# https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png
# https://pawoo.net/media/lU2uV7C1MMQSb1czwvg
def image_urls
if url =~ %r!#{IMAGE}/small/([a-z0-9]+\.\w+)\z!i
return ["https://img.pawoo.net/media_attachments/files/#{$1}/original/#{$2}"]
@@ -29,8 +48,6 @@ module Sources::Strategies
return api_response.image_urls
end
# https://pawoo.net/@evazion/19451018
# https://pawoo.net/web/statuses/19451018
def page_url
[url, referer_url].each do |x|
if PawooApiClient::Status.is_match?(x)
@@ -41,8 +58,6 @@ module Sources::Strategies
return super
end
# https://pawoo.net/@evazion
# https://pawoo.net/web/accounts/47806
def profile_url
if url =~ PawooApiClient::PROFILE2
return "https://pawoo.net/@#{$1}"
@@ -56,6 +71,22 @@ module Sources::Strategies
api_response.account_name
end
# Extracts the account name from a "https://pawoo.net/@name" style URL.
#
# Returns the `artist_name` capture of NAMED_PROFILE, or nil when `url`
# does not match that pattern.
def artist_name_from_url
  return unless url =~ NAMED_PROFILE

  # The match above populated the last-match data; read the named group from it.
  Regexp.last_match(:artist_name)
end
# Extracts the numeric account id from a
# "https://pawoo.net/web/accounts/<id>" style URL.
#
# Returns the `artist_id` capture of ID_PROFILE as a String, or nil when
# `url` does not match that pattern.
def artist_id_from_url
  return unless url =~ ID_PROFILE

  # ID_PROFILE defines the group `artist_id` (not `artist_name`); asking for a
  # group name the pattern does not define raises IndexError.
  Regexp.last_match(:artist_id)
end
# Finds the status id in the first of `urls` that looks like a status page.
#
# Each URL is tried against STATUS1 (/web/statuses/<id>) and then STATUS2
# (/@name/<id>). Returns the first id found as a String, or nil if none of
# the URLs match.
def status_id_from_url
  status_ids = urls.map do |candidate|
    candidate[STATUS1, :status_id] || candidate[STATUS2, :status_id]
  end

  status_ids.compact.first
end
# This strategy supplies no commentary title; the result is always nil.
def artist_commentary_title
  # Intentionally empty: an empty method body evaluates to nil.
end
@@ -76,6 +107,18 @@ module Sources::Strategies
profile_url
end
# Builds the canonical page URL for the status referenced by this source.
#
# Returns nil when no status id can be extracted. When the account name is
# known the "https://pawoo.net/@name/<id>" form is returned; otherwise the
# "https://pawoo.net/web/statuses/<id>" form is returned.
def normalize_for_source
  account = artist_name_from_url
  status = status_id_from_url

  return nil unless status.present?
  return "https://pawoo.net/web/statuses/#{status}" unless account.present?

  "https://pawoo.net/@#{account}/#{status}"
end
def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc) do |element|
if element.name == "a"