# Patch summary (preamble text before the first "diff --git" header is not part of any hunk).
#
# app/logical/source/extractor/lofter.rb
#   * artist_commentary_desc: the single comma-separated CSS selector string is
#     replaced by an array of selectors joined with ", " at call time. The three
#     existing selectors are kept and "#post .description" is added as a fourth.
#
# test/unit/sources/lofter_test.rb
#   * The hand-written setup/should blocks are replaced by strategy_should_work
#     calls, one per context.
#   * The direct-image-URL case becomes its own context (it now asserts
#     profile_url: nil and download_size: 2_739_443).
#   * The <.content .text> case now pins exact tags and commentary instead of
#     only asserting that tags are non-empty.
#   * A new context covers a post whose commentary lives under <#post .description>.
#   * The dead-link case now expects deleted: true instead of only checking that
#     to_h does not raise.
#   * The "find the correct artist" assertion (FactoryBot artist lookup) is removed
#     and has no replacement in this patch.
#
# NOTE(review): this patch was recovered from a copy whose newlines had been
# collapsed. Line breaks and blank lines were rebuilt from the hunk headers
# (-44,7 +44,13 and -2,71 +2,56 both add up); leading indentation is assumed to
# be standard 2-space Ruby nesting -- confirm against the target files, or apply
# with whitespace-tolerant options, before relying on it.
diff --git a/app/logical/source/extractor/lofter.rb b/app/logical/source/extractor/lofter.rb
index aa4feabaf..917627de8 100644
--- a/app/logical/source/extractor/lofter.rb
+++ b/app/logical/source/extractor/lofter.rb
@@ -44,7 +44,13 @@ module Source
       end
 
       def artist_commentary_desc
-        page&.search(".ct .text, .content .text, .posts .photo .text").to_a.compact.first&.to_html
+        commentary_selectors = [
+          ".ct .text",
+          ".content .text",
+          ".posts .photo .text",
+          "#post .description",
+        ]
+        page&.search(commentary_selectors.join(", ")).to_a.compact.first&.to_html
       end
 
       def illust_id
diff --git a/test/unit/sources/lofter_test.rb b/test/unit/sources/lofter_test.rb
index c066f002f..a3014769f 100644
--- a/test/unit/sources/lofter_test.rb
+++ b/test/unit/sources/lofter_test.rb
@@ -2,71 +2,56 @@ require "test_helper"
 
 module Sources
   class LofterTest < ActiveSupport::TestCase
-    context "A lofter post" do
-      setup do
-        @img = "https://imglf4.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJUFczb2RKSVlpMHJkNy9kc3BSQVQvQm5DNzB4eVhxay9nPT0.png?imageView&thumbnail=1680x0&quality=96&stripmeta=0"
-        @ref = "https://gengar563.lofter.com/post/1e82da8c_1c98dae1b"
-        @source = Source::Extractor.find(@img, @ref)
-        @source2 = Source::Extractor.find(@ref)
-      end
-
-      should "get the artist name" do
-        assert_equal("gengar563", @source.artist_name)
-      end
-
-      should "get the commentary" do
-        assert_match(/发了三次发不出有毒…… \n.*\n失去耐心.jpg/, @source.dtext_artist_commentary_desc)
-      end
-      should "get profile url" do
-        assert_equal("https://gengar563.lofter.com", @source.profile_url)
-      end
-
-      should "get the image urls" do
-        images = %w[
-          https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJQ1RxY0lYaU1UUE9tQ0NvUE9rVXFpOFFEVzMwbnQ4aEFnPT0.jpg
-          https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJRWlXYTRVOEpXTU9TSGt3TjBDQ0JFZVpZMEJtWjFneVNBPT0.png
-          https://imglf6.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJR1d3Y2VvbTNTQlIvdFU1WWlqZHEzbjI4MFVNZVdoN3VBPT0.png
-          https://imglf6.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJTi83NDRDUjNvd3hySGxEZFovd2hwbi9oaG9NQ1hOUkZ3PT0.png
-          https://imglf4.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJUFczb2RKSVlpMHJkNy9kc3BSQVQvQm5DNzB4eVhxay9nPT0.png
-          https://imglf4.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJSStJZE9RYnJURktHazdIVHNNMjQ5eFJldHVTQy9XbDB3PT0.png
-          https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJSzFCWFlnUWgzb01DcUdpT1lreG5yQjJVMkhGS09HNGR3PT0.png
-        ]
-
-        assert_equal(images, @source2.image_urls)
-      end
-
-      should "download the full-size image" do
-        assert_downloaded(2_739_443, @source.image_urls.sole)
-      end
-
-      should "find the correct artist" do
-        @artist = FactoryBot.create(:artist, name: "gengar563", url_string: "https://gengar563.lofter.com")
-        assert_equal([@artist], @source.artists)
-      end
+    context "A lofter post with commentary under <.ct .text>" do
+      image_urls = %w[
+        https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJQ1RxY0lYaU1UUE9tQ0NvUE9rVXFpOFFEVzMwbnQ4aEFnPT0.jpg
+        https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJRWlXYTRVOEpXTU9TSGt3TjBDQ0JFZVpZMEJtWjFneVNBPT0.png
+        https://imglf6.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJR1d3Y2VvbTNTQlIvdFU1WWlqZHEzbjI4MFVNZVdoN3VBPT0.png
+        https://imglf6.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJTi83NDRDUjNvd3hySGxEZFovd2hwbi9oaG9NQ1hOUkZ3PT0.png
+        https://imglf4.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJUFczb2RKSVlpMHJkNy9kc3BSQVQvQm5DNzB4eVhxay9nPT0.png
+        https://imglf4.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJSStJZE9RYnJURktHazdIVHNNMjQ5eFJldHVTQy9XbDB3PT0.png
+        https://imglf3.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJSzFCWFlnUWgzb01DcUdpT1lreG5yQjJVMkhGS09HNGR3PT0.png
+      ]
+      strategy_should_work(
+        "https://gengar563.lofter.com/post/1e82da8c_1c98dae1b",
+        image_urls: image_urls,
+        artist_name: "gengar563",
+        profile_url: "https://gengar563.lofter.com",
+        dtext_artist_commentary_desc: /发了三次发不出有毒…… \n.*\n失去耐心.jpg/
+      )
     end
 
-    context "A different CSS schema" do
-      should "still find all the data" do
-        source1 = Source::Extractor.find("https://yuli031458.lofter.com/post/3163d871_1cbdc5f6d")
-
-        assert_equal(["https://imglf5.lf127.net/img/Mm55d3lNK2tJUWpNTjVLN0MvaTRDc1UvQUFLMGszOHRvSjV6S3VSa1lwa3BDWUtVOWpBTHBnPT0.jpg"], source1.image_urls)
-        assert_not_empty(source1.tags)
-      end
+    context "A lofter direct image url" do
+      strategy_should_work(
+        "https://imglf4.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJUFczb2RKSVlpMHJkNy9kc3BSQVQvQm5DNzB4eVhxay9nPT0.png?imageView&thumbnail=1680x0&quality=96&stripmeta=0",
+        image_urls: ["https://imglf4.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJUFczb2RKSVlpMHJkNy9kc3BSQVQvQm5DNzB4eVhxay9nPT0.png"],
+        profile_url: nil,
+        download_size: 2_739_443
+      )
     end
 
-    context "A bad link" do
-      should "correctly get the full size" do
-        source = Source::Extractor.find("https://imglf4.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJUFczb2RKSVlpMHJkNy9kc3BSQVQvQm5DNzB4eVhxay9nPT0.png?imageView&thumbnail=1680x0&quality=96&stripmeta=0")
-        assert_equal(["https://imglf4.lf127.net/img/S1d2QlVsWkJhSW1qcnpIS0ZSa3ZJUFczb2RKSVlpMHJkNy9kc3BSQVQvQm5DNzB4eVhxay9nPT0.png"], source.image_urls)
-        assert_nothing_raised { source.to_h }
-      end
+    context "A lofter post with commentary under <.content .text>" do
+      strategy_should_work(
+        "https://yuli031458.lofter.com/post/3163d871_1cbdc5f6d",
+        image_urls: ["https://imglf5.lf127.net/img/Mm55d3lNK2tJUWpNTjVLN0MvaTRDc1UvQUFLMGszOHRvSjV6S3VSa1lwa3BDWUtVOWpBTHBnPT0.jpg"],
+        tags: ["明日方舟", "阿米娅"],
+        dtext_artist_commentary_desc: "Amiya"
+      )
+    end
+
+    context "A lofter post with commentary under <#post .description>" do
+      strategy_should_work(
+        "https://chengyeliuli.lofter.com/post/1d127639_2b6e850c8",
+        image_urls: ["https://imglf3.lf127.net/img/d28aeb098a69b1d2/ZmltbmVjOU9BRzFHVTVnTkNmc0V0NDlSRnNrdENIWWwyZkFreTJJd0duRT0.jpg"],
+        dtext_artist_commentary_desc: /練習\s+画画卡姐~/
+      )
     end
 
     context "A dead link" do
-      should "not raise anything" do
-        source = Source::Extractor.find("https://gxszdddd.lofter.com/post/322595b1_1ca5e6f66")
-        assert_nothing_raised { source.to_h }
-      end
+      strategy_should_work(
+        "https://gxszdddd.lofter.com/post/322595b1_1ca5e6f66",
+        deleted: true
+      )
     end
 
     should "Parse Lofter URLs correctly" do