From 83d14a281fdda0401f729fa31459624a481a0fae Mon Sep 17 00:00:00 2001 From: evazion Date: Tue, 8 Nov 2022 21:41:59 -0600 Subject: [PATCH] replacements: backfill images in parallel. --- script/fixes/127_download_old_replacements.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/script/fixes/127_download_old_replacements.rb b/script/fixes/127_download_old_replacements.rb index 53c43f93c..86813cedd 100755 --- a/script/fixes/127_download_old_replacements.rb +++ b/script/fixes/127_download_old_replacements.rb @@ -4,8 +4,12 @@ require_relative "base" FIX = ENV.fetch("FIX", "false").truthy? COND = ENV.fetch("COND", "TRUE") +WORKERS = ENV.fetch("WORKERS", 5).to_i def download(replacement, md5) + # url = "https://b2.donmai.us/file/danbooru/original/#{md5[0..1]}/#{md5[2..3]}/#{md5}.jpg" + # image_url = url if Danbooru::Http.head(url)&.status == 200 + url = "https://gelbooru.com/index.php?page=post&s=list&md5=#{md5}" image_url = Source::Extractor.find(url).image_urls.first @@ -17,10 +21,10 @@ rescue Timeout::Error puts ({ error: "upload timed out", replacement: replacement.id, upload: upload&.id, image_url:, }).to_json end -PostReplacement.where(COND).find_each do |replacement| +PostReplacement.where(COND).parallel_each(in_processes: WORKERS) do |replacement| old_media_asset = MediaAsset.active.find_by(md5: replacement.old_md5) new_media_asset = MediaAsset.active.find_by(md5: replacement.md5) - download(replacement, replacement.old_md5) if old_media_asset.nil? - download(replacement, replacement.md5) if new_media_asset.nil? + download(replacement, replacement.old_md5) if old_media_asset.nil? && replacement.old_md5.present? + download(replacement, replacement.md5) if new_media_asset.nil? && replacement.md5.present? end