diff --git a/app/logical/google_big_query/post_version.rb b/app/logical/google_big_query/post_version.rb
index e3e670466..e430841f0 100644
--- a/app/logical/google_big_query/post_version.rb
+++ b/app/logical/google_big_query/post_version.rb
@@ -20,6 +20,16 @@ module GoogleBigQuery
       "regexp_match(removed_tags, \"(?:^| )#{es}(?:$| )\")"
     end
 
+    # Builds the query string selecting updater_id and added_tag from
+    # post_versions_flat_part for one post, limited to _partitiontime between
+    # midnight of created_at's day and midnight of the following day.
+    def find_for_post(post_id, created_at)
+      post_id = post_id.to_i
+      btime = created_at.strftime("%Y-%m-%d 00:00:00")
+      etime = 1.day.since(created_at).strftime("%Y-%m-%d 00:00:00")
+      "select updater_id, added_tag from [danbooru_#{Rails.env}].post_versions_flat_part where _partitiontime >= timestamp('#{btime}') and _partitiontime <= timestamp('#{etime}') and post_id = #{post_id}"
+    end
+
     def find(user_id, added_tags, removed_tags, min_version_id, max_version_id, limit = 1_000)
       constraints = []
 
diff --git a/app/logical/post_keeper_manager.rb b/app/logical/post_keeper_manager.rb
new file mode 100644
index 000000000..26fc22e23
--- /dev/null
+++ b/app/logical/post_keeper_manager.rb
@@ -0,0 +1,121 @@
+# Decides which user is credited as the "keeper" of a post, based on who first
+# added the tags (and the source) that the post still has.
+class PostKeeperManager
+  # The keeper feature is available whenever PostArchive is.
+  def self.enabled?
+    PostArchive.enabled?
+  end
+
+  # Queues check_and_update(post_id) on the "default" delayed job queue.
+  def self.queue_check(post_id)
+    delay(queue: "default").check_and_update(post_id)
+  end
+
+  # Prints how many of the given posts would be credited to someone other than
+  # their uploader, for both the unweighted and the weighted strategy.
+  #
+  # in general, unweighted changes attribution 5% of the time,
+  # weighted changes attribution 12% of the time at w=1000,
+  # up to 17% of the time at w=100.
+  def self.evaluate(post_ids)
+    total = 0
+    changed = 0
+    weighted_changed = 0
+
+    Post.where(id: post_ids).find_each do |post|
+      total += 1
+      changed += 1 if check(post) != post.uploader_id
+
+      if check_weighted(post) != post.uploader_id
+        puts post.id
+        weighted_changed += 1
+      end
+    end
+
+    puts "total: #{total}"
+    puts "unweighted changes: #{changed}"
+    puts "weighted changes: #{weighted_changed}"
+  end
+
+  # Recomputes the keeper of a post and stores it in the keeper_data column.
+  def self.check_and_update(post_id)
+    post = Post.find(post_id)
+    keeper_id = check(post)
+    CurrentUser.as_system do
+      post.update_column(:keeper_data, {uid: keeper_id})
+    end
+  end
+
+  # Logs each contributor's weighted score and number of credited changes,
+  # lowest score first.
+  def self.print_weighted(post, w = 1000)
+    changes = first_changes(post)
+
+    weighted_ranking(changes, w).sort_by {|_user_id, score| score}.each do |user_id, score|
+      user = User.find(user_id)
+      sum = changes.count {|_change, updater_id| updater_id == user_id}
+      # the name is passed as an argument so a "%" in it cannot break the format
+      Rails.logger.debug "%s: %.4f (%d)" % [user.name, score, sum]
+    end
+  end
+
+  # Returns the id of the user with the highest weighted score, where every
+  # credited change is worth 1.0 / (w + post count of the tag). Falls back to
+  # the uploader when nobody can be credited.
+  def self.check_weighted(post, w = 1000)
+    top_user_id(weighted_ranking(first_changes(post), w), post)
+  end
+
+  # Returns the id of the user credited with the most changes. Falls back to
+  # the uploader when nobody can be credited.
+  def self.check(post)
+    ranking = first_changes(post).values.each_with_object(Hash.new(0)) do |user_id, counts|
+      counts[user_id] += 1
+    end
+
+    top_user_id(ranking, post)
+  end
+
+  # Maps every tag the post still has to the user who added it first, plus
+  # " source" to the user who last set the source to its current value.
+  def self.first_changes(post)
+    changes = {}
+    final_tags = Set.new(post.tag_array)
+
+    PostArchive.where(post_id: post.id).order("updated_at").each do |pa|
+      # versions without an updater cannot be credited to anyone
+      next unless pa.updater_id
+
+      # removed tags are deliberately not counted: it is easy to double count
+      # trivial changes if a user is just fixing mistakes
+      pa.added_tags.each do |at|
+        changes[at] ||= pa.updater_id if final_tags.include?(at)
+      end
+
+      # checked once per version, so source-only edits are credited as well
+      if pa.source_changed? && pa.source == post.source
+        changes[" source"] = pa.updater_id
+      end
+    end
+
+    changes
+  end
+
+  # Sums, per user, 1.0 / (w + post_count) over the changes credited to them;
+  # keys without a matching Tag (presumably including " source") count as 0.
+  def self.weighted_ranking(changes, w)
+    changes.each_with_object(Hash.new(0.0)) do |(name, user_id), ranking|
+      count = Tag.find_by_name(name).try(:post_count) || 0
+      ranking[user_id] += 1.0 / (w + count)
+    end
+  end
+
+  # Returns the user id with the highest score, or the uploader's id when the
+  # ranking is empty (max_by returns nil there).
+  def self.top_user_id(ranking, post)
+    best = ranking.max_by {|_user_id, score| score}
+    best ? best.first : post.uploader_id
+  end
+
+  private_class_method :first_changes, :weighted_ranking, :top_user_id
+end
diff --git a/app/models/post.rb b/app/models/post.rb
index 29985c556..2054f696e 100644
--- a/app/models/post.rb
+++ b/app/models/post.rb
@@ -57,15 +57,44 @@ class Post < ApplicationRecord
   has_many :favorites
   has_many :replacements, class_name: "PostReplacement", :dependent => :destroy
 
+  if PostKeeperManager.enabled?
+    serialize :keeper_data, JSON
+  end
+
   if PostArchive.enabled?
     has_many :versions, lambda {order("post_versions.updated_at ASC")}, :class_name => "PostArchive", :dependent => :destroy
   end
 
   attr_accessible :source, :rating, :tag_string, :old_tag_string, :old_parent_id, :old_source, :old_rating, :parent_id, :has_embedded_notes, :as => [:member, :builder, :gold, :platinum, :moderator, :admin, :default]
-  attr_accessible :is_rating_locked, :is_note_locked, :has_cropped, :as => [:builder, :moderator, :admin]
-  attr_accessible :is_status_locked, :as => [:admin]
+  attr_accessible :is_rating_locked, :is_note_locked, :has_cropped, :keeper_data, :as => [:builder, :moderator, :admin]
+  attr_accessible :is_status_locked, :keeper_data, :as => [:admin]
   attr_accessor :old_tag_string, :old_parent_id, :old_source, :old_rating, :has_constraints, :disable_versioning, :view_count
 
+  concerning :KeeperMethods do
+    included do
+      before_create :initialize_keeper
+    end
+
+    # Id of the user credited as keeper of this post; the uploader when no
+    # keeper has been recorded.
+    def keeper_id
+      # keeper_data is serialized as JSON, so its keys are strings once the
+      # record has been loaded from the database; initialize_keeper uses a
+      # symbol key, so both forms are accepted
+      data = keeper_data.is_a?(Hash) ? keeper_data : {}
+      data["uid"] || data[:uid] || uploader_id
+    end
+
+    def keeper
+      User.find(keeper_id)
+    end
+
+    # Credits the uploader as keeper of a newly created post.
+    def initialize_keeper
+      self.keeper_data = {uid: uploader_id}
+    end
+  end
+
   module FileMethods
     extend ActiveSupport::Concern
 
@@ -639,6 +668,12 @@ class Post < ApplicationRecord
       if decrement_tags.any?
         Tag.decrement_post_counts(decrement_tags)
       end
+
+      if PostKeeperManager.enabled? && persisted?
+        # no need to do this check on the initial create
+        # NOTE(review): persisted? is already true inside after_save/after_create hooks -- confirm which callback runs this method
+        PostKeeperManager.queue_check(id)
+      end
     end
 
     def set_tag_count(category,tagcount)
diff --git a/app/views/posts/partials/show/_information.html.erb b/app/views/posts/partials/show/_information.html.erb
index 5a6ab8ade..f6fad113f 100644
--- a/app/views/posts/partials/show/_information.html.erb
+++ b/app/views/posts/partials/show/_information.html.erb
@@ -1,6 +1,9 @@