Merge pull request #4055 from r888888888/rollback-top-tagger

remove post keeper references
Authored by Albert Yi on 2019-02-04 16:02:29 -08:00, committed by GitHub.
6 changed files with 2 additions and 310 deletions

View File

@@ -1,184 +0,0 @@
class PostKeeperManager
  def self.enabled?
    PostArchive.enabled?
  end

  # these are all class methods to simplify interaction with delayedjob
  # in general we want to call these methods synchronously because updating
  # the keeper data with a delay defeats the purpose. but this relies on
  # archive db being up; we don't want to block updates in case it goes down.
  # so we need to permit async updates also.
  def self.queue_check(post_id, updater_id = nil)
    delay(queue: "default", run_at: 1.minute.from_now).check_and_update(post_id)
  end

  def self.check_and_update(post, updater_id = nil, increment_tags = nil)
    post = Post.find(post) unless post.is_a?(Post)
    check_and_assign(post, updater_id, increment_tags)
    post.update_column(:keeper_data, post.keeper_data)
  rescue ActiveRecord::StatementInvalid => e
    PostArchive.check_for_retry(e.message)
    raise
  end

  def self.check_and_assign(post, updater_id = nil, increment_tags = nil)
    post = Post.find(post) unless post.is_a?(Post)
    keeper_id = check(post, updater_id, increment_tags)
    post.keeper_data = {uid: keeper_id}
  end

  # because post archives might get delayed, we need to pass along the most
  # recently added tags inside the job. downside: this doesn't keep track of
  # source or rating changes. this method changes no state.
  def self.check(post, updater_id = nil, increment_tags = nil, enable_async = true)
    if enable_async && !PostArchive.test_connection
      # if archive is down, just queue this work and do it later
      queue_check(post.id, updater_id, increment_tags)
      return
    end

    changes = {}
    final_tags = Set.new(post.tag_array)

    # build a mapping of who added a tag first
    PostArchive.where(post_id: post.id).order("updated_at").each do |pa|
      pa.added_tags.each do |at|
        if pa.updater_id
          if !changes.has_key?(at) && final_tags.include?(at)
            changes[at] = pa.updater_id
          end

          if pa.source_changed? && pa.source == post.source
            changes[" source"] = pa.updater_id
          end
        end
      end
    end

    if updater_id && increment_tags.present?
      increment_tags.each do |tag|
        if !changes.has_key?(tag)
          changes[tag] = updater_id
        end
      end
    end

    # add up how many changes each user has made
    ranking = changes.values.uniq.inject({}) do |h, user_id|
      h[user_id] = changes.select {|k, v| v == user_id}.size
      h
    end

    ranking.max_by {|k, v| v}.try(:first)
  end

  # these methods are for reporting and are not used
  # in general, unweighted changes attribution 5% of the time,
  # weighted changes attribution 12% of the time at w=1000,
  # up to 17% of the time at w=100.
  def self.evaluate(post_ids)
    total = 0
    matches = 0
    weighted_matches = 0
    keeper_dist = {}
    uploader_dist = {}

    Post.where(id: post_ids).find_each do |post|
      keeper = check(post)
      total += 1

      if keeper != post.uploader_id
        matches += 1
        # keeper_dist[keeper] ||= 0
        # keeper_dist[keeper] += 1
        # uploader_dist[post.uploader_id] ||= 0
        # uploader_dist[post.uploader_id] += 1
      end

      if check_weighted(post) != post.uploader_id
        puts post.id
        weighted_matches += 1
      end
    end

    puts "total: #{total}"
    puts "unweighted changes: #{matches}"
    puts "weighted changes: #{weighted_matches}"
    # puts "keepers:"
    # keeper_dist.each do |k, v|
    #   puts " #{k}: #{v}"
    # end
    # puts "uploaders:"
    # uploader_dist.each do |k, v|
    #   puts " #{k}: #{v}"
    # end
  end

  def self.print_weighted(post, w = 1000)
    changes = {}
    final_tags = Set.new(post.tag_array)

    # build a mapping of who added a tag first
    PostArchive.where(post_id: post.id).order("updated_at").each do |pa|
      pa.added_tags.each do |at|
        if pa.updater_id
          if !changes.has_key?(at) && final_tags.include?(at)
            changes[at] = pa.updater_id
          end

          if pa.source_changed? && pa.source == post.source
            changes[" source"] = pa.updater_id
          end
        end
      end
    end

    # add up how many changes each user has made
    ranking = changes.values.uniq.inject({}) do |h, user_id|
      h[user_id] = changes.select {|k, v| v == user_id}.map do |tag, user_id|
        count = Tag.find_by_name(tag).try(:post_count) || 0
        1.0 / (w + count)
      end.sum
      h
    end

    ranking.sort_by {|k, v| v}.each do |user_id, score|
      user = User.find(user_id)
      sum = changes.select {|k, v| v == user_id}.size
      Rails.logger.debug "#{user.name}: %.4f (%d)" % [score, sum]
    end
  end

  def self.check_weighted(post, w = 1000)
    changes = {}
    final_tags = Set.new(post.tag_array)

    # build a mapping of who added a tag first
    PostArchive.where(post_id: post.id).order("updated_at").each do |pa|
      pa.added_tags.each do |at|
        if pa.updater_id
          if !changes.has_key?(at) && final_tags.include?(at)
            changes[at] = pa.updater_id
          end

          if pa.source_changed? && pa.source == post.source
            changes[" source"] = pa.updater_id
          end
        end
      end
    end

    # add up how many changes each user has made
    ranking = changes.values.uniq.inject({}) do |h, user_id|
      h[user_id] = changes.select {|k, v| v == user_id}.map do |tag, user_id|
        count = Tag.find_by_name(tag).try(:post_count) || 0
        1.0 / (w + count)
      end.sum
      h
    end

    ranking.max_by {|k, v| v}.first
  end
end
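
For orientation, the attribution in check and check_weighted reduces to: credit each still-present tag (plus the " source" pseudo-entry) to the first user who added it, then pick the user with the highest total. A minimal standalone Ruby sketch of just that ranking step, using hypothetical literal data in place of the PostArchive history and Tag lookups used by the removed class:

# Hedged sketch of the ranking step only; the data below is invented for illustration.
# "changes" maps each retained tag (and the " source" pseudo-key) to the user who added it first.
changes = {
  "aaa"     => 100,  # user 100 added "aaa" first
  "bbb"     => 100,
  "ccc"     => 200,
  " source" => 200,
  "ddd"     => 200,
}

# stand-in for Tag.find_by_name(tag).post_count
post_counts = {"aaa" => 500_000, "bbb" => 20, "ccc" => 20, " source" => 0, "ddd" => 20}
w = 1000

# unweighted: one credit per change, as in check
unweighted = changes.values.uniq.inject({}) do |h, user_id|
  h[user_id] = changes.count { |_tag, uid| uid == user_id }
  h
end
# => {100 => 2, 200 => 3}

# weighted: rarer tags count for more, as in check_weighted
weighted = changes.values.uniq.inject({}) do |h, user_id|
  h[user_id] = changes.select { |_tag, uid| uid == user_id }
                      .sum { |tag, _uid| 1.0 / (w + post_counts.fetch(tag, 0)) }
  h
end
# => roughly {100 => 0.00098, 200 => 0.00296}

unweighted.max_by { |_user_id, score| score }&.first
# => 200, the id that would have been stored in keeper_data as the post's "top tagger"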

View File

@@ -59,31 +59,8 @@ class Post < ApplicationRecord
  has_many :favorites
  has_many :replacements, class_name: "PostReplacement", :dependent => :destroy

  serialize :keeper_data, JSON

  attr_accessor :old_tag_string, :old_parent_id, :old_source, :old_rating, :has_constraints, :disable_versioning, :view_count

  concerning :KeeperMethods do
    included do
      before_create :initialize_keeper
    end

    def keeper_id
      if PostKeeperManager.enabled?
        (keeper_data && keeper_data["uid"]) ? keeper_data["uid"] : uploader_id
      else
        uploader_id
      end
    end

    def keeper
      User.find(keeper_id)
    end

    def initialize_keeper
      self.keeper_data = {uid: uploader_id}
    end
  end

  if PostArchive.enabled?
    has_many :versions, -> { Rails.env.test? ? order("post_versions.updated_at ASC, post_versions.id ASC") : order("post_versions.updated_at ASC") }, :class_name => "PostArchive", :dependent => :destroy
  end
@@ -620,14 +597,6 @@ class Post < ApplicationRecord
    if decrement_tags.any?
      Tag.decrement_post_counts(decrement_tags)
    end

    if PostKeeperManager.enabled? && persisted?
      # no need to do this check on the initial create
      PostKeeperManager.check_and_assign(self, CurrentUser.id, increment_tags)

      # run this again async to check for race conditions
      PostKeeperManager.queue_check(id, CurrentUser.id)
    end
  end

  def set_tag_count(category,tagcount)
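
The removed KeeperMethods concern above resolved keeper_id from the serialized keeper_data column and fell back to uploader_id whenever the feature was disabled or no keeper had been recorded. A hedged sketch of that resolution as a plain function, with hypothetical names and values that are not part of the removed code:

# Sketch of the keeper_id fallback outside ActiveRecord.
# keeper_data is the deserialized JSON hash (or nil), e.g. {"uid" => 42}.
def resolve_keeper_id(keeper_data, uploader_id, feature_enabled:)
  return uploader_id unless feature_enabled
  (keeper_data && keeper_data["uid"]) ? keeper_data["uid"] : uploader_id
end

resolve_keeper_id({"uid" => 42}, 7, feature_enabled: true)   # => 42
resolve_keeper_id(nil,           7, feature_enabled: true)   # => 7
resolve_keeper_id({"uid" => 42}, 7, feature_enabled: false)  # => 7

In the removed callback above, check_and_assign ran synchronously so the current request saw fresh keeper data, and queue_check then re-ran the attribution through delayed_job in case lagging post_versions rows changed the outcome.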

View File

@@ -111,7 +111,6 @@ class PostPresenter < Presenter
"data-pixiv-id" => post.pixiv_id,
"data-file-ext" => post.file_ext,
"data-source" => post.source,
"data-top-tagger" => post.keeper_id,
"data-uploader-id" => post.uploader_id,
"data-normalized-source" => post.normalized_source,
"data-is-favorited" => post.favorited_by?(CurrentUser.user.id)

View File

@@ -1,6 +1,5 @@
<ul>
  <li>ID: <%= post.id %></li>
  <li>Top Tagger: <%= link_to_user(post.keeper) %></li>
  <% if CurrentUser.is_moderator? %>
    <li>Uploader: <%= link_to_user(post.uploader) %></li>
  <% end %>

View File

@@ -1,11 +1,9 @@
<div class="post-tooltip-header">
<span class="post-tooltip-header-left">
<% if CurrentUser.is_moderator? && @post.uploader != @post.keeper %>
<%= link_to_user @post.uploader %> +
<% if CurrentUser.is_moderator? %>
<%= link_to_user @post.uploader %>
<% end %>
<%= link_to_user @post.keeper %>
<%= link_to time_ago_in_words_tagged(@post.created_at, compact: true), posts_path(tags: "date:#{@post.created_at.strftime("%Y-%m-%d")}"), class: "post-tooltip-date post-tooltip-info" %>
<span class="post-tooltip-favorites post-tooltip-info">

View File

@@ -1,89 +0,0 @@
require 'test_helper'

class PostKeeperManagerTest < ActiveSupport::TestCase
  subject { PostKeeperManager }

  context "#check_and_update" do
    context "when the connection is bad" do
      setup do
        @user = FactoryBot.create(:user)
        as(@user) do
          @post = FactoryBot.create(:post)
        end
        @post.stubs(:update_column).raises(ActiveRecord::StatementInvalid.new("can't get socket descriptor post_versions"))
      end

      should "retry" do
        PostArchive.connection.expects(:reconnect!)
        assert_raises(ActiveRecord::StatementInvalid) do
          subject.check_and_update(@post)
        end
      end
    end
  end

  context "#check_and_assign" do
    setup do
      Timecop.travel(1.month.ago) do
        @alice = FactoryBot.create(:user)
        @bob = FactoryBot.create(:user)
        @carol = FactoryBot.create(:user)
      end

      PostArchive.sqs_service.stubs(:merge?).returns(false)

      CurrentUser.scoped(@alice) do
        @post = FactoryBot.create(:post)
      end

      CurrentUser.scoped(@bob) do
        Timecop.travel(2.hours.from_now) do
          @post.reload
          @post.update(tag_string: "aaa bbb ccc")
        end
      end

      CurrentUser.scoped(@carol) do
        Timecop.travel(4.hours.from_now) do
          @post.reload
          @post.update(tag_string: "ccc ddd eee fff ggg")
        end
      end
    end

    should "update the post" do
      assert_equal(3, @post.versions.count)
      subject.check_and_assign(@post)
      assert_equal({"uid" => @carol.id}, @post.keeper_data)
    end
  end

  context "#check" do
    setup do
      Timecop.travel(1.month.ago) do
        @alice = FactoryBot.create(:user)
        @bob = FactoryBot.create(:user)
        @carol = FactoryBot.create(:user)
      end

      CurrentUser.scoped(@alice) do
        @post = FactoryBot.create(:post)
      end
    end

    should "find the most frequent tagger for a post" do
      assert_equal(@alice.id, subject.check(@post))
    end

    context "that is updated" do
      setup do
        CurrentUser.scoped(@bob) do
          Timecop.travel(2.hours.from_now) do
            @post.update_attributes(tag_string: "aaa bbb ccc")
          end
        end
      end

      should "find the most frequent tagger for a post" do
        assert_equal(@carol.id, subject.check(@post, @carol.id, %w(ddd eee fff ggg)))
      end
    end
  end
end