bulk revert

This commit is contained in:
Albert Yi
2016-11-04 11:33:59 -07:00
parent 619beb00af
commit a22a7c3302
5 changed files with 67 additions and 19 deletions

View File

@@ -1,6 +1,17 @@
class BulkRevert
BIG_QUERY_LIMIT = 5_000
attr_reader :constraints
# Raised when the constraints of a bulk revert match too many post versions
# (see the BIG_QUERY_LIMIT check in this class).
#
# Inherits from StandardError rather than Exception so that ordinary `rescue`
# clauses can handle it; Exception is reserved for things like signals and
# interpreter exits that application code should not trap.
class ConstraintTooGeneralError < StandardError; end
# Builds a BulkRevert for +constraints+, looks up the matching post versions
# and calls `undo!` on each one, visiting them in "updated_at, id" order.
#
# Returns whatever `each` returns for the ordered collection.
def self.process(constraints)
  versions = BulkRevert.new(constraints).find_post_versions
  versions.order("updated_at, id").each(&:undo!)
end
# Stores the given constraints on the instance; they are read back through the
# `constraints` reader when the post version query is built.
#
# constraints - collection of filter values, indexed elsewhere in this class
#               with symbol keys such as :user_id and :added_tags.
def initialize(constraints)
@constraints = constraints
end
@@ -18,14 +29,19 @@ class BulkRevert
if constraints[:user_id]
q = q.where("post_versions.updater_id = ?", constraints[:user_id])
end
if constraints[:added_tags] || constraints[:removed_tags]
hash = CityHash.hash64("#{constraints[:added_tags]} #{constraints{removed_tags}}").to_s(36)
sub_ids = Cache.get("br/fpv/#{hash}", 300) do
sub_ids = GoogleBigQuery::PostVersion.new.find(constraints[:user_id], constraints[:added_tags], constraints[:removed_tags])
end
q = q.where("post_versions.id in (?)", sub_ids)
if constraints[:added_tags] || constraints[:removed_tags]
hash = CityHash.hash64("#{constraints[:added_tags]} #{constraints{removed_tags}} #{constraints[:min_version_id]} #{constraints[:max_version_id]}").to_s(36)
sub_ids = Cache.get("br/fpv/#{hash}", 300) do
GoogleBigQuery::PostVersion.new.find(constraints[:user_id], constraints[:added_tags], constraints[:removed_tags], constraints[:min_version_id], constraints[:max_version_id], BIG_QUERY_LIMIT)
end
if sub_ids.size >= BIG_QUERY_LIMIT
raise ConstraintTooGeneralError.new
end
q = q.where("post_versions.id in (?)", sub_ids)
end
if constraints[:min_version_id].present?

View File

@@ -3,34 +3,50 @@ module GoogleBigQuery
# Queries post versions whose removed_tags contain +tag+, newest first.
#
# tag   - the raw tag to look for; it is escaped by remove_tag_condition.
# limit - maximum number of rows (coerced with to_i, default 1_000).
#
# Returns the result of `query` for the generated SQL.
def find_removed(tag, limit = 1_000)
  # The raw tag is handed to remove_tag_condition, which performs the escaping
  # itself. Escaping here as well would escape the pattern twice.
  limit = limit.to_i
  query("select id, post_id, updated_at, updater_id, updater_ip_addr, tags, added_tags, removed_tags, parent_id, rating, source from [#{data_set}.post_versions] where #{remove_tag_condition(tag)} order by updated_at desc limit #{limit}")
end
# Queries post versions whose added_tags contain +tag+, newest first.
#
# tag   - the raw tag to look for; it is escaped by add_tag_condition.
# limit - maximum number of rows (coerced with to_i, default 1_000).
#
# Returns the result of `query` for the generated SQL.
def find_added(tag, limit = 1_000)
  # The raw tag is handed to add_tag_condition, which performs the escaping
  # itself. Escaping here as well would escape the pattern twice.
  limit = limit.to_i
  query("select id, post_id, updated_at, updater_id, updater_ip_addr, tags, added_tags, removed_tags, parent_id, rating, source from [#{data_set}.post_versions] where #{add_tag_condition(tag)} order by updated_at desc limit #{limit}")
end
def find(user_id, added_tags, removed_tags, limit = 1_000)
# Builds the SQL fragment that matches +t+ as a whole, space-delimited entry
# of the added_tags column. The tag is passed through `escape` first.
#
# Returns the condition as a String.
def add_tag_condition(t)
  %{regexp_match(added_tags, "(?:^| )#{escape(t)}(?:$| )")}
end
# Builds the SQL fragment that matches +t+ as a whole, space-delimited entry
# of the removed_tags column. The tag is passed through `escape` first.
#
# Returns the condition as a String.
def remove_tag_condition(t)
  %{regexp_match(removed_tags, "(?:^| )#{escape(t)}(?:$| )")}
end
def find(user_id, added_tags, removed_tags, min_version_id, max_version_id, limit = 1_000)
constraints = []
constraints << "updater_id = #{user_id.to_i}"
if added_tags
added_tags.scan(/\S+/).each do |tag|
escaped = escape(tag)
constraints << "regexp_match(added_tags, \"(?:^| )#{escaped}(?:$| )\")"
constraints << add_tag_condition(tag)
end
end
if removed_tags
removed_tags.scan(/\S+/).each do |tag|
escaped = escape(tag)
constraints << "not regexp_match(added_tags, \"(?:^| )#{escaped}(?:$| )\")"
constraints << remove_tag_condition(tag)
end
end
if min_version_id
constraints << "id >= #{min_version_id.to_i}"
end
if max_version_id
constraints << "id <= #{max_version_id.to_i}"
end
limit = limit.to_i
sql = "select id from [#{data_set}.post_versions] where " + constraints.join(" and ") + " order by updated_at desc limit #{limit}"
result = query(sql)