Strip junk from Pixiv URLs before indexing, #726

This commit is contained in:
Coconut
2013-03-29 10:27:55 -04:00
parent 7e3324b5f0
commit 844a077599
2 changed files with 30 additions and 3 deletions

View File

@@ -150,14 +150,17 @@ class PostQueryBuilder
relation = relation.where("posts.is_deleted = FALSE")
end
# The SourcePattern SQL function replaces Pixiv sources with "pixiv/[suffix]", where
# [suffix] is everything past the second-to-last slash in the URL. It leaves non-Pixiv
# URLs unchanged. This is to ease database load for Pixiv source searches.
if q[:source]
if q[:source] == "none%"
relation = relation.where("(posts.source = '' OR posts.source IS NULL)")
elsif q[:source] =~ /pixiv/
raise ::Post::SearchError.new("pixiv source searches temporarily disabled")
elsif q[:source] =~ /^%\.?pixiv(\.net(\/img)?)?(%\/|(?=%$))(.+)$/
relation = relation.where("SourcePattern(posts.source) LIKE ? ESCAPE E'\\\\'", "pixiv/" + $5)
has_constraints!
else
relation = relation.where("posts.source LIKE ? ESCAPE E'\\\\'", q[:source])
relation = relation.where("SourcePattern(posts.source) LIKE SourcePattern(?) ESCAPE E'\\\\'", q[:source])
has_constraints!
end
end

View File

@@ -0,0 +1,24 @@
class ChangeSourcePatternIndexOnPosts < ActiveRecord::Migration
def up
execute "set statement_timeout = 0"
execute "DROP INDEX index_posts_on_pixiv_suffix;"
execute "DROP INDEX index_posts_on_source_pattern;"
execute "CREATE FUNCTION SourcePattern(src text) RETURNS text AS $$
BEGIN
RETURN regexp_replace(src, '^[^/]*(//)?[^/]*\.pixiv\.net/img.*(/[^/]*/[^/]*)$', 'pixiv\2');
END;
$$ LANGUAGE plpgsql IMMUTABLE RETURNS NULL ON NULL INPUT;"
execute "CREATE INDEX index_posts_on_source_pattern ON posts USING btree
((SourcePattern(source)) text_pattern_ops);"
end
def down
execute "set statement_timeout = 0"
execute "DROP INDEX index_posts_on_source_pattern;"
execute "DROP FUNCTION SourcePattern(text);"
execute "CREATE INDEX index_posts_on_source_pattern ON posts USING btree
(source text_pattern_ops);"
execute "CREATE INDEX index_posts_on_pixiv_suffix ON posts USING btree
((substring(source, 'pixiv.net/img.*/([^/]*/[^/]*)$')) text_pattern_ops);"
end
end