From 3d9c6fef1d8d6445c0344457c778aaa3bc81d4de Mon Sep 17 00:00:00 2001
From: evazion
Date: Fri, 11 Oct 2019 16:43:34 -0500
Subject: [PATCH] related tags: fix wiki page tag extraction.

* Parse the wiki page with the actual dtext parser instead of by hand.
  This is so that wiki links inside things like [nodtext] or [code]
  blocks are handled properly.

* Only include tags that exist and are nonempty. Don't include links to
  dead pages or blank tags.
---
 app/logical/d_text.rb            | 21 +++++++++++++++++++++
 app/logical/related_tag_query.rb |  6 +-----
 app/models/wiki_page.rb          | 11 ++++-------
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/app/logical/d_text.rb b/app/logical/d_text.rb
index a0818a17f..ad9d31eae 100644
--- a/app/logical/d_text.rb
+++ b/app/logical/d_text.rb
@@ -19,6 +19,27 @@ class DText
     names.uniq
   end
 
+  # Extract the normalized titles of the wiki pages linked from +text+.
+  #
+  # The text is rendered with the DText parser and the links are read from
+  # the resulting HTML, rather than being scanned for by hand.
+  #
+  # @param text [String] the DText source
+  # @return [Array<String>] normalized titles; duplicates are not removed
+  def self.parse_wiki_titles(text)
+    html = DTextRagel.parse(text)
+    fragment = Nokogiri::HTML.fragment(html)
+
+    fragment.css("a.dtext-wiki-link").map do |node|
+      # Links whose href isn't in the expected form are skipped rather than
+      # passing nil to CGI.unescape.
+      title = node["href"].to_s[%r!\A/wiki_pages/show_or_new\?title=(.*)\z!i, 1]
+      next unless title
+
+      WikiPage.normalize_title(CGI.unescape(title))
+    end.compact
+  end
+
   def self.strip_blocks(string, tag)
     n = 0
     stripped = ""
diff --git a/app/logical/related_tag_query.rb b/app/logical/related_tag_query.rb
index 77310a105..018faefa9 100644
--- a/app/logical/related_tag_query.rb
+++ b/app/logical/related_tag_query.rb
@@ -42,11 +42,7 @@ class RelatedTagQuery
   end
 
   def wiki_page_tags
-    results = wiki_page.try(:tags) || []
-    results.reject! do |name|
-      name =~ /^(?:list_of_|tag_group|pool_group|howto:|about:|help:|template:)/
-    end
-    results
+    wiki_page.try(:tags) || []
   end
 
   def other_wiki_pages
diff --git a/app/models/wiki_page.rb b/app/models/wiki_page.rb
index 5e1ddd79a..236bb24cf 100644
--- a/app/models/wiki_page.rb
+++ b/app/models/wiki_page.rb
@@ -200,13 +200,10 @@ class WikiPage < ApplicationRecord
   end
 
   def tags
-    body.scan(/\[\[(.+?)\]\]/).flatten.map do |match|
-      if match =~ /^(.+?)\|(.+)/
-        $1
-      else
-        match
-      end
-    end.map {|x| x.mb_chars.downcase.tr(" ", "_").to_s}.uniq
+    # Only tags that exist and are nonempty are returned; links to dead pages
+    # or blank tags are dropped.
+    titles = DText.parse_wiki_titles(body)
+    Tag.nonempty.where(name: titles.uniq).pluck(:name)
   end
 
   def visible?