This issue was converted from https://jira.toolserver.org/browse/DBQ-45.
Summary: find articles with many Links to disambiguationspage
Issue type: Task - A task that needs to be done.
Priority: Minor
Status: Done
Assignee: merl <mewikipedia@to.mabomuja.de>
From: merl <mewikipedia@to.mabomuja.de>
Date: Tue, 07 Oct 2008 13:10:29
CREATE TABLE IF NOT EXISTS dewiki_disambiguations (page_title VARCHAR(255), page_id INT primary key, index(page_id));
DELETE FROM dewiki_disambiguations;
– All pages in the main namespace that are in the category "Begriffsklaerung"
INSERT INTO dewiki_disambiguations SELECT null, cl_from
FROM dewiki_p.categorylinks WHERE cl_to = "Begriffsklärung";
– Add Title
UPDATE dewiki_disambiguations d, dewiki_p.page p SET d.page_title = p.page_title
WHERE d.page_id=p.page_id AND p.page_namespace = 0;
– Delete pages in non article namespace (title of those was not set in query before)
DELETE FROM dewiki_disambiguations WHERE page_title IS NULL;
– All pages in the main namespace that redirect to a disambiguation page
INSERT IGNORE INTO dewiki_disambiguations SELECT null, rd_from
FROM dewiki_p.redirect r INNER JOIN dewiki_disambiguations d ON r.rd_title = d.page_title WHERE r.rd_namespace=0;
– Add Title
UPDATE dewiki_disambiguations d, dewiki_p.page p SET d.page_title = p.page_title
WHERE d.page_title IS NULL AND d.page_id=p.page_id AND p.page_namespace = 0;
– create link table
CREATE TABLE IF NOT EXISTS dewiki_pagelinksdab (page_title VARCHAR(255), page_id INT PRIMARY KEY, linkcount INT, INDEX(linkcount), INDEX(page_title));
– Links to dismabiguation pages
DELETE FROM dewiki_pagelinksdab;
– Links to disambiguations where the link source is in the main namespace
INSERT INTO dewiki_pagelinksdab SELECT NULL, pl_from, COUNT(p.pl_title) AS linkcount
FROM dewiki_p.pagelinks p INNER JOIN dewiki_disambiguations d ON p.pl_title = d.page_title
WHERE p.pl_namespace=0 GROUP BY p.pl_from HAVING linkcount > 20;
– Add Title
UPDATE dewiki_pagelinksdab d, dewiki_p.page p SET d.page_title = p.page_title
WHERE d.page_id=p.page_id AND p.page_namespace = 0;
– Delete pages in non article namespace (title of those was not set in query before)
DELETE FROM dewiki_pagelinksdab WHERE page_title IS NULL;
– Output in wiki format
SELECT CONCAT('# [[', page_title, ']]: ', ' (', linkcount, ')') FROM dewiki_pagelinksdab ORDER BY linkcount DESC limit 300;
Version: unspecified
Severity: minor