< User:Merge bot
Browse history interactively ← Previous edit Next edit → Content deleted Content addedVisual Wikitext Inline
Revision as of 13:45, 12 April 2020 edit Wbm1058 (talk | contribs )Autopatrolled , Administrators 264,845 edits v 1.93 – add reasons (for merging) to reports; list all other redirects at the end of the console report← Previous edit
Revision as of 18:13, 16 April 2020 edit undo Wbm1058 (talk | contribs )Autopatrolled , Administrators 264,845 edits v 1.94 – strip section links before testing to see if the page may have been mergedNext edit →
Line 28:
Line 28:
include("logininfo.php");
include("logininfo.php");
const bot_version = "1.93";
const bot_version = "1.94 ";
const botuser = "Merge bot";
const botuser = "Merge bot";
Line 382:
Line 382:
$target = trim($target);
$target = trim($target);
if ($target == $pagename) {
if (strpos ($target,"#") ! == false ) {
echo $otherpages . " redirects to " . $pagename . " -- may have been merged\n\n";
$len = strpos( $target , "# ") ;
if ($len == 0) $targetpage = $target;
else $targetpage = substr($target,0,$len); // strip section links
}
else $targetpage = $target;
if ($targetpage == $pagename) {
echo $otherpages . " redirects to " . $target . " -- may have been merged\n\n";
$maybes = $pagename . ":: " . $otherpages;
$maybes = $pagename . ":: " . $otherpages;
$maybe +=1;
$maybe +=1;
Revision as of 18:13, 16 April 2020
<?php
/** proposedmergers.php - To generate lists of proposed page mergers on Wikipedia
*
* (c) 2009 James Hare - http://en.wikipedia.org/User:Harej
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Developers (add your self here if you worked on the code):
* James Hare - [[User:Harej]] - Wrote everything
* WBM - [[User:Wbm1058]] - March/April 2013 updates
**/
// Show errors on the console, reporting everything except notices.
ini_set('display_errors', '1');
error_reporting(E_ALL & ~E_NOTICE);

// Botclasses.php was written by User:Chris_G and is available under the GNU General Public License
require_once('botclasses.php');
// Credentials file; the login call further down reads $mbuser and $mbpass.
include 'logininfo.php';

// Identity of this bot and the release number printed at start-up.
const bot_version = '1.94';
const botuser = 'Merge bot';
/**
 * Reduce the raw wikitext of one merge-template call to a canonical
 * "name|param|param=value" string.
 *
 * The input is echoed to the console first. Then the surrounding braces are
 * removed, one optional whitespace character on each side of every "|" and
 * "=" is dropped, and the leading template name is rewritten by an ordered
 * list of case-insensitive prefix rules to one of:
 *   "merge", "mergefrom", "mergeto", "afd-mergeto" or "false-positive".
 * A name that matches no rule is returned unchanged.
 *
 * The rules are anchored prefixes, not whole-name matches, and each rule sees
 * the output of the previous one, so their order matters: an alias that
 * extends a shorter alias must be rewritten before the shorter one.
 *
 * @param string $processed Raw template text, e.g. "{{Merge to|Foo|date=April 2020}}".
 * @return string The normalised template text.
 */
function templateprocess($processed) {
    echo " :: " . $processed;

    // Structural clean-up: tighten pipes, drop the braces, tighten equals signs.
    $processed = preg_replace('/\s?\|\s?/', "|", $processed);
    $processed = preg_replace('/\{{2}\s?/', "", $processed);
    $processed = preg_replace('/\s?\}{2}/', "", $processed);
    $processed = preg_replace('/\s?=\s?/', "=", $processed);

    // Ordered alias rules: array(pattern, replacement).
    $rules = array(
        # Redirects to {{Merge}} (14 aliases)
        array('/^(Mergewith|Merge with|Merge_with|MergeVfD|Mergesplit|MergeSplit|Merge)/i', 'merge'),
        array('/^(Mergedisputed|MergeDisputed|Merge-disputed|Merge disputed|Merge_disputed)/i', 'merge'),
        array('/^(Merge-multiple|Mergemulti|Mergetomultiple-with|Multimerge)/i', 'merge'),
        array('/^(Proposed merge|Proposed_merge)/i', 'merge'),
        # {{Merging}} -- with dir=from it behaves like a merge-from tag
        array('/^(Mergingsectionto|Merging|Merging to|Merging_to)(.*)\|\s*dir\s*\=\s*from/i', 'mergefrom$2'),
        array('/^(Mergingsectionto|Merging|Merging to|Merging_to)/i', 'merge'),
        # Redirects to {{Merge from}} (11 aliases)
        array('/^(Merge from|Merge_from|Merge-from|Include|Mergrefrom|Mergefrom-category|MergeFrom|Mergefrom)/i', 'mergefrom'),
        array('/^(Mergefrom-multiple|Multiplemergefrom|Mergefrommultiple|Mergefrommulti|Multimergefrom)/i', 'mergefrom'),
        # {{Afd-merge from}}, {{Merging from}}
        array('/^(Afd-merge from|Afd-merge_from|Afd-mergefrom|Afdmergefrom|Merging from|Merging_from|Mergingfrom)/i', 'mergefrom'),
        # Redirects to {{Merge to}} (15 aliases) and {{Merge school}} (1 alias)
        # This rule must run before the "Merge to|Merge_to" rule below; otherwise
        # "Merge to article" is cut to "mergeto article" and is never recognised.
        array('/^(MergePartial|Merge-multiple-to|Merge to article|Merge_to_article)/i', 'mergeto'),
        array('/^(Merge to|Merge_to|Merge-to|Mergeinto|MergetoCat|Mergelist|Mergeto-disputed)/i', 'mergeto'),
        array('/^(Mergeto-multiple|Multiplemergeto|Multiplemergeinto|Merge into|Merge_into|Merge-into|MergeTo|Mergeto|Merge2)/i', 'mergeto'),
        array('/^(Merge school|Merge_school|Merge-school)/i', 'mergeto'),
        # {{Afd-merge to}}
        array('/^(Afd-merge to|Afd-merge_to|Afd-mergeto|Afdmergeto|AfD-merge to|AfD-merge_to)/i', 'afd-mergeto'),
        # {{Merge portions from}} // This is really a splitting template, not a merging template
        array('/^(Merge portions from|Merge_portions_from|Move portions from|Move_portions_from)/i', 'false-positive'),
    );

    foreach ($rules as $rule) {
        $processed = preg_replace($rule[0], $rule[1], $processed);
    }

    return $processed;
}
// Announce the runtime and bot release on the console.
echo "PHP version: ", PHP_VERSION, "\n";
// phpinfo();
echo "Bot version: ", bot_version, "\n";

// Tallies printed in the end-of-run summary; all start at zero.
$othernamespace = $matchfailed = $templatesplitfailed = $nopartner = 0;
$mismatched_date = $reason_count = $blank = $no_reciprocal = 0;
$redirects = $maybe = $diffcase = $other_redirect = 0;
$selfmerge = $noname = $wikt = 0;

// Detail lists that accompany some of the tallies above.
$mismatched_dates = array();
$reasons = array();
$blanks = array();
$maybes = array();
$diffcases = array();
$other_redirects = array();

// Accumulates the wikitext of the index page written at the end of the run.
$mainsubmission = "";

// Open a session with the credentials supplied by logininfo.php.
echo "Logging in...\n";
$objwiki = new wikipedia();
$objwiki->http->useragent = '] php wikibot classes';
$objwiki->login($mbuser, $mbpass);
echo "...done.\n";
// Main scan: walk every member of "Category:Articles to be merged", and for each
// member whose name matches "Category:Articles to be merged from", walk its pages,
// find the merge templates on each page, and collect the data used later to build
// the log pages.
// NOTE(review): throughout this block array subscripts and bracketed wikitext appear
// to have been stripped from the text (e.g. $mergemonths and $transcludes are indexed
// by the loops below but never subscripted, and `echo " . "]\n";` does not parse).
// Restore them from the original source before running -- TODO confirm.
$mergemonths = $objwiki->categorymembers("Category:Articles to be merged");
print_r($mergemonths);
for ($a = 0; $a < count($mergemonths); $a++) { // for each month
#if (preg_match("/Category:Articles to be merged/", $mergemonths)) {
if (preg_match("/Category:Articles to be merged from/", $mergemonths)) {
echo "\n__________\n" . $a . ": " . $mergemonths . "\n";
$transcludes = $objwiki->categorymembers($mergemonths);
#print_r($transcludes);
// Derive the label compared against each template's date= parameter; if the
// "...merged from " prefix was not present, fall back to stripping "Category:".
$monthyear = str_replace("Category:Articles to be merged from ", "", $mergemonths, $repcount);
if ($repcount == 0) {
$monthyear = str_replace("Category:", "", $mergemonths);
}
for ($i = 0; $i < count($transcludes); $i++) { // for each page in the category
// Track the largest page index seen, for the end-of-run summary.
if ($i > $maxi) {
$maxi = $i;
}
// Pages whose title starts with one of these namespace prefixes are counted and skipped.
// NOTE(review): "Misplaced Pages" is presumably the project namespace "Wikipedia" -- confirm.
if (preg_match("/^((User|Misplaced Pages|File|MediaWiki|Template|Help|Category|Portal)(( |_)talk)?|Talk):/", $transcludes)) {
echo "\n\n?? Other namespace: " . $transcludes . "\n\n";
$others = $transcludes;
$othernamespace += 1;
continue;
}
// Fetch the page text, reusing the copy cached by the "getpage 2" step below when
// the title matches. A falsy result is retried after a 10 second pause; the script
// dies on the fifth failure for the same page.
$getpagefailed = 0;
do {
if ($transcludes == $cachepage) {
echo "getpage 1: $transcludes|from cache\n";
$contents = $cachecontents;
}
else {
echo "getpage 1: $transcludes|";
$contents = $objwiki->getpage($transcludes);
}
if ($contents == FALSE) {
echo "\n\n?? getpage failed: " . $transcludes . "\n\n";
$getpagefailed += 1;
if ($getpagefailed == 5) {
die("getpage Error");
}
sleep(10);
continue;
}
} while ($contents == FALSE);
// Collect the merge templates on the page; a page with none is counted as a failed match.
// NOTE(review): the pattern looks like it has lost a bracketed character class between
// the name group and the closing braces -- confirm against the original.
if (preg_match_all("/\{{2}\s?(merge|afd-merge|merging|move section)*\}{2}/i", $contents, $raw1) == 0) {
#echo "contents:\n";
#echo "$contents";
#echo "\n";
unset($contents);
echo "\n\n?? Match failed: " . $transcludes . "\n\n";
$nomatches = $transcludes;
$matchfailed += 1;
continue;
}
$mergetemplates = count($raw1);
if ($mergetemplates > 1) {
echo "$mergetemplates merge templates on $transcludes\n";
print_r($raw1);
}
// Counts templates on this page that were rejected in the loop below.
$takeofflater = 0;
for ($j = 0; $j < $mergetemplates; $j++) { // for each merge template on the page
// Key combining category index, page index and template index.
$key = $a . "-" . $i . "-" . $j;
$pagename = $transcludes;
echo $key . ">> " . $pagename;
$template = templateprocess($raw1);
#echo " § " . $template . " §";
// Keep the untouched template text and page text for the in-place edits made below.
$ptemplate = $raw1;
$pcontents = $contents;
unset($contents);
// Split the normalised template on "|" and trim each piece.
$templatesplit = array_map('trim',explode("|", $template));
echo " " . $templatesplit;
// Map the normalised template name to a merge type; afd-mergeto, false-positive
// and unrecognised names reject this template and move on to the next one.
switch ($templatesplit) {
case "mergefrom":
$type = "into";
break;
case "merge":
$type = "with";
break;
case "mergeto":
$type = "to";
break;
case "afd-mergeto":
case "false-positive":
echo " . "]\n";
unset($pagename);
$takeofflater += 1;
continue 2;
default:
echo "*** templatesplitfailed: " . $template . "\n";
print_r($templatesplit);
$nosplits = $transcludes;
$templatesplitfailed += 1;
echo " . "]\n";
unset($pagename);
$takeofflater += 1;
continue 2;
}
// Build a {{pagelist}} call from the positional parameters while picking the
// named parameters (date, discuss, target, section, reason, ...) out of the rest.
$otherpage = "{{pagelist|nspace=|";
unset($otherpages);
$otherpages = array();
$otherpage_count = 0;
for ($para = 1; $para < count($templatesplit); $para++) {
#echo "\n" . $para . ") " . $templatesplit . "\n";
if (preg_match("/^date=/i", $templatesplit)) {
$date = str_replace("date=", "", $templatesplit);
$date = str_replace("_", " ", $date);
// A template dated for a different month than this category is recorded once
// per page and rejected.
if ($date != $monthyear) {
if (in_array($transcludes, $mismatched_dates) == FALSE) {
$mismatched_dates = $transcludes;
$mismatched_date += 1;
}
echo " Another date: " . $date . " not " . $monthyear;
echo " . "]\n";
unset($pagename);
unset($type);
unset($date);
unset($discuss);
unset($target);
unset($section);
unset($multiplesections);
unset($reason);
$takeofflater += 1;
continue 2;
}
}
elseif (preg_match("/^(discuss|discussion|talk)=/i", $templatesplit)) {
// All three spellings name the discussion page.
$discuss = str_replace("discuss=", "", $templatesplit);
$discuss = str_replace("discussion=", "", $discuss);
$discuss = str_replace("talk=", "", $discuss);
}
elseif (preg_match("/^target=/i", $templatesplit)) {
$target = str_replace("target=", "", $templatesplit);
// Reject the template when this target is already present in $pagename.
if (in_array($target, $pagename)) {
echo " . "]\n";
unset($pagename);
unset($type);
unset($date);
unset($discuss);
unset($target);
unset($section);
unset($multiplesections);
unset($reason);
$takeofflater += 1;
continue 2;
}
}
elseif (preg_match("/^section=/i", $templatesplit)) {
$section = str_replace("section=", "", $templatesplit);
}
elseif (preg_match("/^multiplesections=/i", $templatesplit)) { // Template:Mergefrom uses parameter multiplesections; Merge and Mergeto do not
$multiplesections = str_replace("multiplesections=", "", $templatesplit);
}
elseif (preg_match("/^dir=/i", $templatesplit)) { // Template:Merging
}
elseif (preg_match("/^(reason|comment)=/i", $templatesplit)) {
// Only the "reason=" prefix is removed here; a "comment=" prefix stays in the text.
$reason = str_replace("reason=", "", $templatesplit);
$reasons = $pagename . " …… " . $reason;
$reason_count += 1;
}
else {
// Anything else is treated as the name of another page in the proposal.
if (in_array($templatesplit, $pagename)) {
echo " . "]\n";
unset($pagename);
unset($type);
unset($date);
unset($discuss);
unset($target);
unset($section);
unset($multiplesections);
unset($reason);
$takeofflater += 1;
continue 2;
}
else {
// The test accepts prefixes 1= through 20=, but only "1=" is stripped.
if (preg_match("/^(1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20)=/i", $templatesplit)) {
echo "\nNumbered parameter(s):" . $templatesplit;
$templatesplit = str_replace("1=", "", $templatesplit);
}
$otherpage .= $templatesplit . "|";
$otherpages = $templatesplit;
$otherpage_count += 1;
}
}
}
$otherpage .= "}}";
echo " -- " . $otherpage . "\n";
#print_r($otherpages);
// Inspect each other page named by the template.
for ($ii = 0; $ii < count($otherpages); $ii++) {
$break = 0;
$pagecontents = "";
// Underscores in the name are replaced by spaces, both locally and (when the
// name is found in the raw template and nobots() returns true) on the wiki page.
if (str_replace ("_"," ",$otherpages) != $otherpages) {
echo "\n$otherpages has underscores\n";
$pos = strpos($ptemplate, $otherpages);
$otherpages = str_replace ("_"," ",$otherpages);
if ($pos !== false) {
$new_template = substr_replace($ptemplate, $otherpages, $pos, strlen($otherpages));
$pcontents = str_replace($ptemplate,$new_template,$pcontents);
if ($objwiki->nobots($transcludes,botuser,$pcontents) == true)
$objwiki->edit($transcludes,$pcontents,"Replace underscores with spaces in merge template",true,true);
}
}
// $mp is the other page's name without any "#section" suffix; a name that is
// only a section link ("#...") refers to the current page.
if (strpos($otherpages,"#") !== false) {
$len = strpos($otherpages,"#");
if ($len == 0) $mp = $pagename;
else $mp = substr($otherpages,0,$len); // strip section links
}
else $mp = $otherpages;
if ($pagename == $mp) {
echo "\n***** PAGE " . $mp . " PROPOSED FOR SELF-MERGE *****\n\n";
$selfmerge += 1;
}
else if ($mp == "") {
// Empty name: count it and, when the raw template contains "||", collapse it
// to a single pipe on the wiki page.
echo "\n**** Pagename not specified ****" . $ptemplate . "\n\n";
$noname += 1;
$new_template = str_replace ("||","|",$ptemplate);
if ($new_template !== $ptemplate) {
$pcontents = str_replace($ptemplate,$new_template,$pcontents);
if ($objwiki->nobots($transcludes,botuser,$pcontents) == true)
$objwiki->edit($transcludes,$pcontents,"Remove redundant pipe in merge template",true,true);
}
}
else if (preg_match("/^(Wiktionary|Wikt):/i",$otherpages)) {
// Wiktionary targets are not fetched; the pagelist is switched to nspace=wikt.
echo "\n*** Wiktionary merge: " . $otherpages . " ***\n\n";
$wikt += 1;
$otherpage = preg_replace("/nspace=/", "nspace=wikt", $otherpage);
}
else {
// Fetch the other page, trying up to five times while the result is empty;
// after the fifth empty result it is recorded as blank or missing.
while ($pagecontents == "") {
if ($break == 5) {
echo "\n*** " . $pagename . ": PAGE " . $otherpages . " IS BLANK OR DOES NOT EXIST ***\n\n";
$blanks = $pagename . ":: " . $otherpages;
unset($pagecontents);
$blank += 1;
break;
}
else {
echo "getpage 2: $otherpages|";
$pagecontents = $objwiki->getpage($otherpages);
if (preg_match_all("/\{{2}\s?(merge|afd-merge|merging|move section)*\}{2}/i", $pagecontents, $raw2) == 0) {
echo "*** Merge tag not found on " . $otherpages. " *2*\n";
$no_reciprocal += 1;
}
// Remember this fetch so the "getpage 1" and "getpage 3" steps can reuse it.
$cachepage = $otherpages;
$cachecontents = $pagecontents;
$break += 1;
}
}
// The other page is itself a redirect: work out where it points.
// NOTE(review): both patterns below look damaged by extraction (the second has an
// unbalanced parenthesis) -- confirm against the original.
if (preg_match("/^\#REDIRECT(\s*|:)\{2}/i", $pagecontents, $redirect)) {
echo "\n*** " . $pagename. ": PAGE " . $otherpages . " IS A REDIRECT!! ***\n";
echo $pagecontents . "\n\n";
$redirects += 1;
preg_match("/(?<=\{2}))/i", $redirect, $target);
echo "Target: " . $target . "\n";
// Normalise the target title: capitalise the first letter, underscores to spaces, trim.
$target = ucfirst($target);
$target = str_replace ("_"," ",$target);
$target = trim($target);
// Compare on the page part only, so a redirect to "Page#Section" still counts
// as pointing at "Page" (added in v1.94).
if (strpos($target,"#") !== false) {
$len = strpos($target,"#");
if ($len == 0) $targetpage = $target;
else $targetpage = substr($target,0,$len); // strip section links
}
else $targetpage = $target;
if ($targetpage == $pagename) {
echo $otherpages . " redirects to " . $target . " -- may have been merged\n\n";
$maybes = $pagename . ":: " . $otherpages;
$maybe +=1;
}
else if (strtoupper($otherpages) == strtoupper($target)) {
// Name and redirect target differ only by letter case: record it and rewrite
// the template on the wiki page to use the target's spelling.
echo "Case difference\n";
$diffcases = $pagename . ":: " . $otherpages . " vs. " . $target;
$diffcase += 1;
$pos = strpos($ptemplate, $otherpages);
if ($pos !== false) {
$new_template = substr_replace($ptemplate, $target, $pos, strlen($otherpages));
$pcontents = str_replace($ptemplate,$new_template,$pcontents);
if ($objwiki->nobots($transcludes,botuser,$pcontents) == true)
$objwiki->edit($transcludes,$pcontents,"Bypass redirect in merge template – DIFFCAPS",true,true);
}
}
else {
$other_redirects = $pagename . ":: " . $otherpages;
$other_redirect += 1;
}
}
}
}
// Post-processing for the template as a whole.
if ($otherpage == "{{pagelist|nspace=|}}") {
// No other page was named at all.
$nopartners = $transcludes;
$nopartner += 1;
if ($templatesplit == "mergeto") $type = "to ?";
}
else if (substr($otherpage,0,22) == "{{pagelist|nspace=wikt") {
#echo "\nwikt:\n";
}
else if ($templatesplit == "mergeto") {
// For merge-to templates, look at the destination page for a merge tag that
// mentions this page.
$break = 0;
$pagecontents = "";
while ($pagecontents == "") {
if ($break == 5) {
echo "\n*** " . $pagename . ": PAGE " . $otherpages . " is BLANK or DOES NOT EXIST ***\n\n";
unset($pagecontents);
break;
}
else {
if ($otherpages == $cachepage) {
echo "getpage 3: $otherpages|from cache\n";
$pagecontents = $cachecontents;
}
else {
echo "getpage 3: $otherpages|";
$pagecontents = $objwiki->getpage($otherpages);
if (preg_match_all("/\{{2}\s?(merge|afd-merge|merging|move section)*\}{2}/i", $pagecontents, $raw3) == 0) {
echo "*** Merge tag not found on " . $otherpages. " *3*\n";
$no_reciprocal += 1;
}
}
$break += 1;
}
}
if (preg_match_all("/\{{2}\s?(merge|afd-merge|merging|move section)*\}{2}/i", $pagecontents, $raw) !== 0) {
$reciprocal = FALSE;
// Case-insensitive search for this page's name in each matched tag.
for ($jraw = 0; $jraw < count($raw); $jraw++) {
echo "(" . $jraw . ") " . $raw;
$pos = stripos($raw,$pagename);
if ($pos === false) {
echo " ** " . $pagename . " not found!";
}
else {
echo " ** found " . $pagename;
$reciprocal = TRUE;
}
}
if ($reciprocal == FALSE) {
echo "\n*** Reciprocal merge tag not found on " . $otherpages. " ***\n";
$no_reciprocal += 1;
}
else {
// A reciprocal tag exists, so this merge-to entry is dropped here.
// Presumably the proposal is reported from the destination page instead -- confirm.
echo "\n";
echo "mergeto target: " . $otherpage . " ◆ " . $otherpages;
echo " . "]\n";
unset($pagename);
unset($type);
$takeofflater += 1;
continue;
}
}
}
// Default the discussion page to the talk page of the tagged page when the
// template did not name one.
// NOTE(review): "Misplaced Pages" is presumably "Wikipedia" here too -- confirm.
if ($discuss == "") {
if (preg_match("/^(User|Misplaced Pages|Image|File|MediaWiki|Template|Help|Category|Portal):/i",$transcludes,$m)) {
$discuss = str_replace($m,$m.' talk',$transcludes);
}
else {
$discuss = "Talk:" . $transcludes;
}
}
}
}
$j = $j - $takeofflater;
// Add an index line for this category when it had members.
// NOTE(review): the "* ]\n" literal appears to have lost a wikilink -- confirm.
if (count($transcludes) > 0) {
$mainsubmission .= "* ]\n";
}
else {
echo "\n*** Category is empty ***\n\n";
}
}
else {
echo "\n" . $a . ": Skipped: " . $mergemonths . "\n";
$skipped += 1;
}
}
// Build the wikitext of each log page from the data collected by the scan above.
// NOTE(review): this block has lost its array subscripts in extraction
// (`$counter]`, `$submission]`, the bare `$type`, `$target`, ... inside the loop) and
// does not parse as shown; restore them from the original source -- TODO confirm.
// Stop early when the scan collected nothing.
if (count($pagename) < 1) {
echo "No pagenames! Exiting.";
die();
}
echo "\n*** Preparing Updates ***\n\n";
print_r($pagename);
foreach ($pagename as $key => $title) {
// Track the largest per-log entry count, for the end-of-run summary.
if ($counter] > $maxcounter) {
$maxcounter = $counter];
}
// Append one {{merge log entry}} line, laid out according to the merge type.
// Every branch except "with" writes type=into.
switch ($type) {
case "with": // regular merge
// A third argument is emitted only when a target was given.
if ($target != "") {
$submission] .= "{{merge log entry|1=]|2=$otherpage|3=]]|talk=$discuss|type=with|reason=$reason}}\n";
}
else {
$submission] .= "{{merge log entry|1=]|2=$otherpage|talk=$discuss|type=with|reason=$reason}}\n";
}
break;
case "into": // merge-from
$submission] .= "{{merge log entry|1=$otherpage|2=]|talk=$discuss|type=into|reason=$reason}}\n";
break;
case "to": // merge-to
$submission] .= "{{merge log entry|1=]|2=$otherpage|talk=$discuss|type=into|reason=$reason}}\n";
break;
case "to ?": // no partner
$submission] .= "{{merge log entry|1=]|2=?|talk=$discuss|type=into|reason=$reason}}\n";
break;
default:
// Unknown type: report it on the console; the entry is still counted below.
echo "\n*error* " . $type . "\n";
break; // use "continue 2"?
}
$counter] += 1;
echo $date . " | " . $counter] . ") " . $key . " ++ " . $title. "\n";
}
// Publish the results: one edit per entry in $submission (keyed by log name),
// followed by one edit of the index page built up in $mainsubmission.
// The pages live in the project namespace "Wikipedia:". The previous text read
// "Misplaced Pages:", which is not a namespace on the wiki this bot logs in to,
// so the edits would have landed on article-space titles.
echo "\n\n*** Posting Updates ***\n\n";
foreach ($submission as $log => $content) {
    echo "Updating> " . $log . "\n";
    #echo $content . "\n";
    $objwiki->edit("Wikipedia:Proposed mergers/Log/" . $log, $content, "Updating list of proposed mergers", false, true);
}
echo "Updating Wikipedia:Proposed mergers/Log\n";
$objwiki->edit("Wikipedia:Proposed mergers/Log", $mainsubmission, "Updating list of logs", true, true);
echo "done.\n";
// End-of-run console summary: each tally, followed where available by the
// detail list collected alongside it.
echo "\nCategory members skipped: {$skipped}";
echo "\nMaximum pages in a category: {$maxi}";
echo "\nMaximum items on a report page: {$maxcounter}";

echo "\nOther namespaces skipped: {$othernamespace}\n";
print_r($others);

echo "\nPattern match failed: {$matchfailed}\n";
print_r($nomatches);

echo "\nTemplatesplit failed: {$templatesplitfailed}\n";
print_r($nosplits);

echo "\nNo partner: {$nopartner}\n";
print_r($nopartners);

echo "\nPages with multiple proposal dates: {$mismatched_date}\n";
print_r($mismatched_dates);

echo "\nPages where reasons are given in the template: {$reason_count}\n";
print_r($reasons);

echo "\nOther pages which are blank or don't exist: {$blank}\n";
print_r($blanks);

// Redirect findings, broken down into three sub-categories.
echo "\nOther pages which are redirects: {$redirects}";
echo "\n Maybe merged: {$maybe}\n";
print_r($maybes);
echo "\n Cases differ: {$diffcase}\n";
print_r($diffcases);
echo "\n Other redirects: {$other_redirect}\n";
print_r($other_redirects);

echo "\nMerge-to targets without reciprocal merge tags: {$no_reciprocal}\n";
echo "\nSelf-merges: {$selfmerge}\n";
echo "\nNo name specified: {$noname}\n";
echo "\nWiktionary merges: {$wikt}\n";
echo "\nMission accomplished.\n\n";
?>