Misplaced Pages

User:Merge bot/proposedmergers.php

Article snapshot taken from Wikipedia with creative commons attribution-sharealike license. Give it a read and then ask your questions in the chat. We can research this topic together.
< User:Merge bot

This is an old revision of this page, as edited by Wbm1058 (talk | contribs) at 23:03, 10 April 2020 (v 1.91 – Remove redundant pipes in merge templates). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

Revision as of 23:03, 10 April 2020 by Wbm1058 (talk | contribs) (v 1.91 – Remove redundant pipes in merge templates)(diff) ← Previous revision | Latest revision (diff) | Newer revision → (diff)
<?php
/** proposedmergers.php - To generate lists of proposed page mergers on Wikipedia
 *
 *  (c) 2009 James Hare - http://en.wikipedia.org/User:Harej
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *   
 *  Developers (add yourself here if you worked on the code):
 *    James Hare - [[User:Harej]] - Wrote everything
 *    WBM - [[User:Wbm1058]] - March/April 2013 updates
 **/
// Show errors in the script output and report everything except notices.
ini_set("display_errors", 1);
error_reporting(E_ALL ^ E_NOTICE);
require_once 'botclasses.php';  // Botclasses.php was written by User:Chris_G and is available under the GNU General Public License
// Presumably defines $mbuser and $mbpass, which are passed to login() further
// down and are not assigned anywhere else in this file -- TODO confirm.
include("logininfo.php");
// Version string echoed at the start of every run.
const bot_version = "1.91";
// Account name handed to nobots() before the bot edits a page.
const botuser = "Merge bot";
/**
 * Reduce a raw merge-template invocation to a canonical "name|param|..." string.
 *
 * The incoming text is echoed for the run log. A single optional whitespace
 * character around each pipe and equals sign is dropped, the double braces are
 * removed, and then a fixed list of case-insensitive prefix rewrites maps the
 * template name (and its redirects) onto one of: merge, mergefrom, mergeto,
 * afd-mergeto or false-positive. The rules run strictly in the order listed,
 * so a later rule operates on the output of the earlier ones (for example
 * "Merge from" first becomes "merge from" and only then "mergefrom").
 *
 * @param string $processed Template wikitext such as "{{Merge to|Foo}}".
 * @return string The normalized template text.
 */
function templateprocess($processed) {
	echo " :: " . $processed;

	// Each entry is array(pattern, replacement); order is significant.
	$rewrites = array(
		// Whitespace and brace clean-up
		array("/\s?\|\s?/", "|"),
		array("/\{{2}\s?/", ""),
		array("/\s?\}{2}/", ""),
		array("/\s?=\s?/", "="),
		// Redirects to {{Merge}} (14 aliases)
		array("/^(Mergewith|Merge with|Merge_with|MergeVfD|Mergesplit|MergeSplit|Merge)/i", "merge"),
		array("/^(Mergedisputed|MergeDisputed|Merge-disputed|Merge disputed|Merge_disputed)/i", "merge"),
		array("/^(Merge-multiple|Mergemulti|Mergetomultiple-with|Multimerge)/i", "merge"),
		array("/^(Proposed merge|Proposed_merge)/i", "merge"),
		// {{Merging}}
		array("/^(Mergingsectionto|Merging|Merging to|Merging_to)/i", "merge"),
		// Redirects to {{Merge from}} (11 aliases)
		array("/^(Merge from|Merge_from|Merge-from|Include|Mergrefrom|Mergefrom-category|MergeFrom|Mergefrom)/i", "mergefrom"),
		array("/^(Mergefrom-multiple|Multiplemergefrom|Mergefrommultiple|Mergefrommulti|Multimergefrom)/i", "mergefrom"),
		// {{Afd-merge from}}, {{Merging from}}
		array("/^(Afd-merge from|Afd-merge_from|Afd-mergefrom|Afdmergefrom|Merging from|Merging_from|Mergingfrom)/i", "mergefrom"),
		// Redirects to {{Merge to}} (15 aliases) and {{Merge school}} (1 alias)
		array("/^(Merge to|Merge_to|Merge-to|Mergeinto|MergetoCat|Mergelist|Mergeto-disputed)/i", "mergeto"),
		array("/^(MergePartial|Merge-multiple-to|Merge to article|Merge_to_article)/i", "mergeto"),
		array("/^(Mergeto-multiple|Multiplemergeto|Multiplemergeinto|Merge into|Merge_into|Merge-into|MergeTo|Mergeto|Merge2)/i", "mergeto"),
		array("/^(Merge school|Merge_school|Merge-school)/i", "mergeto"),
		// {{Afd-merge to}}
		array("/^(Afd-merge to|Afd-merge_to|Afd-mergeto|Afdmergeto|AfD-merge to|AfD-merge_to)/i", "afd-mergeto"),
		// {{Merge portions from}} is really a splitting template, not a merging template
		array("/^(Merge portions from|Merge_portions_from|Move portions from|Move_portions_from)/i", "false-positive"),
	);

	foreach ($rewrites as $rewrite) {
		$processed = preg_replace($rewrite[0], $rewrite[1], $processed);
	}

	return $processed;
}
// Report the runtime and bot version at the top of the run log.
echo "PHP version: " . PHP_VERSION . "\n";
#phpinfo();
echo "Bot version: " . bot_version . "\n";

// Run statistics. The scalar counters and their companion lists are filled in
// by the main loop and printed in the summary at the end of the script.
$othernamespace = 0;
$matchfailed = 0;
$templatesplitfailed = 0;
$nopartner = 0;
$mismatched_date = 0;
$mismatched_dates = array();
$reason_count = 0;
$reasons = array();
$blank = 0;
$blanks = array();
$no_reciprocal = 0;
$redirects = 0;
$maybe = 0;
$maybes = array();
$diffcase = 0;
$diffcases = array();
$selfmerge = 0;
$noname = 0;
$wikt = 0;
// Wikitext for the index page of monthly logs.
$mainsubmission = "";

// The variables below are read later in the script (compared, counted,
// iterated or printed) on paths where nothing may have been assigned to them
// yet. Reading an undefined variable is a warning on PHP 8, which the
// E_ALL ^ E_NOTICE mask no longer hides, and count() or in_array() on null
// throws a TypeError there, so give each one a defined starting value.
$skipped = 0;          // category members that did not match the monthly pattern
$maxi = 0;             // highest page index seen in any category
$maxcounter = 0;       // highest per-log entry count
$cachepage = null;     // title of the most recently fetched partner page
$cachecontents = null; // contents belonging to $cachepage
$others = array();     // titles skipped because of their namespace
$nomatches = array();  // titles on which no merge template was matched
$nosplits = array();   // titles whose template name was not recognised
$nopartners = array(); // titles whose template names no partner page
$pagename = array();   // collected page titles, iterated when building the logs
$submission = array(); // log page suffix => wikitext to post
$counter = array();    // per-log entry counts

echo "Logging in...\n";
$objwiki = new wikipedia();
// NOTE(review): this literal had been reduced to '] php wikibot classes', with a
// stray closing bracket, which looks like a wiki link to the bot account lost
// its opening part. It is reconstructed from the botuser constant as a best
// guess -- confirm against the canonical source.
$objwiki->http->useragent = '[[User:' . botuser . ']] php wikibot classes';
$objwiki->login($mbuser, $mbpass);
echo "...done.\n";

// Members of the parent maintenance category; the main loop filters these down
// to the monthly "Articles to be merged from ..." categories.
$mergemonths = $objwiki->categorymembers("Category:Articles to be merged");
print_r($mergemonths);
// Main pass over the members of "Category:Articles to be merged".
// For each member whose title matches the monthly "... merged from" pattern,
// the pages in that category are fetched, their merge templates are normalized
// with templateprocess(), the template parameters are parsed, and the partner
// pages are checked (underscores, self-merges, missing names, Wiktionary
// targets, blank pages, redirects, reciprocal tags). The counters declared
// before this loop are updated along the way and $mainsubmission gains one
// line per non-empty category. Entries that reach the end of the template loop
// without being skipped get a default discussion page: the talk page of the
// tagged page, built with a "Talk:" prefix or by appending " talk" to a
// matched namespace prefix. Members that do not match the monthly pattern are
// echoed as skipped and counted in $skipped.
//
// NOTE(review): in this copy of the script the variables that are iterated by
// index ($mergemonths, $transcludes, $raw1, $templatesplit, $otherpages, ...)
// are used without subscripts, several regular expressions look truncated, and
// some echo statements have unbalanced quotes. This looks like bracketed text
// (array subscripts, character classes, wiki links) was lost when the page was
// extracted. Confirm against the canonical source before running; the code is
// deliberately left untouched here.
for ($a = 0; $a < count($mergemonths); $a++) { // for each month
	#if (preg_match("/Category:Articles to be merged/", $mergemonths)) {
	// Only the monthly categories are processed; anything else falls through to
	// the else branch at the bottom of this loop.
	if (preg_match("/Category:Articles to be merged from/", $mergemonths)) {
		echo "\n__________\n" . $a . ": " . $mergemonths . "\n";
		$transcludes = $objwiki->categorymembers($mergemonths);
		#print_r($transcludes);
		// Month label taken from the category title; if the expected prefix was not
		// found ($repcount is 0) only the "Category:" prefix is removed.
		$monthyear = str_replace("Category:Articles to be merged from ", "", $mergemonths, $repcount);
		if ($repcount == 0) {
			$monthyear = str_replace("Category:", "", $mergemonths);
		}
		for ($i = 0; $i < count($transcludes); $i++) { // for each page in the category
			// Remember the largest page index seen, for the summary report.
			if ($i > $maxi) {
				$maxi = $i;
			}
			// Titles carrying one of the listed namespace prefixes are skipped.
			// NOTE(review): "Misplaced Pages" in this alternation presumably stands for
			// the Wikipedia project namespace -- confirm.
			if (preg_match("/^((User|Misplaced Pages|File|MediaWiki|Template|Help|Category|Portal)(( |_)talk)?|Talk):/", $transcludes)) {
				echo "\n\n?? Other namespace: " . $transcludes . "\n\n";
				$others = $transcludes;
				$othernamespace += 1;
				continue;
			}
			// Fetch the page text, reusing the cached copy when the title equals the
			// last partner page fetched. A FALSE-like result is retried after a
			// 10 second pause; the fifth failure aborts the script.
			$getpagefailed = 0;
			do {
				if ($transcludes == $cachepage) {
					echo "getpage 1: $transcludes|from cache\n";
					$contents = $cachecontents;
				}
				else {
					echo "getpage 1: $transcludes|";
					$contents = $objwiki->getpage($transcludes);
				}
				if ($contents == FALSE) {
					echo "\n\n?? getpage failed: " . $transcludes . "\n\n";
					$getpagefailed += 1;
					if ($getpagefailed == 5) {
						die("getpage Error");
					}
					sleep(10);
					continue;
				}
			} while ($contents == FALSE);
			// Collect the merge-type templates on the page; pages without any are
			// counted in $matchfailed and skipped.
			// NOTE(review): as written the star quantifies the name group itself, so
			// template parameters cannot match; a character class before it was
			// presumably lost -- confirm.
			if (preg_match_all("/\{{2}\s?(merge|afd-merge|merging|move section)*\}{2}/i", $contents, $raw1) == 0) {
				#echo "contents:\n";
				#echo "$contents";
				#echo "\n";
				unset($contents);
				echo "\n\n?? Match failed: " . $transcludes . "\n\n";
				$nomatches = $transcludes;
				$matchfailed += 1;
				continue;
			}
			// NOTE(review): with the default preg_match_all ordering, count($raw1) is
			// the number of capture groups plus one rather than the number of
			// templates matched; a subscript was presumably lost here -- confirm.
			$mergetemplates = count($raw1);
			if ($mergetemplates > 1) {
				echo "$mergetemplates merge templates on $transcludes\n";
				print_r($raw1);
			}
			// Number of templates on this page that were skipped rather than listed.
			$takeofflater = 0;
			for ($j = 0; $j < $mergetemplates; $j++) { // for each merge template on the page
				// Composite key: category index, page index, template index.
				$key = $a . "-" . $i . "-" . $j;
				$pagename = $transcludes;
				echo $key . ">> " . $pagename;
				$template = templateprocess($raw1);
				$ptemplate = $raw1;
				$pcontents = $contents;
				unset($contents);
				$templatesplit = array_map('trim',explode("|", $template));
				echo " " . $templatesplit;
				// Map the canonical template name to a merge type. Inside a switch,
				// "continue 2" targets the enclosing template loop, because PHP
				// treats switch as a looping structure for continue.
				switch ($templatesplit) {
					case "mergefrom":
						$type = "into";
						break;
					case "merge":
						$type = "with";
						break;
					case "mergeto":
						$type = "to";
						break;
					case "afd-mergeto":
					case "false-positive":
						echo "  . "]\n";
						unset($pagename);
						$takeofflater += 1;
						continue 2;
					default:
						echo "*** templatesplitfailed: " . $template . "\n";
						print_r($templatesplit);
						$nosplits = $transcludes;
						$templatesplitfailed += 1;
						echo "  . "]\n";
						unset($pagename);
						$takeofflater += 1;
						continue 2;
				}
				// Build a {{pagelist}} invocation and a parallel list of partner page
				// names from the template parameters.
				$otherpage = "{{pagelist|nspace=|";
				unset($otherpages);
				$otherpages = array();
				$otherpage_count = 0;
				// Parameter 0 is the template name, so parsing starts at 1.
				for ($para = 1; $para < count($templatesplit); $para++) {
					// A date that differs from the category month means the entry
					// belongs to another monthly category and is skipped here.
					if (preg_match("/^date=/i", $templatesplit)) {
						$date = str_replace("date=", "", $templatesplit);
						$date = str_replace("_", " ", $date);
						if ($date != $monthyear) {
							if (in_array($transcludes, $mismatched_dates) == FALSE) {
								$mismatched_dates = $transcludes;
								$mismatched_date += 1;
							}
							echo " Another date: " . $date . " not " . $monthyear;
							echo "  . "]\n";
							unset($pagename);
							unset($type);
							unset($date);
							unset($discuss);
							unset($target);
							unset($section);
							unset($multiplesections);
							unset($reason);
							$takeofflater += 1;
							continue 2;
						}
					}
					elseif (preg_match("/^(discuss|discussion)=/i", $templatesplit)) {
						$discuss = str_replace("discuss=", "", $templatesplit);
						$discuss = str_replace("discussion=", "", $discuss);
					}
					elseif (preg_match("/^target=/i", $templatesplit)) {
						$target = str_replace("target=", "", $templatesplit);
						if (in_array($target, $pagename)) {
							echo "  . "]\n";
							unset($pagename);
							unset($type);
							unset($date);
							unset($discuss);
							unset($target);
							unset($section);
							unset($multiplesections);
							unset($reason);
							$takeofflater += 1;
							continue 2;
						}
					}
					elseif (preg_match("/^section=/i", $templatesplit)) {
						$section = str_replace("section=", "", $templatesplit);
					}
					elseif (preg_match("/^multiplesections=/i", $templatesplit)) { // Template:Mergefrom uses parameter multiplesections; Merge and Mergeto do not
						$multiplesections = str_replace("multiplesections=", "", $templatesplit);
					}
					elseif (preg_match("/^(reason|comment)=/i", $templatesplit)) {
						// NOTE(review): the pattern accepts reason= and comment=, but only
						// the reason= prefix is removed below -- confirm this is intended.
						$reason = str_replace("reason=", "", $templatesplit);
						$reasons = $pagename . " …… " . $reason;
						$reason_count += 1;
					}
					else {
						if (in_array($templatesplit, $pagename)) {
							echo "  . "]\n";
							unset($pagename);
							unset($type);
							unset($date);
							unset($discuss);
							unset($target);
							unset($section);
							unset($multiplesections);
							unset($reason);
							$takeofflater += 1;
							continue 2;
						}
						else {
							if (preg_match("/^(1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20)=/i", $templatesplit)) {
								echo "\nNumbered parameter(s):" . $templatesplit;
								$templatesplit = str_replace("1=", "", $templatesplit);
							}
							$otherpage .= $templatesplit . "|";
							$otherpages = $templatesplit;
							$otherpage_count += 1;
						}
					}
				}
				$otherpage .= "}}";
				echo " -- " . $otherpage . "\n";
				#print_r($otherpages);
				for ($ii = 0; $ii < count($otherpages); $ii++) {
					$break = 0;
					$pagecontents = "";
					if (str_replace ("_"," ",$otherpages) != $otherpages) {
						echo "\n$otherpages has underscores\n";
						$pos = strpos($ptemplate, $otherpages);
						$otherpages = str_replace ("_"," ",$otherpages);
						if ($pos !== false) {
					            $new_template = substr_replace($ptemplate, $otherpages, $pos, strlen($otherpages));
					            $pcontents = str_replace($ptemplate,$new_template,$pcontents);
					            if ($objwiki->nobots($transcludes,botuser,$pcontents) == true)
					                $objwiki->edit($transcludes,$pcontents,"Replace underscores with spaces in merge template",true,true);
						}
					}
					if (strpos($otherpages,"#") !== false) {
						$len = strpos($otherpages,"#");
						if ($len == 0) $mp = $pagename;
						else $mp = substr($otherpages,0,$len); // strip section links
					}
					else $mp = $otherpages;
					// $mp is the partner title without any section link; a bare section
					// link refers to the tagged page itself.
					if ($pagename == $mp) {
						echo "\n***** PAGE " . $mp . " PROPOSED FOR SELF-MERGE *****\n\n";
						$selfmerge += 1;
					}
					else if ($mp == "") {
						// Empty partner name: collapse a doubled pipe in the template and
						// save the page when nobots() returns true.
						echo "\n**** Pagename not specified ****" . $ptemplate . "\n\n";
						$noname += 1;
					        $new_template = str_replace ("||","|",$ptemplate);
					        if ($new_template !== $ptemplate) {
					            	$pcontents = str_replace($ptemplate,$new_template,$pcontents);
					            	if ($objwiki->nobots($transcludes,botuser,$pcontents) == true)
					            	    $objwiki->edit($transcludes,$pcontents,"Remove redundant pipe in merge template",true,true);
					        }
					}
					else if (preg_match("/^(Wiktionary|Wikt):/i",$otherpages)) {
						// Wiktionary targets are not fetched; the pagelist is switched to
						// the wikt namespace instead.
						echo "\n*** Wiktionary merge: " . $otherpages . " ***\n\n";
						$wikt += 1;
						$otherpage = preg_replace("/nspace=/", "nspace=wikt", $otherpage);
					}
					else {
					    // Fetch the partner page, trying up to five times while the result
					    // is empty, and remember the last fetch in the one-entry cache.
					    // NOTE(review): $no_reciprocal is incremented on every attempt that
					    // finds no merge tag, so an empty page can add to it several times.
					    while ($pagecontents == "") {
					        if ($break == 5) {
					            echo "\n*** " . $pagename . ": PAGE " . $otherpages . " IS BLANK OR DOES NOT EXIST ***\n\n";
					            $blanks = $pagename . ":: " . $otherpages;
					            unset($pagecontents);
					            $blank += 1;
					            break;
					        }
					        else {
					            echo "getpage 2: $otherpages|";
					            $pagecontents = $objwiki->getpage($otherpages);
					            if (preg_match_all("/\{{2}\s?(merge|afd-merge|merging|move section)*\}{2}/i", $pagecontents, $raw2) == 0) {
					                echo "*** Merge tag not found on " . $otherpages. " *2*\n";
					                $no_reciprocal += 1;
					            }
					            $cachepage = $otherpages;
					            $cachecontents = $pagecontents;
					            $break += 1;
					        }
					    }
					    // Redirect handling: a partner that redirects back to the tagged page
					    // is recorded as possibly merged already; a target differing only by
					    // letter case is written back into the template.
					    // NOTE(review): both patterns in this branch look truncated -- confirm.
					    if (preg_match("/^\#REDIRECT(\s*|:)\{2}/i", $pagecontents, $redirect)) {
					        echo "\n*** " . $pagename. ": PAGE " . $otherpages . " IS A REDIRECT!! ***\n";
					        echo $pagecontents . "\n\n";
					        $redirects += 1;
					        preg_match("/(?<=\{2}))/i", $redirect, $target);
					        echo "Target: " . $target . "\n";
					        $target = ucfirst($target);
					        $target = str_replace ("_"," ",$target);
					        $target = trim($target);
					        if ($target == $pagename) {
					            echo $otherpages . " redirects to " . $pagename . " -- may have been merged\n\n";
					            $maybes = $pagename . ":: " . $otherpages;
					            $maybe +=1;
					        }
					        else if (strtoupper($otherpages) == strtoupper($target)) {
					            echo "Case difference\n";
					            $diffcases =  $pagename . ":: " . $otherpages . " vs. " . $target;
					            $diffcase += 1;
					            $pos = strpos($ptemplate, $otherpages);
					            if ($pos !== false) {
					                $new_template = substr_replace($ptemplate, $target, $pos, strlen($otherpages));
					                $pcontents = str_replace($ptemplate,$new_template,$pcontents);
					                if ($objwiki->nobots($transcludes,botuser,$pcontents) == true)
					                    $objwiki->edit($transcludes,$pcontents,"Bypass redirect in merge template – DIFFCAPS",true,true);
					            }
					        }
					    }
					}
				}
				// No partner page was named in the template at all.
				if ($otherpage == "{{pagelist|nspace=|}}") {
					$nopartners = $transcludes;
					$nopartner += 1;
					if ($templatesplit == "mergeto") $type = "to ?";
				}
				else if (substr($otherpage,0,22) == "{{pagelist|nspace=wikt") {
					#echo "\nwikt:\n";
				}
				// For merge-to tags, look for a tag on the target that mentions this
				// page; when one is found this entry is skipped.
				else if ($templatesplit == "mergeto") {
					$break = 0;
					$pagecontents = "";
					while ($pagecontents == "") {
						if ($break == 5) {
							echo "\n*** " . $pagename . ": PAGE " . $otherpages . " is BLANK or DOES NOT EXIST ***\n\n";
							unset($pagecontents);
							break;
						}
						else {
							if ($otherpages == $cachepage) {
								echo "getpage 3: $otherpages|from cache\n";
								$pagecontents = $cachecontents;
							}
							else {
								echo "getpage 3: $otherpages|";
								$pagecontents = $objwiki->getpage($otherpages);
								if (preg_match_all("/\{{2}\s?(merge|afd-merge|merging|move section)*\}{2}/i", $pagecontents, $raw3) == 0) {
									echo "*** Merge tag not found on " . $otherpages. " *3*\n";
									$no_reciprocal += 1;
								}
							}
							$break += 1;
						}
					}
					if (preg_match_all("/\{{2}\s?(merge|afd-merge|merging|move section)*\}{2}/i", $pagecontents, $raw) !== 0) {
						$reciprocal = FALSE;
						// Case-insensitive search for this page title in each matched tag.
						for ($jraw = 0; $jraw < count($raw); $jraw++) {
							echo "(" . $jraw . ") " . $raw;
							$pos = stripos($raw,$pagename);
							if ($pos === false) {
								echo " ** " . $pagename . " not found!";
							}
							else {
								echo " ** found " . $pagename;
								$reciprocal = TRUE;
							}
						}
						if ($reciprocal == FALSE) {
							echo "\n*** Reciprocal merge tag not found on " . $otherpages. " ***\n";
							$no_reciprocal += 1;
						}
						else {
							echo "\n";
							echo "mergeto target: " . $otherpage . " ◆ " . $otherpages;
							echo "  . "]\n";
							unset($pagename);
							unset($type);
							$takeofflater += 1;
							continue;
						}
					}
				}
				if ($discuss == "") {
					if (preg_match("/^(User|Misplaced Pages|Image|File|MediaWiki|Template|Help|Category|Portal):/i",$transcludes,$m)) {
						$discuss = str_replace($m,$m.' talk',$transcludes);
					}
					else {
						$discuss = "Talk:" . $transcludes;
					}
				}
			}
		}
		$j = $j - $takeofflater;
		if (count($transcludes) > 0) {
			$mainsubmission .= "* ]\n";
		}
		else {
			echo "\n*** Category is empty ***\n\n";
		}
	}
	else {
		echo "\n" . $a . ": Skipped: " . $mergemonths . "\n";
		$skipped += 1;
	}
}
// Build the wikitext of the log pages from the entries collected by the main
// loop: one {{merge log entry}} line per collected page, appended to
// $submission, with a running entry count kept in $counter and its maximum in
// $maxcounter.
//
// NOTE(review): "$counter]" and "$submission]" are not valid PHP and the
// "1=]" / "3=]]" fragments in the templates look like truncated wiki links.
// Subscripts and link text were presumably lost when the page was extracted;
// confirm against the canonical source. The code is left untouched here.

// Nothing was collected: stop before any log page is edited.
if (count($pagename) < 1) {
	echo "No pagenames! Exiting.";
	die();
}
echo "\n*** Preparing Updates ***\n\n";
print_r($pagename);
foreach ($pagename as $key => $title) {
	// Track the largest entry count reached so far for the summary report.
	if ($counter] > $maxcounter) {
		$maxcounter = $counter];
	}
	// One log line per entry. Merge-from and merge-to both emit type=into and
	// differ only in which side the page list is placed on.
	switch ($type) {
		case "with":  // regular merge
			// A third argument is only emitted when a target was given.
			if ($target != "") {
				$submission] .= "{{merge log entry|1=]|2=$otherpage|3=]]|talk=$discuss|type=with}}\n";
			}
			else {
				$submission] .= "{{merge log entry|1=]|2=$otherpage|talk=$discuss|type=with}}\n";
			}
			break;
		case "into":  // merge-from
			$submission] .= "{{merge log entry|1=$otherpage|2=]|talk=$discuss|type=into}}\n";
			break;
		case "to":  // merge-to
			$submission] .= "{{merge log entry|1=]|2=$otherpage|talk=$discuss|type=into}}\n";
			break;
		case "to ?":  // no partner
			$submission] .= "{{merge log entry|1=]|2=?|talk=$discuss|type=into}}\n";
			break;
		default:
			// Unknown type: report it; the entry is still counted below.
			echo "\n*error* " . $type . "\n";
			break; // use "continue 2"?
	}
	$counter] += 1;
	echo $date . " | " . $counter] . ") " . $key . " ++ " . $title. "\n";
}
// Post the per-log pages, then the index of logs, then print the run summary.
echo "\n\n*** Posting Updates ***\n\n";

// $submission only exists once at least one log entry has been appended to
// it; treat a missing or non-array value as nothing to post rather than
// letting foreach raise a warning.
$logpages = (isset($submission) && is_array($submission)) ? $submission : array();

// NOTE(review): the page titles below had "Misplaced Pages:" where the project
// namespace belongs. That matches the site-wide substitution visible elsewhere
// in this copy (the namespace lists in the main loop sit next to User, File,
// Template, ...), so the "Wikipedia:" prefix is restored here -- confirm
// against the canonical source.
foreach ($logpages as $log => $content) {
	echo "Updating> " . $log. "\n";
	#echo $content . "\n";
	// The two trailing boolean arguments are passed through unchanged; their
	// meaning is defined by edit() in botclasses.php.
	$objwiki->edit("Wikipedia:Proposed mergers/Log/" . $log,$content,"Updating list of proposed mergers",false,true);
}
echo "Updating Wikipedia:Proposed mergers/Log\n";
$objwiki->edit("Wikipedia:Proposed mergers/Log",$mainsubmission,"Updating list of logs",true,true);
echo "done.\n";

// These values are only assigned on particular paths through the main loop.
// Give them defaults so the summary prints 0 or an empty list instead of
// raising undefined-variable warnings.
if (!isset($skipped)) { $skipped = 0; }
if (!isset($maxi)) { $maxi = 0; }
if (!isset($maxcounter)) { $maxcounter = 0; }
if (!isset($others)) { $others = array(); }
if (!isset($nomatches)) { $nomatches = array(); }
if (!isset($nosplits)) { $nosplits = array(); }
if (!isset($nopartners)) { $nopartners = array(); }

// Run summary: each counter, followed by its detail list where one is kept.
echo "\nCategory members skipped: " . $skipped;
echo "\nMaximum pages in a category: " . $maxi;
echo "\nMaximum items on a report page: " . $maxcounter;
echo "\nOther namespaces skipped: " . $othernamespace . "\n";
print_r($others);
echo "\nPattern match failed: " . $matchfailed . "\n";
print_r($nomatches);
echo "\nTemplatesplit failed: " . $templatesplitfailed . "\n";
print_r($nosplits);
echo "\nNo partner: " . $nopartner . "\n";
print_r($nopartners);
echo "\nPages with multiple proposal dates: " . $mismatched_date . "\n";
print_r($mismatched_dates);
echo "\nPages where reasons are given in the template: " . $reason_count . "\n";
print_r($reasons);
echo "\nOther pages which are blank or don't exist: " . $blank . "\n";
print_r($blanks);
echo "\nOther pages which are redirects: " . $redirects;
echo "\n  Maybe merged: " . $maybe . "\n";
print_r($maybes);
echo "\n  Cases differ: " . $diffcase . "\n";
print_r($diffcases);
echo "\nMerge-to targets without reciprocal merge tags: " . $no_reciprocal . "\n";
echo "\nSelf-merges: " . $selfmerge . "\n";
echo "\nNo name specified: " . $noname . "\n";
echo "\nWiktionary merges: " . $wikt . "\n";
echo "\nMission accomplished.\n\n";
?>