This is an old revision of this page, as edited by Wbm1058 (talk | contribs) at 23:12, 10 April 2020 (v 1.91 – fix indents). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
Revision as of 23:12, 10 April 2020 by Wbm1058 (talk | contribs) (v 1.91 – fix indents)(diff) ← Previous revision | Latest revision (diff) | Newer revision → (diff)
<?php
/** proposedmergers.php - To generate lists of proposed page mergers on Wikipedia
 *
 *  (c) 2009 James Hare - http://en.wikipedia.org/User:Harej
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *  Developers (add yourself here if you worked on the code):
 *    James Hare - [[User:Harej]] - Wrote everything
 *    WBM - [[User:Wbm1058]] - March/April 2013 updates
 **/

ini_set("display_errors", 1);
error_reporting(E_ALL ^ E_NOTICE);

require_once 'botclasses.php'; // Botclasses.php was written by User:Chris_G and is available under the GNU General Public License
include("logininfo.php"); // presumably defines $mbuser and $mbpass used for the login below - confirm

const bot_version = "1.91";
const botuser = "Merge bot";

/**
 * Reduce a raw merge-template call to "canonicalname|param|param...".
 *
 * The incoming text is echoed for the run log. Then every "{{" (with one
 * optional following whitespace character) and every "}}" (with one optional
 * preceding whitespace character) is removed, one optional whitespace
 * character on each side of every "|" and "=" is dropped, and a leading
 * template alias is rewritten to one of the canonical names the main loop
 * switches on: "merge", "mergefrom", "mergeto", "afd-mergeto" or
 * "false-positive". Text that starts with no known alias is returned with
 * only the whitespace/brace normalisation applied.
 *
 * @param string $processed Template text as found on the page,
 *                          e.g. "{{Merge to|Foo|date=May 2013}}".
 * @return string Normalised text, e.g. "mergeto|Foo|date=May 2013".
 */
function templateprocess($processed) {
    echo " :: " . $processed;

    $processed = preg_replace("/\s?\|\s?/", "|", $processed);
    $processed = preg_replace("/\{{2}\s?/", "", $processed);
    $processed = preg_replace("/\s?\}{2}/", "", $processed);
    $processed = preg_replace("/\s?=\s?/", "=", $processed);

    // Alias rules: array(pattern, canonical name).
    //
    // The rules run in this order and each rule sees the output of the ones
    // before it. The patterns are anchored at the start only, so a short alias
    // also consumes the prefix of a longer one ("Merge from" is first lowered
    // to "merge from" by the first rule and only then becomes "mergefrom").
    // Most long aliases survive that chaining, but "Merge to article" does not
    // when "Merge to" runs first: the result is "mergeto article", which the
    // caller reports as templatesplitfailed. The article rule is therefore
    // listed before the "Merge to" rule.
    $aliasrules = array(
        # Redirects to {{Merge}} (14 aliases)
        array("/^(Mergewith|Merge with|Merge_with|MergeVfD|Mergesplit|MergeSplit|Merge)/i", "merge"),
        array("/^(Mergedisputed|MergeDisputed|Merge-disputed|Merge disputed|Merge_disputed)/i", "merge"),
        array("/^(Merge-multiple|Mergemulti|Mergetomultiple-with|Multimerge)/i", "merge"),
        array("/^(Proposed merge|Proposed_merge)/i", "merge"),
        # {{Merging}}
        array("/^(Mergingsectionto|Merging|Merging to|Merging_to)/i", "merge"),
        # Redirects to {{Merge from}} (11 aliases)
        array("/^(Merge from|Merge_from|Merge-from|Include|Mergrefrom|Mergefrom-category|MergeFrom|Mergefrom)/i", "mergefrom"),
        array("/^(Mergefrom-multiple|Multiplemergefrom|Mergefrommultiple|Mergefrommulti|Multimergefrom)/i", "mergefrom"),
        # {{Afd-merge from}}, {{Merging from}}
        array("/^(Afd-merge from|Afd-merge_from|Afd-mergefrom|Afdmergefrom|Merging from|Merging_from|Mergingfrom)/i", "mergefrom"),
        # Redirects to {{Merge to}} (15 aliases) and {{Merge school}} (1 alias)
        # (the "... article" aliases must be tried before the plain "Merge to" ones, see above)
        array("/^(MergePartial|Merge-multiple-to|Merge to article|Merge_to_article)/i", "mergeto"),
        array("/^(Merge to|Merge_to|Merge-to|Mergeinto|MergetoCat|Mergelist|Mergeto-disputed)/i", "mergeto"),
        array("/^(Mergeto-multiple|Multiplemergeto|Multiplemergeinto|Merge into|Merge_into|Merge-into|MergeTo|Mergeto|Merge2)/i", "mergeto"),
        array("/^(Merge school|Merge_school|Merge-school)/i", "mergeto"),
        # {{Afd-merge to}}
        array("/^(Afd-merge to|Afd-merge_to|Afd-mergeto|Afdmergeto|AfD-merge to|AfD-merge_to)/i", "afd-mergeto"),
        # {{Merge portions from}}
        // This is really a splitting template, not a merging template
        array("/^(Merge portions from|Merge_portions_from|Move portions from|Move_portions_from)/i", "false-positive"),
    );

    foreach ($aliasrules as $rule) {
        $processed = preg_replace($rule[0], $rule[1], $processed);
    }

    return $processed;
}

echo "PHP version: " . PHP_VERSION . "\n";
#phpinfo();
echo "Bot version: " . bot_version . "\n";

// Run statistics, reported at the very end of the script.
$othernamespace = 0;
$matchfailed = 0;
$templatesplitfailed = 0;
$nopartner = 0;
$mismatched_date = 0;
$mismatched_dates = array();
$reason_count = 0;
$reasons = array();
$blank = 0;
$blanks = array();
$no_reciprocal = 0;
$redirects = 0;
$maybe = 0;
$maybes = array();
$diffcase = 0;
$diffcases = array();
$selfmerge = 0;
$noname = 0;
$wikt = 0;
$mainsubmission = "";

// These were previously created implicitly on first use and read before any
// assignment ($maxi in the page loop, the others further down). Defining them
// up front avoids undefined-variable warnings (and a TypeError from count()
// on PHP 8 when no page was collected) and lets the final report print 0 or
// an empty list when nothing was recorded.
$maxi = 0;
$maxcounter = 0;
$skipped = 0;
$others = array();
$nomatches = array();
$nosplits = array();
$nopartners = array();
$pagename = array();
$cachepage = null;
$cachecontents = null;

echo "Logging in...\n";
$objwiki = new wikipedia();
// NOTE(review): the leading "]" looks like the remnant of a wiki link that was
// stripped from this listing - confirm the intended user-agent string.
$objwiki->http->useragent = '] php wikibot classes';
$objwiki->login($mbuser, $mbpass);
echo "...done.\n";

$mergemonths = $objwiki->categorymembers("Category:Articles to be merged");
print_r($mergemonths);

for ($a = 0; $a < count($mergemonths); $a++) { // for each month
    #if (preg_match("/Category:Articles to be merged/", $mergemonths[$a])) {
    // Work on the current category title, not on the whole list: the string
    // functions below need a single string.
    if (preg_match("/Category:Articles to be merged from/", $mergemonths[$a])) {
        echo "\n__________\n" . $a . ": " . $mergemonths[$a] . "\n";
        $transcludes = $objwiki->categorymembers($mergemonths[$a]);
        #print_r($transcludes);
        // "Category:Articles to be merged from May 2013" -> "May 2013"
        $monthyear = str_replace("Category:Articles to be merged from ", "", $mergemonths[$a], $repcount);
        if ($repcount == 0) {
            $monthyear = str_replace("Category:", "", $mergemonths[$a]);
        }
        for ($i = 0; $i < count($transcludes); $i++) { // for each page in the category
            if ($i > $maxi) {
                $maxi = $i;
            }
            // Only main-namespace pages are processed; "Wikipedia" is the project namespace.
            if (preg_match("/^((User|Wikipedia|File|MediaWiki|Template|Help|Category|Portal)(( |_)talk)?|Talk):/", $transcludes[$i])) {
                echo "\n\n?? Other namespace: " . $transcludes[$i] .
"\n\n"; $others = $transcludes; $othernamespace += 1; continue; }
// NOTE(review): this listing appears damaged and is left untouched below.
//  - Text from an opening square bracket up to the next closing bracket seems
//    to have been stripped (compare the stray closing brackets after $counter
//    and $submission near the end, and the echo statements that consist of a
//    lone dot and bracket), so array subscripts, wiki-link literals and regex
//    character classes are missing.
//  - The original line breaks were collapsed, so every hash or double-slash
//    comment now swallows the code that follows it on the same physical line.
//  - Page titles and the namespace alternation further down read
//    Misplaced Pages where a project namespace name is presumably intended.
//  Restore the code from the canonical source before running it.
//
// Overview of the remainder of the script, as far as it can be read here:
//  - every merge template found on the page is normalised with
//    templateprocess() and classified by its canonical name in a switch;
//  - its parameters (date, discuss/discussion, target, section,
//    multiplesections, reason/comment, and the remaining page names) are
//    collected;
//  - each named partner page is fetched and checked; the tagged page is edited
//    (only when nobots() returns true) to replace underscores in the name,
//    to remove a doubled pipe, or to bypass a redirect that differs in case;
//  - merge log entries are assembled per log page, written with
//    $objwiki->edit, and a statistics report is echoed.
//
// Fetch the wikitext of the current category member. $cachepage and
// $cachecontents are assigned further down when a partner page is fetched; if
// the names compare equal the cached text is reused instead of calling
// getpage again. A result that loosely equals FALSE (which includes an empty
// string) is retried after a 10 second sleep; the fifth failure calls die().
$getpagefailed = 0; do { if ($transcludes == $cachepage) { echo "getpage 1: $transcludes|from cache\n"; $contents = $cachecontents; } else { echo "getpage 1: $transcludes|"; $contents = $objwiki->getpage($transcludes); } if ($contents == FALSE) { echo "\n\n?? getpage failed: " . $transcludes . "\n\n"; $getpagefailed += 1; if ($getpagefailed == 5) { die("getpage Error"); } sleep(10); continue; } } while ($contents == FALSE);
// Look for merge-family templates in the page text; when preg_match_all
// reports zero matches the page is counted in $matchfailed.
// NOTE(review): the quantifier directly after the alternation group looks like
// the remnant of a stripped character class - confirm the pattern.
if (preg_match_all("/\{{2}\s?(merge|afd-merge|merging|move section)*\}{2}/i", $contents, $raw1) == 0) { #echo "contents:\n"; #echo "$contents"; #echo "\n"; unset($contents); echo "\n\n?? Match failed: " . $transcludes . "\n\n"; $nomatches = $transcludes; $matchfailed += 1; continue; } $mergetemplates = count($raw1); if ($mergetemplates > 1) { echo "$mergetemplates merge templates on $transcludes\n"; print_r($raw1); } $takeofflater = 0; for ($j = 0; $j < $mergetemplates; $j++) { // for each merge template on the page $key = $a . "-" . $i . "-" . $j; $pagename = $transcludes; echo $key . ">> " . $pagename; $template = templateprocess($raw1); $ptemplate = $raw1; $pcontents = $contents; unset($contents); $templatesplit = array_map('trim',explode("|", $template)); echo " " . $templatesplit; switch ($templatesplit) { case "mergefrom": $type = "into"; break; case "merge": $type = "with"; break; case "mergeto": $type = "to"; break; case "afd-mergeto": case "false-positive": echo " . "]\n"; unset($pagename); $takeofflater += 1; continue 2; default: echo "*** templatesplitfailed: " . $template . "\n"; print_r($templatesplit); $nosplits = $transcludes; $templatesplitfailed += 1; echo " . 
"]\n"; unset($pagename); $takeofflater += 1; continue 2; } $otherpage = "{{pagelist|nspace=|"; unset($otherpages); $otherpages = array(); $otherpage_count = 0; for ($para = 1; $para < count($templatesplit); $para++) { if (preg_match("/^date=/i", $templatesplit)) { $date = str_replace("date=", "", $templatesplit); $date = str_replace("_", " ", $date); if ($date != $monthyear) { if (in_array($transcludes, $mismatched_dates) == FALSE) { $mismatched_dates = $transcludes; $mismatched_date += 1; } echo " Another date: " . $date . " not " . $monthyear; echo " . "]\n"; unset($pagename); unset($type); unset($date); unset($discuss); unset($target); unset($section); unset($multiplesections); unset($reason); $takeofflater += 1; continue 2; } } elseif (preg_match("/^(discuss|discussion)=/i", $templatesplit)) { $discuss = str_replace("discuss=", "", $templatesplit); $discuss = str_replace("discussion=", "", $discuss); } elseif (preg_match("/^target=/i", $templatesplit)) { $target = str_replace("target=", "", $templatesplit); if (in_array($target, $pagename)) { echo " . "]\n"; unset($pagename); unset($type); unset($date); unset($discuss); unset($target); unset($section); unset($multiplesections); unset($reason); $takeofflater += 1; continue 2; } } elseif (preg_match("/^section=/i", $templatesplit)) { $section = str_replace("section=", "", $templatesplit); } elseif (preg_match("/^multiplesections=/i", $templatesplit)) { // Template:Mergefrom uses parameter multiplesections; Merge and Mergeto do not $multiplesections = str_replace("multiplesections=", "", $templatesplit); } elseif (preg_match("/^(reason|comment)=/i", $templatesplit)) { $reason = str_replace("reason=", "", $templatesplit); $reasons = $pagename . " …… " . $reason; $reason_count += 1; } else { if (in_array($templatesplit, $pagename)) { echo " . 
"]\n"; unset($pagename); unset($type); unset($date); unset($discuss); unset($target); unset($section); unset($multiplesections); unset($reason); $takeofflater += 1; continue 2; } else { if (preg_match("/^(1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20)=/i", $templatesplit)) { echo "\nNumbered parameter(s):" . $templatesplit; $templatesplit = str_replace("1=", "", $templatesplit); } $otherpage .= $templatesplit . "|"; $otherpages = $templatesplit; $otherpage_count += 1; } } } $otherpage .= "}}"; echo " -- " . $otherpage . "\n"; #print_r($otherpages); for ($ii = 0; $ii < count($otherpages); $ii++) { $break = 0; $pagecontents = ""; if (str_replace ("_"," ",$otherpages) != $otherpages) { echo "\n$otherpages has underscores\n"; $pos = strpos($ptemplate, $otherpages); $otherpages = str_replace ("_"," ",$otherpages); if ($pos !== false) { $new_template = substr_replace($ptemplate, $otherpages, $pos, strlen($otherpages)); $pcontents = str_replace($ptemplate,$new_template,$pcontents); if ($objwiki->nobots($transcludes,botuser,$pcontents) == true) $objwiki->edit($transcludes,$pcontents,"Replace underscores with spaces in merge template",true,true); } } if (strpos($otherpages,"#") !== false) { $len = strpos($otherpages,"#"); if ($len == 0) $mp = $pagename; else $mp = substr($otherpages,0,$len); // strip section links } else $mp = $otherpages; if ($pagename == $mp) { echo "\n***** PAGE " . $mp . " PROPOSED FOR SELF-MERGE *****\n\n"; $selfmerge += 1; } else if ($mp == "") { echo "\n**** Pagename not specified ****" . $ptemplate . "\n\n"; $noname += 1; $new_template = str_replace ("||","|",$ptemplate); if ($new_template !== $ptemplate) { $pcontents = str_replace($ptemplate,$new_template,$pcontents); if ($objwiki->nobots($transcludes,botuser,$pcontents) == true) $objwiki->edit($transcludes,$pcontents,"Remove redundant pipe in merge template",true,true); } } else if (preg_match("/^(Wiktionary|Wikt):/i",$otherpages)) { echo "\n*** Wiktionary merge: " . $otherpages . 
" ***\n\n"; $wikt += 1; $otherpage = preg_replace("/nspace=/", "nspace=wikt", $otherpage); } else { while ($pagecontents == "") { if ($break == 5) { echo "\n*** " . $pagename . ": PAGE " . $otherpages . " IS BLANK OR DOES NOT EXIST ***\n\n"; $blanks = $pagename . ":: " . $otherpages; unset($pagecontents); $blank += 1; break; } else { echo "getpage 2: $otherpages|"; $pagecontents = $objwiki->getpage($otherpages); if (preg_match_all("/\{{2}\s?(merge|afd-merge|merging|move section)*\}{2}/i", $pagecontents, $raw2) == 0) { echo "*** Merge tag not found on " . $otherpages. " *2*\n"; $no_reciprocal += 1; } $cachepage = $otherpages; $cachecontents = $pagecontents; $break += 1; } } if (preg_match("/^\#REDIRECT(\s*|:)\{2}/i", $pagecontents, $redirect)) { echo "\n*** " . $pagename. ": PAGE " . $otherpages . " IS A REDIRECT!! ***\n"; echo $pagecontents . "\n\n"; $redirects += 1; preg_match("/(?<=\{2}))/i", $redirect, $target); echo "Target: " . $target . "\n"; $target = ucfirst($target); $target = str_replace ("_"," ",$target); $target = trim($target); if ($target == $pagename) { echo $otherpages . " redirects to " . $pagename . " -- may have been merged\n\n"; $maybes = $pagename . ":: " . $otherpages; $maybe +=1; } else if (strtoupper($otherpages) == strtoupper($target)) { echo "Case difference\n"; $diffcases = $pagename . ":: " . $otherpages . " vs. " . 
$target; $diffcase += 1; $pos = strpos($ptemplate, $otherpages); if ($pos !== false) { $new_template = substr_replace($ptemplate, $target, $pos, strlen($otherpages)); $pcontents = str_replace($ptemplate,$new_template,$pcontents); if ($objwiki->nobots($transcludes,botuser,$pcontents) == true) $objwiki->edit($transcludes,$pcontents,"Bypass redirect in merge template – DIFFCAPS",true,true); } } } } } if ($otherpage == "{{pagelist|nspace=|}}") { $nopartners = $transcludes; $nopartner += 1; if ($templatesplit == "mergeto") $type = "to ?"; } else if (substr($otherpage,0,22) == "{{pagelist|nspace=wikt") { #echo "\nwikt:\n"; } else if ($templatesplit == "mergeto") { $break = 0; $pagecontents = ""; while ($pagecontents == "") { if ($break == 5) { echo "\n*** " . $pagename . ": PAGE " . $otherpages . " is BLANK or DOES NOT EXIST ***\n\n"; unset($pagecontents); break; } else { if ($otherpages == $cachepage) { echo "getpage 3: $otherpages|from cache\n"; $pagecontents = $cachecontents; } else { echo "getpage 3: $otherpages|"; $pagecontents = $objwiki->getpage($otherpages); if (preg_match_all("/\{{2}\s?(merge|afd-merge|merging|move section)*\}{2}/i", $pagecontents, $raw3) == 0) { echo "*** Merge tag not found on " . $otherpages. " *3*\n"; $no_reciprocal += 1; } } $break += 1; } } if (preg_match_all("/\{{2}\s?(merge|afd-merge|merging|move section)*\}{2}/i", $pagecontents, $raw) !== 0) { $reciprocal = FALSE; for ($jraw = 0; $jraw < count($raw); $jraw++) { echo "(" . $jraw . ") " . $raw; $pos = stripos($raw,$pagename); if ($pos === false) { echo " ** " . $pagename . " not found!"; } else { echo " ** found " . $pagename; $reciprocal = TRUE; } } if ($reciprocal == FALSE) { echo "\n*** Reciprocal merge tag not found on " . $otherpages. " ***\n"; $no_reciprocal += 1; } else { echo "\n"; echo "mergeto target: " . $otherpage . " ◆ " . $otherpages; echo " . 
"]\n"; unset($pagename); unset($type); $takeofflater += 1; continue; } } } if ($discuss == "") { if (preg_match("/^(User|Misplaced Pages|Image|File|MediaWiki|Template|Help|Category|Portal):/i",$transcludes,$m)) { $discuss = str_replace($m,$m.' talk',$transcludes); } else { $discuss = "Talk:" . $transcludes; } } } } $j = $j - $takeofflater; if (count($transcludes) > 0) { $mainsubmission .= "* ]\n"; } else { echo "\n*** Category is empty ***\n\n"; } } else { echo "\n" . $a . ": Skipped: " . $mergemonths . "\n"; $skipped += 1; } } if (count($pagename) < 1) { echo "No pagenames! Exiting."; die(); } echo "\n*** Preparing Updates ***\n\n"; print_r($pagename); foreach ($pagename as $key => $title) { if ($counter] > $maxcounter) { $maxcounter = $counter]; } switch ($type) { case "with": // regular merge if ($target != "") { $submission] .= "{{merge log entry|1=]|2=$otherpage|3=]]|talk=$discuss|type=with}}\n"; } else { $submission] .= "{{merge log entry|1=]|2=$otherpage|talk=$discuss|type=with}}\n"; } break; case "into": // merge-from $submission] .= "{{merge log entry|1=$otherpage|2=]|talk=$discuss|type=into}}\n"; break; case "to": // merge-to $submission] .= "{{merge log entry|1=]|2=$otherpage|talk=$discuss|type=into}}\n"; break; case "to ?": // no partner $submission] .= "{{merge log entry|1=]|2=?|talk=$discuss|type=into}}\n"; break; default: echo "\n*error* " . $type . "\n"; break; // use "continue 2"? } $counter] += 1; echo $date . " | " . $counter] . ") " . $key . " ++ " . $title. "\n"; } echo "\n\n*** Posting Updates ***\n\n"; foreach ($submission as $log => $content) { echo "Updating> " . $log. "\n"; #echo $content . "\n"; $objwiki->edit("Misplaced Pages:Proposed mergers/Log/" . $log,$content,"Updating list of proposed mergers",false,true); } echo "Updating Misplaced Pages:Proposed mergers/Log\n"; $objwiki->edit("Misplaced Pages:Proposed mergers/Log",$mainsubmission,"Updating list of logs",true,true); echo "done.\n"; echo "\nCategory members skipped: " . 
$skipped; echo "\nMaximum pages in a category: " . $maxi; echo "\nMaximum items on a report page: " . $maxcounter; echo "\nOther namespaces skipped: " . $othernamespace . "\n"; print_r($others); echo "\nPattern match failed: " . $matchfailed . "\n"; print_r($nomatches); echo "\nTemplatesplit failed: " . $templatesplitfailed . "\n"; print_r($nosplits); echo "\nNo partner: " . $nopartner . "\n"; print_r($nopartners); echo "\nPages with multiple proposal dates: " . $mismatched_date . "\n"; print_r($mismatched_dates); echo "\nPages where reasons are given in the template: " . $reason_count . "\n"; print_r($reasons); echo "\nOther pages which are blank or don't exist: " . $blank . "\n"; print_r($blanks); echo "\nOther pages which are redirects: " . $redirects; echo "\n Maybe merged: " . $maybe . "\n"; print_r($maybes); echo "\n Cases differ: " . $diffcase . "\n"; print_r($diffcases); echo "\nMerge-to targets without reciprocal merge tags: " . $no_reciprocal . "\n"; echo "\nSelf-merges: " . $selfmerge . "\n"; echo "\nNo name specified: " . $noname . "\n"; echo "\nWiktionary merges: " . $wikt . "\n"; echo "\nMission accomplished.\n\n"; ?>