用户:Xcnbot/1
外观
#!/usr/bin/perl
# catnav bot by WikiPedia:User:下一次登录
# Disclaimer: No warranty granted, use at your own risk!
#
# Logs in to the wiki, starts at a root category and walks the category
# tree recursively.  For every sub-category it checks whether the category
# page already carries a {{Catnav|...}} template describing the path that
# was walked to reach it; if not, the template is prepended and the page is
# saved.  Progress is written to cnlog.txt, diguilog.txt and debug1.txt in
# the current directory.

use strict;
use warnings;

# call requirements
use Getopt::Std;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use HTTP::Cookies;

# ---------------------------------------------------------------------------
# Parameters
# ---------------------------------------------------------------------------
my $username  = "xcnbot";              # input your username here, only English names are tested.
my $password  = "******";              # input your password here
my $WIKI_PATH = "zh.wikipedia.org";

my $DEPTH = 5;                         # maximum number of entries kept in a Catnav trail

# URL-encoded name of the category the walk starts from.
my $ROOT_CATEGORY_U = "%E9%A0%81%E9%9D%A2%E5%88%86%E9%A1%9E";

# Following the "next 200" pagination links is implemented but switched off
# (it was disabled in the original script as well).
my $FOLLOW_NEXT_200 = 0;

# Matches one sub-category link on a category page.  Capture 1 is the
# URL-encoded category name, capture 2 the displayed name.  Whitespace
# between attributes/classes is matched loosely so that the exact number of
# blanks emitted by the wiki does not matter.
my $CHILD_LINK_RE = qr{
    <a \s+ class="CategoryTreeLabel \s+ CategoryTreeLabelNs14 \s+ CategoryTreeLabelCategory"
    \s+ href="/wiki/Category: ([^"]*) " [^>]* >
    (.*?)
    </a></div>
}xs;

# HTML entities that appear inside the edit <textarea> and must be turned
# back into plain characters before the text is posted.
my %ENTITY_CHAR = (
    quot => "\"",
    lt   => "<",
    gt   => ">",
    amp  => "&",
);

# URL-encoded names of the categories currently being processed somewhere up
# the recursion stack; used to avoid descending into a category cycle.
my %on_path;

# ---------------------------------------------------------------------------
# Login to wiki
# ---------------------------------------------------------------------------
# Set up connection data
my $browser    = LWP::UserAgent->new();
my @ns_headers = (
    'User-Agent' => 'Xcnbot 1.0 by 下一次登录',    # Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7) Gecko/20041107 Firefox/1.0
    'Accept'     => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*',
    'Accept-Charset'  => 'iso-8859-1,*,utf-8',
    'Accept-Language' => 'en-US',
);

# Hold cookies
$browser->cookie_jar( {} );

{    # Login
    # Make login request
    my $response = $browser->post(
        "http://" . $WIKI_PATH . "/w/index.php?title=Special:Userlogin&action=submitlogin",
        @ns_headers,
        Content => [
            wpName         => $username,
            wpPassword     => $password,
            wpRemember     => "1",
            wpLoginAttempt => "Log in",
        ],
    );

    # After logging in, we should be redirected to another page.
    # If we aren't, something is wrong.
    if ( $response->code != 302 ) {    # cannot login
        print "We weren't able to login.\n\n";
        exit 1;
    }
}
print "Logged in \n";

# ---------------------------------------------------------------------------
# Connect to root cat and start the walk
# ---------------------------------------------------------------------------
my $root_url  = "http://" . $WIKI_PATH . "/wiki/Category:" . $ROOT_CATEGORY_U;
my $root_html = _fetch_page($root_url);

# Extract the displayed name of the root category from the page heading.
my $root_name;
if ( $root_html =~ m{<h1\s+class="firstHeading">Category:(.*?)</h1>}s ) {
    $root_name = $1;
}
else {
    die "Could not find the category heading on $root_url\n";
}
print "got vName\n";

_write_log( '>', 'debug1.txt', $root_url, "\n\n before digui\n\n" );    # debug
_write_log( '>', 'cnlog.txt',  "program start\n" );                     # log

gotocat( $root_name, $ROOT_CATEGORY_U, 1, ($root_name) );

# ---------------------------------------------------------------------------
# Subroutines
# ---------------------------------------------------------------------------

# gotocat($vName, $vNameU, $vDepth, @aTree)
#
# Recursive routine.  Processes one category:
#   1. lists and sorts its sub-categories,
#   2. makes sure each sub-category page has a Catnav template for the
#      current trail,
#   3. calls itself for each sub-category.
#
#   $vName   displayed name of the current category
#   $vNameU  URL-encoded name of the current category
#   $vDepth  number of valid entries in @aTree
#   @aTree   trail of displayed category names from the root to $vName
sub gotocat {
    my ( $vName, $vNameU, $vDepth, @aTree ) = @_;

    # debug log: this file is truncated on every call, so it always shows
    # the category being processed most recently.
    _write_log( '>', 'diguilog.txt',
        "Name=", $vName, " UName=", $vNameU,
        " vDepth=", $vDepth,
        " aTree=", _trail( \@aTree, $vDepth ), "\n" );

    if ( $vDepth > $DEPTH ) {    # depth exceeds
        # Keep the root, replace the second entry by "..." and shift the
        # remaining entries down by one so the trail has $DEPTH entries.
        $aTree[1] = "...";
        for my $i ( 2 .. $DEPTH - 1 ) {
            $aTree[$i] = $aTree[ $i + 1 ];
        }
        $vDepth = $DEPTH;
        _write_log( '>>', 'diguilog.txt', "Depth exceeds\n" );
    }
    print "in digui. vDepth=", $vDepth, "\n";

    # --- list and sort all children ---------------------------------------
    print "list ";
    my $html     = _fetch_page( "http://" . $WIKI_PATH . "/wiki/Category:" . $vNameU );
    my @children = _extract_children($html);

    if ($FOLLOW_NEXT_200) {    # check if there is "next 200"
        my %seen_page;
        while ( defined( my $path = _next_page_path($html) ) ) {
            last if $seen_page{$path}++;    # never fetch the same page twice
            print "200 ";
            my $url = "http://" . $WIKI_PATH . $path;
            _write_log( '>>', 'cnlog.txt', $url, "\n" );
            $html = _fetch_page($url);
            push @children, _extract_children($html);
        }
    }

    print "sort ";
    @children = sort { $a->{name} cmp $b->{name} } @children;
    my $nChild = scalar @children;
    print "entered current page. nChild=", $nChild, "\n";

    _write_log( '>>', 'cnlog.txt',
        "Digui: Cat=", $vName, ", Tree=", _trail( \@aTree, $vDepth ), "\n",
        " ", $nChild, " children: ", ( map { ( $_->{name}, " " ) } @children ), "\n" );

    # --- check and add catnav to all children -----------------------------
    for my $child (@children) {
        _ensure_catnav( $child, \@aTree, $vDepth );
    }

    # --- call sub-gotocat on every child ----------------------------------
    $on_path{$vNameU} = 1;
    for my $child (@children) {
        _write_log( '>>', 'cnlog.txt', "Go into: Child=", $child->{name}, "\n" );

        # A category that is already being processed further up the stack
        # would lead to endless recursion; skip it.
        if ( $on_path{ $child->{uname} } ) {
            _write_log( '>>', 'cnlog.txt', "Skipped (cycle): Child=", $child->{name}, "\n" );
            next;
        }

        $aTree[$vDepth] = $child->{name};    # Change @aTree
        gotocat( $child->{name}, $child->{uname}, $vDepth + 1, @aTree );

        _write_log( '>>', 'cnlog.txt', "Jump outto: Parent=", $vName, "\n" );
    }
    delete $on_path{$vNameU};

    return;
}

# Fetches $url with the shared user agent and returns the complete response
# (status line, headers and body) as one string.
sub _fetch_page {
    my ($url) = @_;
    my $response = $browser->get( $url, @ns_headers );
    return $response->as_string;
}

# Writes @text to $file, opened with $mode ('>' to truncate, '>>' to
# append).  Logging is best effort: a failure is reported on STDERR but does
# not stop the bot.
sub _write_log {
    my ( $mode, $file, @text ) = @_;
    my $fh;
    if ( !open $fh, $mode, $file ) {
        warn "cannot open $file: $!\n";
        return;
    }
    print {$fh} @text;
    close $fh or warn "cannot close $file: $!\n";
    return;
}

# Returns the first $depth entries of @$tree_ref, each followed by ">".
sub _trail {
    my ( $tree_ref, $depth ) = @_;
    return join '', map { $tree_ref->[$_] . ">" } 0 .. $depth - 1;
}

# Returns one { uname => ..., name => ... } hash per sub-category link found
# in $html, in page order.  Prints "c " for each link as a progress marker.
sub _extract_children {
    my ($html) = @_;
    my @found;
    while ( $html =~ m/$CHILD_LINK_RE/g ) {
        print "c ";
        push @found, { uname => $1, name => $2 };
    }
    return @found;
}

# Returns the path of the "next 200" link on a category page, or nothing if
# the page has no such link.  Only the part of the page between the
# clear-both <br> and the next <h2> is searched.
sub _next_page_path {
    my ($html) = @_;

    my $region_start_marker = "<br style=\"clear:both;\"/>";
    my $start = index( $html, $region_start_marker );
    return if $start < 0;

    my $region = substr( $html, $start + length $region_start_marker );
    my $end = index( $region, "<h2>" );
    $region = substr( $region, 0, $end ) if $end >= 0;

    return if index( $region, "200</a>)<div id" ) < 0;
    return $1 if $region =~ m{200\) \(<a href="(.*?)" title=}s;
    return;
}

# Makes sure the page of sub-category $child carries a Catnav template whose
# entries equal the first $depth entries of @$tree_ref.  If no such template
# exists, one is prepended to the page text and the page is saved.
sub _ensure_catnav {
    my ( $child, $tree_ref, $depth ) = @_;

    # get edit content
    my $edit_url = "http://" . $WIKI_PATH
        . "/w/index.php?title=Category:" . $child->{uname} . "&action=edit";
    my $html = _fetch_page($edit_url);

    # Get EditToken.  "+" is accepted in addition to hex digits and the
    # backslash because the page may deliver the token with a "+\" suffix.
    my ($editToken) = $html =~ m/value="([0-9a-f+\\]*)"\s+name="wpEditToken"/;
    my ($editTime)  = $html =~ m/value="([0-9a-f]*)"\s+name="wpEdittime"/;
    my ($content)   = $html =~ m{<textarea\b[^>]*\bname="wpTextbox1"[^>]*>(.*?)</textarea>}s;

    # Without the edit form we would overwrite the page with rubbish, so
    # leave this category alone.
    if ( !defined $content || !defined $editToken || !defined $editTime ) {
        print "Could not read edit form, skipped\n";
        _write_log( '>>', 'cnlog.txt', " Child:", $child->{name}, " edit form not found, skipped.\n" );
        return;
    }

    # check if there is Catnav
    # The comparison is done on the still HTML-escaped text: the names in
    # @$tree_ref were taken from HTML as well, so both sides are escaped the
    # same way.
    my $cnFound = 0;    # page has at least one Catnav template
    my $bFound  = 0;    # page has a Catnav template equal to the current trail
    for my $inner ( $content =~ m/\{\{Catnav\|(.*?)\}\}/gs ) {
        $cnFound = 1;
        my @cnTree = length $inner ? split( /\|/, $inner, -1 ) : ('');
        next if @cnTree != $depth;

        my $bSame = 1;
        for my $j ( 0 .. $depth - 1 ) {
            if ( $cnTree[$j] ne $tree_ref->[$j] ) {
                $bSame = 0;
                last;
            }
        }
        $bFound = 1 if $bSame;
    }
    print "entered child page. bFound=", $bFound, "\n";
    return if $bFound;

    # add new tree
    my $treecontent = "{{Catnav"
        . join( '', map { "|" . $tree_ref->[$_] } 0 .. $depth - 1 )
        . "}}"
        . ( $cnFound ? "\n" : "\n\n" );
    $content = $treecontent . $content;

    # The textarea delivers its text HTML-escaped; turn the entities back
    # into the characters they stand for.  A single pass is used so that
    # text produced by one replacement is never replaced again.
    $content =~ s/&(quot|lt|gt|amp);/$ENTITY_CHAR{$1}/g;

    # Update
    my $response = $browser->post(
        $edit_url,
        @ns_headers,
        Content_Type => 'form-data',
        Content      => [
            wpTextbox1  => $content,
            wpSummary   => "[[User:xcnbot|xcnbot]] testing",
            wpSave      => "Save page",
            wpSection   => "",
            wpEdittime  => $editTime,
            wpEditToken => $editToken,
            wpMinoredit => "1",
        ],
    );

    if ( $response->is_error ) {
        print "Change failed\n";
        _write_log( '>>', 'cnlog.txt',
            " Child:", $child->{name}, " change failed: ", $response->status_line, "\n" );
        return;
    }

    print "Change made\n";
    _write_log( '>>', 'cnlog.txt', " Child:", $child->{name}, " change made.\n" );
    return;
}