用户:Xcnbot/2
外观
#!/usr/bin/perl
# catnav bot by WikiPedia:User:下一次登录
# Disclaimer: No warranty granted, use at your own risk!
#
# What this script does:
#   1. Logs in to the wiki configured in $WIKI_PATH.
#   2. Reads a resume point from last_string.txt and asks Special:Allpages
#      (namespace 14) for the categories starting at that point.
#   3. For every listed category it fetches the rendered page and the edit
#      form, collects the {{Catnav ...}} templates found in the wikitext and
#      the ancestor chains shown in the rendered "catnav" boxes.
#   4. Each chain is verified link by link against the "catlinks" box of the
#      child page.  A chain that fails verification gets its template removed
#      and the page is saved.
#   5. The last processed category name is written back to last_string.txt
#      and the next batch is requested.
#
# Progress is printed to STDOUT and appended to log.txt.

use strict;
use warnings;

# call requirements
use Getopt::Std;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use HTTP::Cookies;

# parameters
our $username  = "xcnbot";    # input your username here, only English names are tested.
our $password  = "******";    # input your password here
our $WIKI_PATH = "zh.wikipedia.org";
our $WIKI_PAGE;               # URL-encoded name of the category being processed

# Upper bound on categories taken from one Allpages listing (the historical
# loop condition was "count <= 202", i.e. 203 entries).
my $MAX_ARTICLES = 203;

my $LOG_FILE   = "log.txt";
my $STATE_FILE = "last_string.txt";

# Resume point used when the state file is missing or empty; "%21" is the
# initial value this script has always documented for it.
my $DEFAULT_START = "%21";

# Replacement characters for the entities handled before saving.  Only the
# entity *names* are spelled here so that this file survives being rendered
# as HTML.  NOTE(review): the fifth entity was garbled in the copy this was
# restored from; it is presumed to be "nbsp" mapped to a plain space - confirm.
my %ENTITY_CHAR = (
    quot => '"',
    lt   => '<',
    gt   => '>',
    amp  => '&',
    nbsp => ' ',
);

### Login to wiki
# Set up connection data
my $browser    = LWP::UserAgent->new();
my @ns_headers = (
    'User-Agent' => 'Xcnbot1.0 by 下一次登录',
    # Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7) Gecko/20041107 Firefox/1.0
    'Accept' => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*',
    'Accept-Charset'  => 'iso-8859-1,*,utf-8',
    'Accept-Language' => 'en-US',
);

# Hold cookies
$browser->cookie_jar( {} );

# Print $text to STDOUT and to the log handle $log_fh.
sub _report {
    my ( $log_fh, $text ) = @_;
    print $text;
    print {$log_fh} $text;
    return;
}

# Return the resume point stored in the state file with all whitespace
# removed, or $DEFAULT_START when the file is missing or empty.
sub _read_resume_point {
    open my $fh, '<', $STATE_FILE or return $DEFAULT_START;
    my $text = do { local $/; <$fh> };
    close $fh;
    $text = '' unless defined $text;
    $text =~ s/\s+//g;    # a stray newline would otherwise end up in the URL
    return length $text ? $text : $DEFAULT_START;
}

# Overwrite the state file with $name (no trailing newline).
sub _write_resume_point {
    my ($name) = @_;
    open my $fh, '>', $STATE_FILE or die "Cannot write $STATE_FILE: $!";
    print {$fh} $name;
    close $fh or die "Cannot close $STATE_FILE: $!";
    return;
}

# Extract the URL-encoded category names from an Allpages response.
# Only the text between the listing table and the print footer is scanned;
# each <td>...</td> cell contributes the href of its first /wiki/ link with
# the 9-character namespace prefix ("Category:") skipped.
# Returns a (possibly empty) list, at most $MAX_ARTICLES long.
sub _parse_allpages {
    my ($html) = @_;

    my $table_marker = '<table style="background: inherit;" border="0" width="100%">';
    my $table_at     = index( $html, $table_marker );
    return if $table_at < 0;
    $html = substr( $html, $table_at + length $table_marker );

    my $footer_at = index( $html, '<div class="printfooter">' );
    $html = substr( $html, 0, $footer_at ) if $footer_at >= 0;

    my @names;
    while ( @names < $MAX_ARTICLES ) {
        my $cell_at = index( $html, '<td>' );
        last if $cell_at < 0;    # no more cells: stop instead of inventing names
        my $cell_end = index( $html, '</td>', $cell_at );
        last if $cell_end < 0;

        my $cell = substr( $html, $cell_at + 4, $cell_end - $cell_at - 4 );
        $html = substr( $html, $cell_end + 5 );

        # Cells without a usable link are skipped rather than recorded.
        next unless $cell =~ m{<a href="/wiki/.{9}([^"]*)"}s;
        push @names, $1;
    }
    return @names;
}

# Return the escaped wikitext found inside the wpTextbox1 textarea of an edit
# form, or nothing when the textarea cannot be located.
sub _extract_wikitext {
    my ($edit_html) = @_;

    my $marker = q{<textarea tabindex='1' accesskey="," name="wpTextbox1" id="wpTextbox1" rows='25'};
    my $tag_at = index( $edit_html, $marker );
    return if $tag_at < 0;

    # Skip the remaining attributes up to the end of the opening tag instead
    # of relying on a fixed character count.
    my $body_at = index( $edit_html, '>', $tag_at + length $marker );
    return if $body_at < 0;
    $body_at++;

    my $end_at = index( $edit_html, '</textarea>', $body_at );
    return if $end_at < 0;

    return substr( $edit_html, $body_at, $end_at - $body_at );
}

# Collect the ancestor chains shown in the rendered catnav boxes of a page.
# Returns one array reference per box, in page order; each holds the
# URL-encoded category names linked in that box, in display order.  When a
# box contains "...", only the links after it are used.
sub _rendered_catnav_chains {
    my ($page_html) = @_;

    my $box_marker   = 'title="catnav" style=';
    my $link_marker  = '<a href="/wiki/Category:';
    my $title_marker = '" title="Category:';
    my $box_end      = '</div>';

    my @chains;
    while ( ( my $box_at = index( $page_html, $box_marker ) ) >= 0 ) {
        $page_html = substr( $page_html, $box_at + length $box_marker );

        my $end_at = index( $page_html, $box_end );
        my $line;
        if ( $end_at >= 0 ) {
            $line      = substr( $page_html, 0, $end_at );
            $page_html = substr( $page_html, $end_at + length $box_end );
        }
        else {
            $line      = $page_html;
            $page_html = '';
        }

        my $dots_at = index( $line, '...' );
        $line = substr( $line, $dots_at + 3 ) if $dots_at >= 0;

        my @cats;
        while ( ( my $link_at = index( $line, $link_marker ) ) >= 0 ) {
            $line = substr( $line, $link_at + length $link_marker );
            my $name_end = index( $line, $title_marker );
            last if $name_end < 0;
            push @cats, substr( $line, 0, $name_end );
            $line = substr( $line, $name_end + length $title_marker );
        }
        push @chains, \@cats;
    }
    return @chains;
}

# Verify an ancestor chain for the category $page.
# @ancestors is in display order, so the last element is the direct parent of
# $page.  Walking upwards, the catlinks box of each child page must link to
# the next ancestor.
# Returns 1 when every link is confirmed, 0 when one is missing, and nothing
# (undef) when a page could not be fetched, so that a network problem is never
# mistaken for a wrong catnav.
sub _chain_is_valid {
    my ( $page, @ancestors ) = @_;

    my $links_start = q{<div id="catlinks"><p class='catlinks'><a href="/wiki/Special:Categories" title=};
    my $links_end   = '</span></p></div>';

    my $child = $page;
    for my $parent ( reverse @ancestors ) {
        my $response = $browser->get( "http://" . $WIKI_PATH . "/wiki/Category:" . $child, @ns_headers );
        return unless $response->is_success;

        my $html     = $response->as_string;
        my $start_at = index( $html, $links_start );
        my $links    = $start_at >= 0 ? substr( $html, $start_at ) : '';
        my $end_at   = index( $links, $links_end );
        $links = substr( $links, 0, $end_at ) if $end_at >= 0;

        return 0 if index( $links, "Category:" . $parent . "\" title" ) < 0;
        $child = $parent;
    }
    return 1;
}

# Turn the entities listed in %ENTITY_CHAR back into characters in a single
# pass, so that an already-decoded ampersand is never decoded a second time.
sub _decode_entities {
    my ($text) = @_;
    $text =~ s/&(quot|lt|gt|amp|nbsp);/$ENTITY_CHAR{$1}/g;
    return $text;
}

# Check one category (URL-encoded name $category) and save it without the
# catnav templates whose rendered chain failed verification.
# The n-th rendered catnav box is assumed to come from the n-th template in
# the wikitext; a box without a matching template is reported but left alone.
sub _process_category {
    my ( $log_fh, $category ) = @_;
    $WIKI_PAGE = $category;

    my $view_url  = "http://" . $WIKI_PATH . "/wiki/Category:" . $category;
    my $page_html = $browser->get( $view_url, @ns_headers )->as_string;
    _report( $log_fh, "\n" . $view_url );

    my $edit_url  = "http://" . $WIKI_PATH . "/w/index.php?title=Category:" . $category . "&action=edit";
    my $edit_html = $browser->get( $edit_url, @ns_headers )->as_string;

    # Get EditToken and edit time from the hidden form fields.
    my ($edit_token) = $edit_html =~ m/value="([0-9a-f\\]*)" name="wpEditToken"/;
    my ($edit_time)  = $edit_html =~ m/value="([0-9a-f]*)" name="wpEdittime"/;

    my $wikitext = _extract_wikitext($edit_html);
    my $can_save = defined $wikitext && defined $edit_token && defined $edit_time;
    $wikitext = '' unless defined $wikitext;

    _report( $log_fh, "\nC " );

    # Every {{Catnav...}} / {{catnav...}} template up to its first "}}",
    # collected afresh for each page.
    my @templates = $wikitext =~ m/(\{\{[Cc]atnav.*?\}\})/gs;
    _report( $log_fh, scalar(@templates) . "cn(s) " );

    my $changed = 0;
    my $box_no  = 0;
    for my $chain ( _rendered_catnav_chains($page_html) ) {
        my $valid = _chain_is_valid( $category, @{$chain} );
        if ( defined $valid && !$valid ) {
            _report( $log_fh, "Err " );
            if ( $box_no < @templates ) {
                my $template = $templates[$box_no];

                # Remove the template and, if present, the newline after it.
                # Nothing is touched when the text is not found.
                $changed = 1 if $wikitext =~ s/\Q$template\E\n?//;
            }
        }
        $box_no++;
    }

    # Update
    if ( $changed && $can_save ) {
        $browser->post(
            $edit_url, @ns_headers,
            Content_Type => 'form-data',
            Content      => [
                wpTextbox1  => _decode_entities($wikitext),
                wpSummary   => "Testing: Wrong catnav deleted.",
                wpSave      => "Save page",
                wpSection   => "",
                wpEdittime  => $edit_time,
                wpEditToken => $edit_token,
                wpMinoredit => "1",
            ]
        );
        _report( $log_fh, "Changed." );
    }
    return;
}

# Login
{
    # Make login request
    my $response = $browser->post(
        "http://" . $WIKI_PATH . "/w/index.php?title=Special:Userlogin&action=submitlogin",
        @ns_headers,
        Content => [
            wpName         => $username,
            wpPassword     => $password,
            wpRemember     => "1",
            wpLoginAttempt => "Log in",
        ]
    );

    # After logging in, we should be redirected to another page.
    # If we aren't, something is wrong.
    if ( $response->code != 302 ) {
        print "We weren't able to login.\n\n";
        exit 1;
    }
    print "Logged in \n";
}

open my $log_fh, '>>', $LOG_FILE or die "Cannot open $LOG_FILE for appending: $!";

# Process the category listing batch by batch.
BATCH: while (1) {

    # Read the resume point and start Allpages from that category.
    my $last_string = _read_resume_point();

    my $list_url = "http://" . $WIKI_PATH . "/w/index.php?title=Special:Allpages&from=" . $last_string . "&namespace=14";
    my $response = $browser->get( $list_url, @ns_headers );
    if ( !$response->is_success ) {
        _report( $log_fh, "\nCould not fetch " . $list_url . ": " . $response->status_line );
        last BATCH;
    }

    my @categories = _parse_allpages( $response->as_string );
    last BATCH if !@categories;

    # Go through all the listed categories and process them.
    for my $category (@categories) {
        _process_category( $log_fh, $category );
    }

    # Record where the next batch has to start.
    _write_resume_point( $categories[-1] );

    # A batch that ends where it started made no progress: the listing is
    # exhausted, so stop instead of re-reading the same entry forever.
    last BATCH if $categories[-1] eq $last_string;
}

close $log_fh or die "Cannot close $LOG_FILE: $!";