用戶:Antigng-bot/replace

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <process.h>
#include <windows.h>
#include "mem.h"
#include "network.h"
#include "convert.h"
#include "auth.h"
struct problemlist
{
	unsigned long pageid;
	struct problemlist *next;
};
/* Description of one edit request, consumed by smartedit(). */
struct neditargv
{
	const char *id;	/* page id string, sent as the "pageid" URL parameter */
	HTTP newtext;	/* buffer with the new text; rewound and handed to smartpost() */
	const char *time;	/* revision timestamp, sent as the "basetimestamp" URL parameter */
};
/* Per-thread index slots; threadini() stores i in threadc[i] and passes its address to thread i. */
int threadc[1024];
/* Reader/writer lock taken around every access to hastoken/token in this file. */
SRWLOCK rwcs;
/* Held by the workers and by main() while they touch threadnumber. */
CRITICAL_SECTION tcs;
/* Held by the workers and by main() while they touch the page list pbl. */
CRITICAL_SECTION hcs;
/* Number of worker threads that were started and have not finished yet. */
int threadnumber=0;
/* Start flag: workers sleep-poll until main() sets it to a nonzero value. */
unsigned char action=0;
/* Head of the list of pages still to be processed. */
struct problemlist *pbl=NULL;
/* KMP "next" table for matchstring, built by kmpini(). */
int *nextm=NULL;
/* Number of pages appended to pbl by the query functions. */
int pagenum=0;
/* Login name: the "-u" argument after G2U(). */
const char *username=NULL;
/* Login password: the raw "-p" argument. */
const char *passwd=NULL;
/* Replacement text: the "-r" argument after G2U(); NULL when "-r" was not given. */
const char *rpl=NULL;
/* URL-encoded form of rpl; NULL when "-r" was not given. */
char *replacestring=NULL;
/* strlen(replacestring). */
int replacelength=0;
/* strlen(matchstring). */
int matchlength=0;
/* "-m" argument: remaining number of list-query requests (only honoured when hasquerytimeslimit is set). */
int maxquerytimes=0;
/* Nonzero when "-m" supplied a positive value. */
int hasquerytimeslimit=0;
/* Nonzero when "-h" was given; the queries then ask the API to leave out redirects. */
int hideredirect=0;
/* Search text: the "-s" argument after G2U(). */
char *matchstring=NULL;
/* URL-encoded form of matchstring. */
char *matchstring_e=NULL;
/* Raw "-s" argument exactly as typed; shown by confirm(). */
const char *matchstring_ori=NULL;
/* Raw "-r" argument exactly as typed; shown by confirm(). */
const char *replacestring_ori=NULL;
/* Namespace string from "-n"; parsearg() defaults it to "0". */
const char *ns=NULL;
/* Nonzero when "-t" was given: pages are collected with templatequery() instead of allpagequery(). */
int searchfortransclude=0;
/* "-t" argument after G2U(). */
const char *templatename=NULL;
/* Nonzero when "-S" was given: the collected page list is sorted before processing. */
int dosort=0;
/* Number of worker threads to start ("-T"); defaults to 256. */
int maxthread=256;
/*
 * Print one explanatory paragraph for every problem recorded in the status
 * word that parsearg() builds.
 *
 * Bit layout of `code`:
 *   0x01  username accepted        0x02  username longer than 64 bytes
 *   0x04  search string accepted
 *   0x10  password accepted        0x08  password longer than 64 bytes
 *   0x40  namespace string longer than 32 bytes
 *
 * The "No username" / "No password" messages are printed only when the option
 * was not supplied at all. An over-long value sets the "too long" bit without
 * the "accepted" bit, and used to produce both messages, the first of which
 * was misleading.
 */
static void displayerr(unsigned int code)
{
	/* Neither accepted (0x1) nor rejected as too long (0x2): "-u" is missing. */
	if(!(code&(0x1|0x2)))
	{
		printf(
			"\tNo username.\n"
			"\t\tA valid username must be specified via \"-u\".\n"
			);
	}
	if(code&0x2)
	{
		printf(
			"\tUsername too long.\n"
			"\t\tThe username should not be longer than 64 bytes.\n"
			);
	}
	if(!(code&0x4))
	{
		printf(
			"\tNo search string.\n"
			"\t\tA search string should be set via \"-s\".\n"
			);
	}
	if(code&0x8)
	{
		printf(
			"\tPassword too long.\n"
			"\t\tThe password should not be longer than 64 bytes.\n"
			);
	}
	/* Neither accepted (0x10) nor rejected as too long (0x8): "-p" is missing. */
	if(!(code&(0x10|0x8)))
	{
		printf(
			"\tNo password.\n"
			"\t\tA valid password must be specified via \"-p\".\n"
			);
	}
	if(code&0x40)
	{
		printf(
			"\tns string too long.\n"
			"\t\tThe ns string should not be longer than 32 bytes.\n"
			);
	}
	return;
}
/*
 * Parse the command line into the file-level option variables.
 *
 * Recognised options (value options need a following argument that does not
 * start with '-'):
 *   -u name   -p password   -s search   -r replacement   -n namespace
 *   -T threads   -m maxquery   -t template   -h (flag)   -S (flag)
 *
 * Returns 0 when a username, a password and a non-empty search string were
 * all accepted. Otherwise prints the status word, lets displayerr() explain
 * it, and returns 1.
 */
static int parsearg(int argc,const char *argv[])
{
	int cur_arg=0;
	unsigned int err=0;
	size_t len=0;
	for(cur_arg=1;cur_arg<argc;cur_arg++)
	{
		/* argv[argc] is NULL, so the look-ahead below is safe for the last argument. */
		if(argv[cur_arg][0]=='-'&&((argv[cur_arg+1]&&argv[cur_arg+1][0]!='-')||(argv[cur_arg][1]=='h')||(argv[cur_arg][1]=='S')))
		{
			switch(argv[cur_arg][1])
			{
			case 'u':
				username=G2U(argv[cur_arg+1]);
				if(strlen(username)>64)
				{
					err|=0x2;
				}
				else
				{
					err|=0x1;
				}
				cur_arg++;
				break;
			case 'p':
				passwd=argv[cur_arg+1];
				if(strlen(passwd)>64)
				{
					err|=0x8;
				}
				else
				{
					err|=0x10;
				}
				cur_arg++;
				break;
			case 's':
				matchstring_ori=argv[cur_arg+1];
				matchstring=G2U(matchstring_ori);
				len=strlen(matchstring);
				matchstring_e=(char *)s_malloc(len*4+1);
				URLEncode(matchstring,len,matchstring_e,len*4+1);
				matchlength=(int)len;
				/*
				 * An empty pattern is not usable: kmpini() would write into a
				 * zero-sized table and kmp() would never terminate. Treat it
				 * as "no search string".
				 */
				if(len>0) err|=0x4;
				cur_arg++;
				break;
			case 'n':
				ns=argv[cur_arg+1];
				if(strlen(ns)>32)
				{
					err|=0x40;
				}
				cur_arg++;
				break;
			case 'T':
				maxthread=atoi(argv[cur_arg+1]);
				/* Out-of-range values fall back to 32 worker threads. */
				if(maxthread<1||maxthread>1024) maxthread=32;
				cur_arg++;
				break;
			case 'r':
				replacestring_ori=argv[cur_arg+1];
				rpl=G2U(replacestring_ori);
				len=strlen(rpl);
				replacestring=(char *)s_malloc(len*4+1);
				URLEncode(rpl,len,replacestring,4*len+1);
				replacelength=strlen(replacestring);
				cur_arg++;
				break;
			case 'm':
				maxquerytimes=atoi(argv[cur_arg+1]);
				if(maxquerytimes>0) hasquerytimeslimit=1;
				cur_arg++;
				break;
			case 'h':
				hideredirect=1;
				break;
			case 'S':
				dosort=1;
				break;
			case 't':
				searchfortransclude=1;
				templatename=G2U(argv[cur_arg+1]);
				cur_arg++;
				break;
			default:
				/* Unknown options are ignored. */
				break;
			}
		}
	}
	if(!ns) ns="0";
	/* Success requires exactly the three "accepted" bits and no error bit. */
	if(0x1+0x4+0x10==err) return 0;
	else
	{
		printf("Error code 0x%x:\n",err);
		displayerr(err);
		return 1;
	}
}
/*
 * Submit one edit through /w/api.php?action=edit.
 *
 * p       page id, base timestamp and the buffer holding the new text
 * reason  edit summary, or NULL for an empty summary
 * tags    change tags, or NULL for none
 *
 * The request is retried (at most three attempts in total) when the API
 * answers with a token error or an "abusefilter-warning".
 *
 * Returns  0  no <error> element in the last response
 *         -1  smartpost() failed
 *         -2  HTTP status line did not contain " 200"
 *         -3  the last response carried an <error> element
 *         -4  building the request tail failed
 */
static int smartedit(struct neditargv *p,const char *reason, const char *tags)
{
	HTTP res;
	char line[2048],url[4096]={0};
	/* Zero-initialised so that a NULL reason/tags yields an empty string below. */
	char reason_e[512]={0};
	char tags_e[256]={0};
	char aft[1024],statusline[128];
	char err_type[8192];
	char *erm[]={"code"};
	char *erv[1];
	int find=0;
	int has_err=0,token_err=0,filtered=0;
	int retry=0;
	erv[0]=err_type;
	if(reason) URLEncode(reason,strlen(reason),reason_e,510);
	if(tags) URLEncode(tags,strlen(tags),tags_e,254);
	sprintf(url,"/w/api.php?action=edit&pageid=%s&basetimestamp=%s",p->id,p->time);
	/* `find` remembers where the token starts so it can be replaced on each retry. */
	find=sprintf(aft,"%s%s&summary=%s&bot=1&minor=1&nocreate=1&format=xml&token=",tags?"&tags=":"",tags?tags_e:"",reason_e);
	if(find<0) return -4;
	do
	{
		res=hopen();
		/* Wait until a token is available; the shared lock is kept while it is copied. */
		while(1)
		{
			AcquireSRWLockShared(&rwcs);
			if(hastoken) break;
			else ReleaseSRWLockShared(&rwcs);
			Sleep(100);
		}
		aft[find]=0;
		strcat(aft,token);
		ReleaseSRWLockShared(&rwcs);
		hrewind(p->newtext);
		if(smartpost(url,p->newtext,aft,8888,1,res))
		{
			hclose(res);
			return -1;
		}
		statusline[0]=0;
		hgets(statusline,127,res);
		if(!strstr(statusline," 200"))
		{
			hclose(res);
			return -2;
		}
		skipresponseheader(res);
		filtered=token_err=has_err=0;
		while(!heof(res))
		{
			if(xmlparsetag(res,line)==XML_HAS_VALUE)
			{
				if(!strcmp(line,"error"))
				{
					has_err=1;
					/* Cleared first so a failed attribute parse cannot leave stale text behind. */
					err_type[0]=0;
					xmlparsearg(res,1,erm,erv);
					if((!strcmp(err_type,"notoken"))||(!strcmp(err_type,"badtoken")))
					{
						token_err=1;
					}
					else if(!strcmp(err_type,"abusefilter-warning"))
					{
						filtered=1;
					}
					break;
				}
			}
		}
		if(token_err)
		{
			AcquireSRWLockExclusive(&rwcs);
			/*
			 * Invalidate the shared token only if it is still the one this
			 * request used (aft+find); if it differs, it has been replaced
			 * in the meantime. The original compared against a local buffer
			 * that was never written.
			 * NOTE(review): intent inferred from the surrounding code - confirm
			 * against the token manager in auth.
			 */
			if(!strcmp(aft+find,token)) hastoken=0;
			ReleaseSRWLockExclusive(&rwcs);
		}
		retry++;
		hclose(res);
	}while((token_err||filtered)&&(retry<3));
	if(has_err) return -3;
	else return 0;
}
/* Edit summary used for every replacement; built once by kmpini(). */
char *replace_reason=NULL;
/*
 * Build the KMP "next" table for matchstring into nextm and compose the edit
 * summary replace_reason.
 *
 * nextm[0] is -1; nextm[i] is the length of the longest proper prefix of
 * matchstring[0..i) that is also a suffix of it.
 *
 * Always returns 0.
 */
static int kmpini()
{
	int i,j;
	/* Allocate at least one slot: nextm[0] is written even for an empty pattern. */
	nextm=(int *)s_malloc((matchlength>0?matchlength:1)*sizeof(int));
	i=0;
	nextm[0]=j=-1;
	while(i<matchlength-1)
	{
		if(j==-1)
		{
			/* No border left to extend: the next position starts from scratch. */
			j=0;
			i++;
			nextm[i]=0;
		}
		else if(matchstring[i]==matchstring[j])
		{
			i++;j++;
			nextm[i]=j;
		}
		else j=nextm[j];
	}
#ifdef _DEBUG
	for(i=0;i<matchlength;i++)
	{
		printf("%d ",nextm[i]);
	}
	printf("\n");
#endif
	/* 64 spare bytes cover the fixed text of the summary and the terminator. */
	replace_reason=(char *)s_calloc(sizeof(char)*(64+strlen(matchstring)+(rpl?strlen(rpl):0)),1);
	/* rpl is NULL when no "-r" was given; passing NULL to %s is undefined. */
	sprintf(replace_reason,"bot: massreplace: \"%s\" ->\"%s\"",matchstring,rpl?rpl:"");
	return 0;
}
/*
 * Stream the text of the current XML element from `h`, replacing every
 * occurrence of matchstring, and write the outcome to `result`.
 *
 * Characters are pulled one at a time with xmlpulltext() and matched against
 * matchstring with the help of the nextm table built by kmpini(). Characters
 * that turn out not to belong to a match are written through
 * smartURLEncode(); each complete match is replaced by replacestring (written
 * with hputs()), or simply dropped when replacestring is NULL.
 *
 * Returns 1 when at least one match was found and the final xmlpulltext()
 * call reports XML_TEXT_END; returns 0 otherwise.
 *
 * NOTE(review): the return value of the very first xmlpulltext() call is
 * ignored, so `ch` may be used uninitialised if no character was delivered -
 * confirm against xmlpulltext()'s contract.
 * NOTE(review): with matchlength == 0 the inner loop never runs and `exit` is
 * never set, so the outer loop would not terminate.
 */
static int kmp(HTTP h,HTTP result)
{
	char ch,*pch;
	/* i: number of pattern characters currently matched, or -1 meaning "fetch a fresh character" */
	int i=0,j=0,k=0,todo=0,exit=0;
	pch=&ch;
	xmlpulltext(h,&ch);
	do
	{
		while(i<matchlength)
		{
			if(i==-1)
			{
				/* The previous character was already written out; read the next one. */
				if(xmlpulltext(h,&ch)!=XML_TEXT_CONTINUE)
				{
					exit=1;
					break;
				}
				i=0;
			}
			else if(ch==matchstring[i])
			{
				/* Character matches: extend the partial match and read ahead. */
				i++;
				if(xmlpulltext(h,&ch)!=XML_TEXT_CONTINUE)
				{
					exit=1;
					break;
				}
			}
			else 
			{
				if(nextm[i]==-1)
				{
					/* nextm[i] is -1 only for i == 0, so the loop below writes nothing; emit ch itself. */
					for(k=0;k<i;k++)
					{
						smartURLEncode(matchstring[k],result);
					}
					smartURLEncode(ch,result);
					i=-1;
				}
				else
				{
					/* Write the first j matched characters, which can no longer start a match, and fall back to nextm[i]. */
					j=i-nextm[i];
					for(k=0;k<j;k++)
					{
						smartURLEncode(matchstring[k],result);
					}
					i=i-j;
				}
			}
		}
		if(i==matchlength)
		{
			/* Full match: remember that an edit is needed and emit the replacement. */
			todo=1;
			i=0;
			if(replacestring) hputs(replacestring,replacelength,result);
		}
	}while(!exit);	
	/* Input ended in the middle of a partial match: write the pending prefix unchanged. */
	if(i>0)
	{
		for(k=0;k<i;k++)
		{
			smartURLEncode(matchstring[k],result);
		}
	}
	/* Anything other than XML_TEXT_END on this call discards the result. */
	if(xmlpulltext(h,&ch)!=XML_TEXT_END) todo=0;
	return todo;
}
/*
 * Run the replacement over the page text read from `f` and, when kmp()
 * reports that an edit is needed, submit the new text with smartedit().
 *
 * pageid     page id string passed on to the edit request
 * timestamp  revision timestamp passed on as the base timestamp
 *
 * Always returns 0; the result of smartedit() is not inspected.
 */
static int pagecheck(const char *pageid,const char *timestamp,HTTP f)
{
	struct neditargv job;
	HTTP body=hopen();
	/* The buffer is the POST body fragment, so it starts with the parameter name. */
	hputs("&text=",6,body);
	if(!kmp(f,body))
	{
		hclose(body);
		return 0;
	}
	job.id=pageid;
	job.time=timestamp;
	job.newtext=body;
	smartedit(&job,replace_reason,NULL);
	hclose(body);
	return 0;
}
/*
 * Fetch the latest revision of every page listed in `ids` (page ids joined
 * with '|') and hand each wikitext slot to pagecheck().
 *
 * The response is walked with a small state machine:
 *   0  waiting for a <page> element with a positive pageid
 *   1  waiting for the <rev> element (its timestamp is recorded)
 *   2  waiting for the <slot> element (content model/format are checked)
 *   3  parse failure
 *
 * Returns  0  the walk ended in state 0
 *         -1  ids is NULL
 *         -2  the request failed
 *         -3  bad status line or response header
 *         -4  the walk ended in any other state
 */
static int proceedchild(const char *ids)
{
	char request[4096];
	char tag[8192];
	char pageid[256];
	char timestamp[256];
	char contentmodel[64],contentformat[64];
	const char *pagekeys[]={"pageid"};
	const char *revkeys[]={"timestamp"};
	const char *slotkeys[]={"contentmodel","contentformat"};
	char *pagevals[1];
	char *revvals[1];
	char *slotvals[2];
	int kind;
	int state=0;
	HTTP h;
	if(!ids)
	{
		return -1;
	}
	pagevals[0]=pageid;
	revvals[0]=timestamp;
	slotvals[0]=contentmodel;
	slotvals[1]=contentformat;
	sprintf(request,"/w/api.php?action=query&format=xml&prop=revisions&rvprop=content|timestamp&pageids=%s&rvslots=main",ids);
	h=hopen();
	if(get(request,8888,1,h))
	{
		hclose(h);
		return -2;
	}
	/* The tag buffer doubles as storage for the status line. */
	hgets(tag,4096,h);
	if(!strstr(tag," 200")||skipresponseheader(h))
	{
		hclose(h);
		return -3;
	}
	for(;;)
	{
		if(heof(h)) break;
		kind=xmlparsetag(h,tag);
		if(kind==XML_PARSE_ERROR)
		{
			state=3;
			break;
		}
		if(kind!=XML_HAS_VALUE) continue;
		if(state==0)
		{
			if(!strcmp(tag,"page")&&(xmlparsearg(h,1,pagekeys,pagevals)==XML_HAS_VALUE)&&atoi(pageid)>0)
			{
				state=1;
			}
		}
		else if(state==1)
		{
			/* Anything but <rev> here ends the walk with the state left at 1. */
			if(strcmp(tag,"rev")) break;
			if(xmlparsearg(h,1,revkeys,revvals)!=XML_HAS_VALUE)
			{
				state=3;
				break;
			}
			state=2;
		}
		else if(state==2)
		{
			/* Anything but <slot> here ends the walk with the state left at 2. */
			if(strcmp(tag,"slot")) break;
			if(xmlparsearg(h,2,slotkeys,slotvals)!=XML_HAS_VALUE)
			{
				state=3;
				break;
			}
			if(!strcmp(contentmodel,"wikitext")&&!strcmp(contentformat,"text/x-wiki"))
			{
				pagecheck(pageid,timestamp,h);
			}
			state=0;
		}
	}
	hclose(h);
	return state?-4:0;
}
/*
 * Worker thread body. After main() sets `action`, repeatedly takes up to ten
 * page ids off the shared list pbl, joins them with '|' and hands the batch
 * to proceedchild(). When the list is empty the thread decrements
 * threadnumber and returns.
 *
 * Fixes over the previous version:
 *  - the final, partially filled batch was taken off the list but never
 *    processed, because the thread left the loop as soon as the list ran dry;
 *  - an unfilled pageid[] slot was copied into `ids` in that case;
 *  - the unsigned long page id was formatted with "%d".
 *
 * c  pointer to this thread's slot in threadc (unused)
 */
static void threadfunc(void *c)
{
	char pageid[10][64];
	char ids[4096];
	int count=0;
	int k=0;
	int exhausted=0;
	struct problemlist *temp;
	(void)c;
	while(!action) Sleep(1);
	while(!exhausted)
	{
		/* Take a batch off the list; only the list manipulation needs the lock. */
		EnterCriticalSection(&hcs);
		for(count=0;count<10;count++)
		{
			if(!pbl)
			{
				exhausted=1;
				break;
			}
			temp=pbl;
			pbl=pbl->next;
			sprintf(pageid[count],"%lu",temp->pageid);
			s_free(temp);
		}
		LeaveCriticalSection(&hcs);
		if(count==0) break;
		/* Join the ids last-to-first, the same order the previous code used. */
		strcpy(ids,pageid[count-1]);
		for(k=count-2;k>=0;k--)
		{
			strcat(ids,"|");
			strcat(ids,pageid[k]);
		}
		proceedchild(ids);
	}
	EnterCriticalSection(&tcs);
	threadnumber--;
	LeaveCriticalSection(&tcs);
	return;
}
/*
 * Start `count` worker threads running threadfunc() and record in
 * threadnumber how many were actually created.
 *
 * count is clamped to the capacity of threadc. The workers idle until main()
 * sets `action`, so threadnumber is not modified concurrently here.
 *
 * Always returns 0.
 */
static int threadini(int count)
{
	int i=0;
	int capacity=(int)(sizeof(threadc)/sizeof(threadc[0]));
	/* uintptr_t is the declared return type of _beginthread (supplied by <process.h>). */
	uintptr_t handle;
	threadnumber=0;
	if(count>capacity) count=capacity;
	for(i=0;i<count;i++)
	{
		threadc[i]=i;
		handle=_beginthread(threadfunc,0,(void *)(threadc+i));
		/*
		 * _beginthread reports failure as -1L. Storing the result in an int
		 * and testing ">0" can misjudge valid handles once truncated.
		 */
		if(handle!=(uintptr_t)-1) threadnumber++;
	}
	return 0;
}
/* Work array for pagesort(): one single-node list per page plus a NULL sentinel. */
static struct problemlist **sort;
/*
 * Split the list pbl into pagenum single-node lists stored in sort[0..pagenum)
 * and terminate the array with a NULL entry.
 *
 * The array holds pagenum+1 pointers. mergesort() reads the sentinel slot when
 * the number of lists is odd, and the previous "+5 bytes" allocation did not
 * cover a full pointer wherever pointers are wider than 5 bytes.
 */
static void sortini()
{
	int i=0;
	struct problemlist *newlist=pbl;
	sort=(struct problemlist **)s_calloc((pagenum+1)*sizeof(struct problemlist *),1);
	for(i=0;i<pagenum;i++)
	{
		sort[i]=newlist;
		newlist=newlist->next;
		/* Detach the node so that it forms a list of its own. */
		sort[i]->next=0;
	}
	sort[i]=0;
}

/*
 * Merge two lists that are each ordered by descending pageid into one list
 * in the same order and return its head. Either argument may be NULL.
 * When both heads carry the same pageid, the node from `b` is taken first.
 * The nodes are relinked in place; nothing is allocated.
 */
static struct problemlist *mergelist(struct problemlist *a,struct problemlist *b)
{
	struct problemlist *merged=0;
	/* Address of the link that receives the next node. */
	struct problemlist **link=&merged;
	while(a&&b)
	{
		if(a->pageid>b->pageid)
		{
			*link=a;
			link=&a->next;
			a=a->next;
		}
		else
		{
			*link=b;
			link=&b->next;
			b=b->next;
		}
	}
	/* At most one input still has nodes; append it as it is. */
	*link=a?a:b;
	return merged;
}
/*
 * Bottom-up merge sort over an array of lists.
 *
 * tg      array of `number` list heads followed by a readable NULL slot
 *         (tg[number]); the slot is read when `number` is odd
 * number  count of lists in tg
 *
 * Each pass merges neighbouring pairs with mergelist() and compacts the
 * results to the front of tg, until one list remains in tg[0].
 *
 * Returns 0 when there was nothing to merge (number <= 1), 1 otherwise.
 * number == 0 used to recurse without end.
 */
static int mergesort(struct problemlist **tg,int number)
{
	int count=0;
	int pos=0;
	struct problemlist *a,*b,*c;
	if(number<=1) return 0;
	for(count=0,pos=0;pos<number;count++)
	{
		a=tg[pos];
		b=tg[pos+1];
		c=mergelist(a,b);
		tg[count]=c;
		pos+=2;
	}
	/* New sentinel for the next, shorter pass. */
	tg[count]=0;
	mergesort(tg,count);
	return 1;
}

/*
 * Sort the page list pbl by descending pageid, using the temporary array
 * `sort`, and store the sorted list back into pbl.
 *
 * An empty list is left alone: there is nothing to sort, and running the
 * merge on zero lists did not terminate.
 */
static void pagesort()
{
	if(pagenum<=0||!pbl) return;
	sortini();
	mergesort(sort,pagenum);
	pbl=sort[0];
	s_free(sort);
	sort=0;
}
/*
 * Shared worker for allpagequery() and templatequery().
 *
 * url       request buffer already holding the fixed part of the query
 * ptourl    position inside url where the continuation parameter is written
 * contname  name of the continuation attribute/parameter ("apcontinue", ...)
 * itemtag   name of the XML element that describes one page ("p", "ti")
 *
 * Issues the request (up to 20 attempts per page of results), pushes every
 * item with a positive pageid onto pbl, counts it in pagenum, and follows the
 * continuation until the API stops returning one or the "-m" limit is used
 * up. The list is sorted afterwards when dosort is set.
 *
 * Returns 0 on success, 1 when a request failed 20 times in a row.
 */
static int pagelistquery(char *url,char *ptourl,char *contname,const char *itemtag)
{
	HTTP f;
	char line[2048]={0},id[512]={0},title[512]={0},sroffset[2048]={0},offseto[512]={0};
	char statusline[128];
	int next=0,retry=0,pageid=0;
	struct problemlist *temp=0;
	char *ctm[1];
	char *ctv[1];
	char *idm[]={"pageid","title"};
	char *idv[2];
	ctm[0]=contname;
	ctv[0]=offseto;
	idv[0]=id;
	idv[1]=title;
	do
	{
		if(next)
		{
			sprintf(ptourl,"&%s=%s",contname,sroffset);
		}
		else *ptourl=0;
		f=hopen();
		for(retry=0;retry<20;retry++)
		{
			if(!get(url,8888,1,f))
			{
				statusline[0]=0;
				hgets(statusline,127,f);
				/* " 200" with the leading space, as in the other status checks of this file. */
				if(strstr(statusline," 200")) break;
			}
			/* Failed attempt: start over with a fresh buffer. */
			hclose(f);
			f=hopen();
		}
		if(retry==20)
		{
			hclose(f);
			return 1;
		}
		skipresponseheader(f);
		next=0;
		do
		{
			/*
			 * The buffers are cleared before each parse call so that a call
			 * which fails without writing cannot make a previous tag or page
			 * id count twice.
			 * NOTE(review): assumes xmlparsetag()/xmlparsearg() only write to
			 * these buffers - confirm.
			 */
			line[0]=0;
			xmlparsetag(f,line);
			if(!next)
			{
				if(!strcmp(line,"continue"))
				{
					offseto[0]=0;
					xmlparsearg(f,1,ctm,ctv);
					URLEncode(offseto,strlen(offseto),sroffset,2047);
					next=1;
				}
			}
			if(!strcmp(line,itemtag))
			{
				id[0]=0;
				xmlparsearg(f,2,idm,idv);
				if((pageid=atoi(id))>0)
				{
					temp=(struct problemlist *)s_malloc(sizeof(struct problemlist));
					temp->pageid=pageid;
					temp->next=pbl;
					pbl=temp;
					pagenum++;
				}
			}
		}while(!heof(f));
		hclose(f);
		if(hasquerytimeslimit)
		{
			if(--maxquerytimes<=0) break;
		}
	}while(next);
	if(dosort) pagesort();
	return 0;
}
/*
 * Collect every page of namespace `ns` (list=allpages) into pbl.
 * Redirects are left out when hideredirect is set.
 * Returns 0 on success, 1 when a request kept failing.
 */
static int allpagequery(const char *ns)
{
	char url[4096]={0};
	char *ptourl=url;
	ptourl+=sprintf(url,"/w/api.php?action=query&format=xml&list=allpages&apnamespace=%s&aplimit=5000&apfilterredir=%s",ns,hideredirect?"nonredirects":"all");
	return pagelistquery(url,ptourl,"apcontinue","p");
}
/*
 * Collect every page of namespace `ns` that transcludes `templatename`
 * (prop=transcludedin) into pbl. Redirects are left out when hideredirect is
 * set.
 * Returns 0 on success, 1 when a request kept failing or the encoded title
 * does not fit into the request buffer.
 */
static int templatequery(const char *templatename,const char *ns)
{
	char url[4096]={0};
	char templatename_e[8192];
	char *ptourl=url;
	URLEncode(templatename,strlen(templatename),templatename_e,8191);
	/*
	 * url must hold the fixed text (about 100 bytes), the title, the namespace
	 * and later a continuation of up to 2047 bytes. Titles that would overflow
	 * it are refused instead of being written past the end of the buffer.
	 */
	if(strlen(templatename_e)+strlen(ns)>sizeof(url)-2048-256) return 1;
	ptourl+=sprintf(url,"/w/api.php?action=query&format=xml&prop=transcludedin&titles=%s&tinamespace=%s&tilimit=5000&tishow=%s",templatename_e,ns,hideredirect?"!redirect":"");
	return pagelistquery(url,ptourl,"ticontinue","ti");
}
/*
 * Ask the user to confirm the replacement.
 *
 * Only the first character of each input line is examined; the rest of the
 * line is discarded. Returns 0 for 'Y', 1 for 'N' or when standard input is
 * exhausted; any other answer repeats the question.
 */
static int confirm()
{
	int ch=0;
	int rest=0;
	while(1)
	{
		/* replacestring_ori is NULL when no "-r" was given; never pass NULL to %s. */
		printf("Replace \"%s\" with \"%s\", OK? Press \'Y\' to continue, \'N\' to abort.\n",matchstring_ori?matchstring_ori:"",replacestring_ori?replacestring_ori:"");
		ch=getchar();
		/* Without this check an exhausted stdin would repeat the prompt forever. */
		if(ch==EOF) return 1;
		/* Drop the remainder of the line; fflush(stdin) is undefined in standard C. */
		rest=ch;
		while(rest!='\n'&&rest!=EOF) rest=getchar();
		if(ch=='Y') return 0;
		else if(ch=='N') return 1;
		printf("Invalid command.\n");
	}
}
/*
 * Program entry point.
 *
 * Sequence: parse the options, ask for confirmation, log in, build the KMP
 * table, start the token thread, collect the page list, start the workers,
 * wait until the list is empty and the workers are gone (at most about 150
 * seconds for the latter), then clean up.
 *
 * Returns 0 on completion, -1 on bad arguments, -2 when the user aborts,
 * -3 on login failure, -4 when no page was found.
 */
int main(int argc,char *argv[])
{
	int count=0;
	int remaining=0;
	int queryfailed=0;
	HANDLE tk_thread;
	/* parsearg() takes const char *[]; char ** does not convert to that implicitly. */
	if(parsearg(argc,(const char **)argv))
	{
		printf("usage: -u username -p passwd -s searchstring [-T concurrency -r replacestring -n namespace -m maxquery -h hideredirect -S sortpage -t transclude]\n");
		return -1;
	}
	if(confirm())
	{
		printf("Aborted!\n");
		return -2;
	}
	InitializeSRWLock(&rwcs);
	InitializeCriticalSection(&tcs);
	InitializeCriticalSection(&hcs);
	buckini(20);
	if(login(username,passwd))
	{
		printf("Login error!\n");
		return -3;
	}
	hastoken=0;
	printf("Login complete.\n");
	fflush(stdout);
	kmpini();
	/*
	 * NOTE(review): a handle returned by _beginthread is closed by the runtime
	 * when the thread ends, so waiting on it further down is not reliable;
	 * _beginthreadex would be the safe choice but needs a different thread
	 * function signature.
	 */
	tk_thread=(HANDLE)_beginthread(tokenmanage,0,0);
	if(searchfortransclude)
	{
		queryfailed=templatequery(templatename,ns);
	}
	else queryfailed=allpagequery(ns);
	/* A failed query is reported; pages collected before the failure are still processed. */
	if(queryfailed)
	{
		printf("Query error! The page list may be incomplete.\n");
	}
	if(pbl==NULL)
	{
		printf("No page!\n");
		return -4;
	}
	printf("Query complete. %d pages found. Checking...\n",pagenum);
	/* Workers are created first and released together by setting action. */
	action=0;
	threadini(maxthread);
	action=1;
	/* Wait until the workers have emptied the list. */
	while(1)
	{
		EnterCriticalSection(&hcs);
		if(pbl!=NULL)
		{
			LeaveCriticalSection(&hcs);
			Sleep(1000);
		}
		else
		{
			LeaveCriticalSection(&hcs);
			break;
		}
	}
	/* Give the workers up to 150 one-second rounds to finish their last batch. */
	count=0;
	while(count<150)
	{
		count++;
		EnterCriticalSection(&tcs);
		remaining=threadnumber;
		LeaveCriticalSection(&tcs);
		if(remaining<=0) break;
		printf("Waiting for all threads to exit. Current thread number: %d\n",remaining);
		fflush(stdout);
		Sleep(1000);
	}
	/* Re-read under the lock: workers may have finished during the last sleep. */
	EnterCriticalSection(&tcs);
	remaining=threadnumber;
	LeaveCriticalSection(&tcs);
	if(!remaining)
	{
		printf("Cleanup..\n");
		DeleteCriticalSection(&tcs);
		DeleteCriticalSection(&hcs);
		/* hastoken is set to -1 before waiting for the token thread to end. */
		AcquireSRWLockExclusive(&rwcs);
		hastoken=-1;
		ReleaseSRWLockExclusive(&rwcs);
		WaitForSingleObject(tk_thread,INFINITE);
		buckdestroy();
	}
	printf("---------------Ok done.---------------\n");
	fflush(stdout);
	system("PAUSE");
	return 0;
}