import urllib2,re,threading
from time import ctime
def RegexMatch(regmatch):
    """Print every match of ``regmatch`` found in the shared page text.

    The text to search is read from the module-level global ``file``,
    which ``main()`` assigns before any worker thread is started.

    Args:
        regmatch: Regular-expression pattern handed to ``re.findall``.

    Each match is written on its own line, followed by a space and the
    current ``ctime()`` timestamp. Nothing is returned.
    """
    for hit in re.findall(regmatch, file):
        # One pre-formatted string inside parentheses prints identically
        # under the Python 2 print statement and the Python 3 function.
        print('%s %s' % (hit, ctime()))
# Patterns applied to the fetched forum page. Each has exactly one capture
# group, so re.findall() yields plain strings.
#   [0] captures the link text inside a <span id="thread_N"> anchor.
#   [1] captures the digits-digits-digits digits:digits text of a
#       "lastpost" redirect link (presumably the last-post timestamp).
# Raw strings keep the regex escapes (\s, \d, \., \?) literal instead of
# relying on Python passing unknown string escapes through unchanged.
regex_match_element = [
    r'\s*<span\s*id="thread_\d*"><a\s*href="thread-\d*-\d*-\d*\.html">(.+?)</a></span>',
    r'<em><a\s*href="redirect\.php\?tid=\d*&goto=lastpost#lastpost">(\d*-\d*-\d*\s* \d*:\d*)</a></em>',
]
def main():
    """Fetch the forum index page and scan it with every pattern in parallel.

    The downloaded page body is stored in the module-level global ``file``
    because ``RegexMatch`` reads its input from there. One thread is created
    per entry in ``regex_match_element``; all threads are started, then all
    are joined, and finally a completion message is printed.
    """
    # NOTE: the name shadows the Python 2 builtin ``file``; it is kept
    # because RegexMatch looks the page text up under this exact name.
    global file
    response = urllib2.urlopen('http://bbs.cfan.com.cn/forum-53-1.html')
    try:
        file = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    workers = [
        threading.Thread(target=RegexMatch, args=(pattern,))
        for pattern in regex_match_element
    ]
    # Start every worker before joining any, so the scans overlap.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print('结束')
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
特别感谢
11 年前
没有评论:
发表评论