# -*- coding: utf-8 -*-
"""Count how many times each word occurs in an English plain-text file.

Exercise statement: given any plain-text file written in English, count the
number of occurrences of each word in it.

Reference reading:
    re           http://www.cnblogs.com/tina-python/p/5508402.html#undefined
    collections  http://blog.csdn.net/liufang0001/article/details/54618484
"""
__author__ = 'Deen'

import collections
import re

# A "word" is a maximal run of regex word characters (letters, digits and
# underscore). NOTE: a contraction such as "don't" is therefore counted as
# the two words "don" and "t".
_WORD_RE = re.compile(r'\w+')


def count_words(text):
    """Return the case-insensitive word frequencies of *text*.

    Args:
        text: The string to analyse.

    Returns:
        A ``collections.Counter`` mapping each lower-cased word to the number
        of times it occurs. Empty when *text* contains no word characters.
    """
    return collections.Counter(
        word.lower() for word in _WORD_RE.findall(text)
    )


def main(filename='english.txt'):
    """Print and return the word frequencies of the file *filename*.

    Args:
        filename: Path of the text file to read.

    Returns:
        The ``collections.Counter`` produced by ``count_words``.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    with open(filename, 'r') as fp:
        counts = count_words(fp.read())
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(counts)
    return counts


if __name__ == '__main__':
    main()