#Part I
# Take a list snapshot of the keys so entries can be removed from the dict
# while looping over the snapshot.
keys = list(counts.keys())
for key in keys:
    if key not in thewords:
        continue
    counts.pop(key)
在遍历字典的过程中,如果没有先执行
keys=list(counts.keys()) #把 keys 转换成 list,而不再是 dict_keys 视图
而是直接遍历字典本身,并在循环中删除或增加键,就会报错:RuntimeError: dictionary changed size during iteration
#counts.pop(key) 也可以换成 del counts[key]
#Part II
上述写法可以更为精简,参考: Python if 和 for 的多种写法
#源代码是Mooc的python课程 的6.9的作业,
#上面切词的方式是用空格替换掉所有非字母数字字符,此代码用正则表达式来切词
import re
# Matches runs of lowercase ASCII letters; the text is lowercased before use.
re_word = re.compile(r'[a-z]+')


def get_words(txt_path="hamlet.txt"):  # tokenize
    """Read a text file and return its words as lowercase ASCII strings.

    The file content is lowercased and every maximal run of the letters
    ``a``-``z`` becomes one word, so digits, punctuation and non-ASCII
    characters act as separators.

    Args:
        txt_path: Path of the text file to read. Defaults to "hamlet.txt",
            the file the original hard-coded.

    Returns:
        The list of words in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Decode explicitly so the result does not depend on the platform's
    # locale encoding; undecodable bytes become U+FFFD, which the regex
    # treats as a separator instead of aborting the read.
    with open(txt_path, "r", encoding="utf-8", errors="replace") as txt_file:
        text = txt_file.read().lower()
    word_list = re_word.findall(text)
    print('word:%s' % word_list)
    return word_list
#对每个单词出现的次数统计
def count_words(object):
    """Print word frequencies for an iterable of words, skipping stop words.

    Every word in ``object`` is counted, except members of a fixed stop-word
    set. The full list of ``(word, count)`` pairs is printed first, sorted by
    descending count; words with equal counts keep the order in which they
    first appeared. Then up to ten of the most frequent words are printed as
    an aligned two-column table.

    Args:
        object: Iterable of hashable words (for example a list of strings).
            The name shadows the ``object`` builtin; it is kept unchanged so
            existing callers that pass it by keyword keep working.

    Returns:
        None. The results are written to standard output.
    """
    # A set gives constant-time membership tests; the original list also
    # contained 'a' twice.
    thewords = frozenset([
        'the', 'a', 'to', 'of', 'i', 'in', 'and',
        'you', 'your', 'it', 'that', 'is',
    ])

    counts = {}
    for word in object:
        if word in thewords:
            continue  # never count stop words, so nothing has to be removed later
        counts[word] = counts.get(word, 0) + 1

    # sorted() is stable, so ties stay in first-appearance order.
    items = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    print(items)

    # Slice rather than index range(10): with fewer than ten distinct words
    # indexing raised IndexError.
    for word, count in items[:10]:
        print("{0:<10}{1:>5}".format(word, count))
if __name__ == "__main__":
    # Script entry point: tokenize the input text, then print the word
    # frequency report.
    count_words(get_words())
以上代码可以用来做 GitHub 上的第 0006 题。