思路整理

1.打开文件，处理其中的每一行
2.将每个词添加到字典中更新其计数
3.显示输出，在此例中按词频从高到低排列

使用的python函数

dict.items()：返回字典中所有的键值对，每个键值对是一个 (key, value) 元组
str.strip()：移除字符串头尾指定的字符（默认移除空白字符，包括空格、换行符和制表符）
list.append(obj)：在列表末尾添加新的对象
str.split()：按指定分隔符对字符串进行切分并返回列表（默认以空白字符为分隔符）

函数定义

addWord:将每个单词添加到字典中
processLine：处理一行文本，包括分割出单词、转换为小写、去除单词首尾的标点符号等工作
prettyPrint：格式化显示结果，将显示功能单独分离成一个函数
main：主函数，作为程序的入口

addWord

def addWord(word, wordCountDict):
    '''Record one more occurrence of word in wordCountDict.

    wordCountDict maps each word (key) to the number of times it has been
    seen (value) and is modified in place. Nothing is returned.
    '''
    # A word seen for the first time starts from 0, so it ends up at 1.
    wordCountDict[word] = wordCountDict.get(word, 0) + 1

processLine

import string


def processLine(line, wcDict):
    '''Add every word found in line to the word-count dictionary.

    line is one line of text. It is split on whitespace, each piece is
    lower-cased and has its leading and trailing punctuation removed, and
    the resulting word is counted through addWord.

    wcDict maps word -> count and is updated in place. Nothing is returned.
    '''
    # split() with no argument already discards leading/trailing whitespace,
    # so neither the line nor the individual pieces need an extra strip().
    for token in line.split():
        # Remove commas, periods and other punctuation around the word.
        word = token.lower().strip(string.punctuation)
        # The '--' found in the file, and any other token made only of
        # punctuation, strips down to '' and must not be counted as a word.
        if word:
            addWord(word, wcDict)

prettyPrint

def prettyPrint(wcDict):
    '''Print the word counts as a two-column table, most frequent first.

    wcDict maps each word to its count. The entries are turned into
    (count, word) tuples and sorted in descending order, so words that
    share a count come out in reverse alphabetical order. Nothing is
    returned; the table is written to standard output.
    '''
    # Putting the count first makes the default tuple ordering sort on the
    # frequency; reverse=True puts the biggest counts first.
    valKeyList = sorted(((val, key) for key, val in wcDict.items()),
                        reverse=True)

    # Every print call receives one pre-formatted string in parentheses;
    # this form gives the same output as a Python 2 print statement and is
    # also valid as a Python 3 print() call.
    print('%-10s%10s' % ('Word', 'Count'))
    print('_' * 21)
    for val, key in valKeyList:
        print('%-12s     %3d' % (key, val))

main

def main(filename='gettysburg.txt'):
    '''Count the words of a text file and print the frequency table.

    filename is the path of the text file to read; it defaults to
    'gettysburg.txt'. Every line is passed to processLine, then the number
    of distinct words and the table produced by prettyPrint are printed.
    Nothing is returned.
    '''
    wcDict = {}
    # The with-block closes the file even if processing a line raises.
    with open(filename, 'r') as fobj:
        for line in fobj:
            processLine(line, wcDict)
    # A single pre-formatted string prints identically with the Python 2
    # print statement and the Python 3 print() function.
    print('Length of the dictionary: %d' % len(wcDict))
    prettyPrint(wcDict)

最终代码如下：

# __author__ == 'xjiao'
# -*- coding:utf-8 -*-

import string

def addWord(word, wordCountDict):
    '''Count one occurrence of word.

    wordCountDict uses the word as key and its frequency as value; it is
    updated in place and nothing is returned.
    '''
    if word not in wordCountDict:
        # First time this word is seen: insert a new entry.
        wordCountDict[word] = 1
        return
    # The word is already present: increase its count by one.
    wordCountDict[word] += 1

def processLine(line, wcDict):
    '''Add every word found in line to the word-count dictionary.

    line is one line of text. It is split on whitespace, each piece is
    lower-cased and has its leading and trailing punctuation removed, and
    the resulting word is counted through addWord.

    wcDict maps word -> count and is updated in place. Nothing is returned.
    '''
    # split() with no argument already discards leading/trailing whitespace,
    # so neither the line nor the individual pieces need an extra strip().
    for token in line.split():
        # Remove commas, periods and other punctuation around the word.
        word = token.lower().strip(string.punctuation)
        # The '--' found in the file, and any other token made only of
        # punctuation, strips down to '' and must not be counted as a word.
        if word:
            addWord(word, wcDict)


def prettyPrint(wcDict):
    '''Print the word counts as a two-column table, most frequent first.

    wcDict maps each word to its count. The entries are turned into
    (count, word) tuples and sorted in descending order, so words that
    share a count come out in reverse alphabetical order. Nothing is
    returned; the table is written to standard output.
    '''
    # Putting the count first makes the default tuple ordering sort on the
    # frequency; reverse=True puts the biggest counts first.
    valKeyList = sorted(((val, key) for key, val in wcDict.items()),
                        reverse=True)

    # Every print call receives one pre-formatted string in parentheses;
    # this form gives the same output as a Python 2 print statement and is
    # also valid as a Python 3 print() call.
    print('%-10s%10s' % ('Word', 'Count'))
    print('_' * 21)
    for val, key in valKeyList:
        print('%-12s     %3d' % (key, val))

def main(filename='gettysburg.txt'):
    '''Count the words of a text file and print the frequency table.

    filename is the path of the text file to read; it defaults to
    'gettysburg.txt'. Every line is passed to processLine, then the number
    of distinct words and the table produced by prettyPrint are printed.
    Nothing is returned.
    '''
    wcDict = {}
    # The with-block closes the file even if processing a line raises.
    with open(filename, 'r') as fobj:
        for line in fobj:
            processLine(line, wcDict)
    # A single pre-formatted string prints identically with the Python 2
    # print statement and the Python 3 print() function.
    print('Length of the dictionary: %d' % len(wcDict))
    prettyPrint(wcDict)


# Run the word count only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

输出结果如下：

D:\python\python.exe E:/python_project/gettysburg.py
Length of the dictionary: 136
Word           Count
_____________________
the               15
that              11
we                10
to                 9
here               9
of                 6
and                6
a                  6
nation             5
it                 5
have               5
in                 4
for                4
dedicated          4
who                3
us                 3
this               3
they               3
so                 3
shall              3
people             3
is                 3
great              3
dead               3
cannot             3
are                3
which              2
what               2
war                2
these              2
rather             2
our                2
or                 2
not                2
new                2
men                2
long               2
living             2
gave               2
from               2
field              2
far                2
devotion           2
dedicate           2
conceived          2
can                2
but                2
be                 2
years              1
world              1
work               1
will               1
whether            1
vain               1
upon               1
unfinished         1
thus               1
those              1
their              1
testing            1
task               1
take               1
struggled          1
should             1
seven              1
sense              1
score              1
say                1
resting-place       1
resolve            1
remember           1
remaining          1
proposition        1
proper             1
power              1
portion            1
perish             1
on                 1
now                1
note               1
nor                1
nobly              1
never              1
might              1
met                1
measure            1
lives              1
live               1
little             1
liberty            1
last               1
larger             1
increased          1
honored            1
highly             1
hallow             1
ground             1
government         1
full               1
freedom            1
four               1
fought             1
forth              1
forget             1
fitting            1
final              1
fathers            1
equal              1
engaged            1
endure             1
earth              1
do                 1
died               1
did                1
detract            1
created            1
continent          1
consecrated        1
consecrate         1
come               1
civil              1
cause              1
by                 1
brought            1
brave              1
birth              1
before             1
battle             1
as                 1
any                1
altogether         1
all                1
ago                1
advanced           1
add                1
above              1

Process finished with exit code 0