CS106B Serafini

作者: kolibreath | 来源:发表于2018-03-14 13:28 被阅读0次

CS106B Serafini
CS106B学习笔记之makefile

这篇博客记录一下完成Serafini这个lab的过程

1.看书

完成这个lab需要看<<Programming abstraction in C++>>的前四章节.
前四章主要叙述了一些简单的C++的语法,此外介绍了集中重要的数据结构,这几种数据结构是由Stanford 大学提供的(库,文档在此)[https://cs.stanford.edu/people/eroberts/StanfordCPPLib/doc/index.html]

读完前四章我自己有如下总结,由于平时所用的语言是Java,这个总结也比较偏向Java的使用者:

1.关于引用:

 public static void main(String args[]) throws Exception {
            Type type = new Type(1);
            System.out.println("this type is "+type.id);
            change(type);
            System.out.println("this type is "+type.id);
    
            int a = 1;
            System.out.println("this a is " + a);
            change(a);
            System.out.println("this a is " + a);
    }

   public static void change(Type type){
        type.id = 2;
    }

public static int  change(int i){
        i = 2;
        return i ;
    }

输出是

this type is 1
this type is 2
this a is 1
this a is 1

而在c++中使用&可以让基本数据类型在通过函数之后都发生改变.

2.关于构造函数

Java :

Type type = new Type(1);

c++

Type type(1);

2.完成lab

第一个实验: Wordladder
要求: 给定起始单词 和 结尾单词,每次改变这个单词的一个字母,并且改变之后这个单词需要在字典中存在. 请求出从起始单词到结尾单词的最短路径.

怎么实现: 使用BFS算法
为什么使用BFS算法可以让路径最短:
如何优化程序让代码跑起来更快?

直接上代码:


#include <cctype>
#include <cmath>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include "console.h"
#include "queue.h"
#include "set.h"
#include "stack.h"
#include "vector.h"
#include "lexicon.h"

using namespace std;
string word1,word2;
Set<string> usedString;
string lexiconLocation ;
Lexicon dictionary;

void Welcome(){
    //absolute path
    string locationPrefix = "/home/kolibreath/桌面/WordLadder/res/",suffix ="";
    cin>>suffix;
    lexiconLocation = locationPrefix+suffix;
    cout<<"Word #1 (or Enter to quit):"<<endl;
    cin>>word1;
    cout<<"Word #2 (or Enter to quit):"<<endl;
    cin>>word2;
}

Lexicon loadDictionary(string location){
    Lexicon lexicon(lexiconLocation);
    return lexicon;
}

//return ture if the word is used else false
bool IsUsed(string word){
    if(usedString.contains(word)){
        return true;
    }else{
        return false;
    }
}

void BuildWordLadder(){
    usedString.add(word1);
    Queue<Stack<string>> queueStack;
    Stack<string> stack;
    stack.push(word1);
    queueStack.enqueue(stack);

    while(!queueStack.isEmpty()){
        Stack<string> copy = queueStack.dequeue();
        string top = copy.peek();
        if(top == word2){
           cout<<"A ladder from data back to code:"<<endl;
           cout<<copy.toString();
           break;
        }else{
            for(int i=0;i<word1.length();i++){
                for(int j=0;j<26;j++){
                    string wordCopy = top;
                    wordCopy[i] = 'a' + j;

                    if(!usedString.contains(wordCopy)&&dictionary.contains(wordCopy)){
                        Stack<string> partialLadder = copy;
                        partialLadder.push(wordCopy);
                        queueStack.enqueue(partialLadder);
                        usedString.add(wordCopy);
                }
            }
        }
        }
    }
}

int main() { 
    setConsoleSize(750, 450);
    setConsoleFont("Courier New-16");
    setConsoleEcho(true);

    cout<<"Welcome to CS 106B Word Ladder.\n"
       <<"Please give me two English words, and I will change the\n"
       <<"first into the second by changing one letter at a time.\n";
    cout<<"Dictionary file name?";
    Welcome();
    dictionary = loadDictionary(lexiconLocation);
    BuildWordLadder();
    return 0;

}

好的,现在来解答这些问题:

1.使用BFS算法的目的:
首先简要的解释一下什么是BFS算法:
Youtube 视频链接
看完这个视频我们有如下经验:
1.走过的点不能走,需要额外的有一个数据结构保存这些点
2.主要的数据结构是队列和堆栈
3.BFS算法的路径不止有一条
2.怎么能确保使用BFS的路径是最短的?
事实上使用得到的路径不止一条. 但是英文单词是很特殊的.举个例子:
起始单词 code 终点单词 data

已知的最短路径是:

 code → cade → cate → date → data

图片.png

假设图单词都存在,因为一个字母之差可能失之千里.就算最后可以得到一个终点的单词但是这个wordladder一定不是最短的. 所以最短的ladder是唯一的

另外需要提及的是储存已经使用过了的单词是避免ladder中产生loop的关键

3.关于优化代码速度:

读取:只读取一次字典中的值放到Lexicon中
寻找neighbor word 不应当找到所有的neighbor word 再进行判断,直接从一个字母一个字母的改变入手.

第二个lab N-gram
要求:读入一篇文章,需要"模仿"作者的口吻说出一段话.
具体的原理可以翻看CS106B的要求.

直接上代码:

// This is the CPP file you will edit and turn in.
// Also remove these comments here and add your own.
// TODO: remove this comment header

#include <cctype>
#include <cmath>
#include <fstream>
#include <iostream>
#include <string>
#include "console.h"
#include "tokenscanner.h"
#include "hashmap.h"
#include "random.h"
#include "vector.h"
#include "queue.h"


using namespace std;
string word1,word2;
string location = "/home/kolibreath/CLionProjects/Ngrams/res/";
HashMap<string, Vector<string> > hashMap;
Vector<string> keySet;


void GenerateNgramMap(int n,string path){
    ifstream stream;
    location += path;
    stream.open(location);
    TokenScanner scanner(stream);
    scanner.ignoreWhitespace();
    //store the input steam of words

    Vector<string> temp;
    while(scanner.hasMoreTokens()){
       temp.add(scanner.nextToken());
    }

    for(int i=0;i<temp.size()-(n-1);i++){
        string key="",value = "";
        for(int j=i,count = 0;count<n-1;count++){
            key += temp[j+count]+" ";
        }
        keySet.add(key);
        value = temp[i+n-1];
        if(hashMap.get(key).size()==0){
            Vector<string> vector;
            vector.add(value);
            hashMap.put(key,vector);
        }else{
            hashMap.get(key).add(value);
        }
    }
}

bool Contains(string key){
    for(int i=0;i<keySet.size();i++){
        if(keySet[i]==key){
            return true;
        }
    }
    return false;
}

string GetValidString(Vector<string> window){
    string nextKey = "";
    for(int i=1;i<window.size();i++){
        nextKey += window.get(i) + " ";
    }
//    cout<<"this is key "<<nextKey;
    Vector<string> values = hashMap.get(nextKey);
    string nextValue = values[randomInteger(0,values.size()-1)];
    return nextValue;
}

//n means the N-Gram number means the total words need to be generate
void RandomWriter(int n,int number){
    string firstRandomKey = keySet[randomInteger(0,keySet.size()-1)];
    Vector<string >values = hashMap.get(firstRandomKey);
    string firstValue = values[randomInteger(0,values.size()-1)];
    cout<<"..."<<firstRandomKey<<firstValue;

    TokenScanner scanner(firstRandomKey+firstValue);
    scanner.ignoreWhitespace();
    Vector<string> window;
    while(scanner.hasMoreTokens()){
        window.add(scanner.nextToken());
    }
    while(number>0){

        string nextValue = GetValidString(window);
        window.remove(0);
        window.add(nextValue);
        number--;
        cout<<" "<<nextValue;
    }
    cout<<"...";
}

void Welcome()
{
    cout << "Welcome to CS 106B Random Writer (aka 'N-Grams')." << endl;
    cout << "This program generates random text based on a document." << endl;
    cout << "Give me an input file and an 'N' value for groups" << endl;
    cout << "of words, and I'll generate random text for you." << endl;
    cout << endl;
}

int main() {
        setConsoleSize(750, 450);
        setConsoleFont("Courier New-16");
        setConsoleEcho(true);

        Welcome();
        string path;
        cin>>path;
        GenerateNgramMap(3,path);
        RandomWriter(3,20);
    return 0;
}

其实这个第二个lab挺容易的,感觉比第一个lab还要简单一些,只需要定义一个完整的Map<string,Vector<string>>就可以完成

网友评论

本文标题：CS106B Serafini

本文链接：https://www.haomeiwen.com/subject/akytqftx.html

延伸阅读

深度阅读

您也可以注册成为美文阅读网的作者，发表您的原创作品、分享您的心情！

CS106B Serafini

相关文章

CS106B Serafini

CS106B学习笔记之makefile

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读