1.源码实现
<?php
/**
 * Namespace-style holder for the label values a statement can be tagged with.
 */
class Type
{
    /** Label value for negative statements. */
    const NEGATIVE = 'negative';

    /** Label value for positive statements. */
    const POSITIVE = 'positive';
}
/**
 * Small naive Bayes text classifier.
 *
 * Statements are lower-cased, stripped of everything except ASCII letters,
 * digits and whitespace, and split into words. learn() accumulates per-type
 * word and statement counts; guess() picks the type in $types with the
 * highest score for a new statement.
 */
class Classifier {
    /** @var array Types that guess() chooses between, in tie-breaking order. */
    public $types = array(Type::POSITIVE, Type::NEGATIVE);

    /** @var array Word counts per type: $words[$type][$word] => occurrences. */
    public $words = array();

    /** @var array Learned statement counts per type: $documents[$type] => count. */
    public $documents = array();

    /**
     * Records one training statement under the given type.
     *
     * @param string $statement Raw text to learn from.
     * @param string $type      Label to file the statement under.
     * @return void
     */
    public function learn($statement, $type)
    {
        foreach ($this->getWords($statement) as $word) {
            if (!isset($this->words[$type][$word])) {
                $this->words[$type][$word] = 0;
            }
            $this->words[$type][$word]++;
        }

        // Create the counter before incrementing so the first statement of a
        // type does not raise an undefined-index notice/warning.
        if (!isset($this->documents[$type])) {
            $this->documents[$type] = 0;
        }
        $this->documents[$type]++;
    }

    /**
     * Prior weight of a type: (statements of type + 1) / (all statements + 1).
     *
     * A type that has never been learned counts as zero statements.
     *
     * @param string $type
     * @return float|int Always greater than zero.
     */
    private function totalP($type)
    {
        $count = isset($this->documents[$type]) ? $this->documents[$type] : 0;
        return ($count + 1) / (array_sum($this->documents) + 1);
    }

    /**
     * Weight of a word within a type:
     * (occurrences of word in type + 1) / (all word occurrences in type + 1).
     *
     * The +1 keeps a word that was never seen for the type from zeroing out
     * the whole score. A type with no learned words is treated as empty.
     *
     * @param string $word
     * @param string $type
     * @return float|int Always greater than zero.
     */
    private function p($word, $type)
    {
        $counts = isset($this->words[$type]) ? $this->words[$type] : array();
        $count = isset($counts[$word]) ? $counts[$word] : 0;
        return ($count + 1) / (array_sum($counts) + 1);
    }

    /**
     * Splits a statement into lower-case words made of ASCII letters and digits.
     *
     * @param string $string
     * @return array List of words; empty when the statement has none.
     */
    private function getWords($string)
    {
        $cleaned = preg_replace('/[^A-Za-z0-9\s]/', '', strtolower($string));
        // PREG_SPLIT_NO_EMPTY drops the empty tokens that leading/trailing
        // whitespace or an empty statement would otherwise produce.
        return preg_split('/\s+/', $cleaned, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Returns the type in $types that best matches the statement.
     *
     * Scores are sums of logarithms rather than products of the raw weights:
     * the ranking is the same, but long statements no longer underflow to
     * zero and make every type lose the comparison.
     *
     * @param string $statement Raw text to classify.
     * @return string|null Best type (the earliest in $types on a tie), or null
     *                     when $types is empty.
     */
    public function guess($statement)
    {
        $words = $this->getWords($statement); // tokenise the statement
        $best_score = -INF;
        $best_type = null;
        foreach ($this->types as $type) {
            $score = log($this->totalP($type)); // log P(Type)
            foreach ($words as $word) {
                $score += log($this->p($word, $type)); // + log P(word | Type)
            }
            if ($score > $best_score) {
                $best_score = $score;
                $best_type = $type;
            }
        }
        return $best_type;
    }
}
// Train on four labelled statements, then classify two unseen ones.
$classifier = new Classifier();

$trainingSet = array(
    array('Symfony is the best', Type::POSITIVE),
    array('PhpStorm is great', Type::POSITIVE),
    array('Iltar complains a lot', Type::NEGATIVE),
    array('No Symfony is bad', Type::NEGATIVE),
);
foreach ($trainingSet as $sample) {
    $classifier->learn($sample[0], $sample[1]);
}

$queries = array(
    'Symfony is great', // string(8) "positive"
    'I complain a lot', // string(8) "negative"
);
foreach ($queries as $query) {
    var_dump($classifier->guess($query));
}
?>
2.运行及其结果
$ php example.php
string(8) "positive"
string(8) "negative"
网友评论