玩过storm的人都知道storm有流分组的概念,即上级组件把数据传递给下一级组件时的分发策略。但其实storm还有流id的概念,可以用来方便storm上级组件有选择地指定流的发送目标。比如在传统的单词计数中,我们可以按单词长度对流进行标记,把长度小于等于4的单词标记为stream_1,把长度大于4的单词标记为stream_2,然后分别发送给不同的bolt,分别进行统计。
storm的每个组件(spout和bolt)都有OutputFieldsDeclarer接口,通过该接口可以告诉其他组件,我的输出是哪些字段,有需要的其他组件可以前来订阅。OutputFieldsDeclarer一般调用以下两个函数:
void declareStream(String streamId, Fields fields):声明本组件输出使用到的流id和字段fields。
void declare(Fields fields):这个方法其实也用到了流id,即默认的“default stream id”。
下面我们用流id的方法改写FastWordCountTopology,让WordCount1统计长度小于等于4的单词,让WordCount2统计长度大于4的单词。
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.storm.starter;

import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;

import org.apache.storm.Config;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.*;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.NimbusClient;
import org.apache.storm.utils.Utils;
/**
* WordCount but the spout does not stop, and the bolts are implemented in
* java. This can show how fast the word count can run.
*/
public class FastWordCountTopology{
public static class FastRandomSentenceSpout extends BaseRichSpout{
SpoutOutputCollector _collector;
Random _rand;
private static finalString[] CHOICES={
"marry had a little lamb whos fleese was white as snow",
"and every where that marry went the lamb was sure to go",
"one two three four five six seven eight nine ten",
"this is a test of the emergency broadcast system this is only a test",
"peter piper picked a peck of pickeled peppers"
};
@Override
public void open(Mapconf,TopologyContext context,SpoutOutputCollector collector) {
_collector=collector;
_rand=ThreadLocalRandom.current();
}
@Override
public void nextTuple() {
String sentence=CHOICES[_rand.nextInt(CHOICES.length)];
_collector.emit(new Values(sentence), sentence);
}
@Override
public void ack(Objectid) {
//Ignored
}
@Override
public void fail(Objectid) {
_collector.emit(new Values(id), id);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("sentence"));
}
}
public static class SplitSentence extends BaseBasicBolt{
@Override
public void execute(Tuple tuple,BasicOutputCollector collector) {
String sentence=tuple.getString(0);
for(String word:sentence.split("\\s+")) {
if (4 <= word.length())
{
collector.emit("streamid_1" , newValues(word,1));
}
esle
{
collector.emit("streamid_2" , new Values(word,1));
}
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declareStream("streamid_1", new Fields("word","count"));
declarer.declareStream("streamid_2", new Fields("word","count"));
}
}
public static class WordCount1 extends BaseBasicBolt{
Map counts=new HashMap();
@Override
public void execute(Tuple tuple,BasicOutputCollector collector) {
String word=tuple.getString(0);
Integer count=counts.get(word);
if(count==null)count=0;
count++;
counts.put(word, count);
collector.emit("streamid_1",new Values(word, count));
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declareStream("streamid_1", new Fields("word","count"));
}
}
public static class WordCount2 extends BaseBasicBolt{
Map counts=new HashMap();
@Override
public void execute(Tuple tuple,BasicOutputCollector collector) {
String word=tuple.getString(0);
Integer count=counts.get(word);
if(count==null)count=0;
count++;
counts.put(word, count);
collector.emit("streamid_2",new Values(word, count));
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declareStream("streamid_2", new Fields("word","count"));
}
}
public static void main(String[] args)throws Exception{
TopologyBuilder builder=new TopologyBuilder();
builder.setSpout("spout",new FastRandomSentenceSpout(),4);
builder.setBolt("split",new SplitSentence(),4).shuffleGrouping("spout");
builder.setBolt("count",new WordCount1(),4).fieldsGrouping("split","streamid_1",new Fields("word"));
builder.setBolt("count",new WordCount2(),4).fieldsGrouping("split","streamid_2",new Fields("word"));
Config conf=new Config();
String name="wc-test";
if(args!=null&&args.length>0) {
name=args[0];
}
conf.setNumWorkers(1);
StormSubmitter.submitTopologyWithProgressBar(name, conf, builder.createTopology());
}
}
网友评论