streaming.conf
# Flume agent "agent1": avro source -> memory channel -> logger sink.
# Every key must start with the same agent name that is passed to
# `flume-ng agent --name`; keys under a misspelled prefix are silently ignored.
agent1.sources=avro-source
agent1.channels=logger-channel
agent1.sinks=log-sink

# define source: Avro source bound to all interfaces on port 41414
agent1.sources.avro-source.type=avro
agent1.sources.avro-source.bind=0.0.0.0
agent1.sources.avro-source.port=41414

# define channel: in-memory channel
agent1.channels.logger-channel.type=memory

# define sink: logger sink
agent1.sinks.log-sink.type=logger

# Wire source and sink to the channel.
# Note: a source takes "channels" (plural), a sink takes "channel" (singular).
agent1.sources.avro-source.channels=logger-channel
agent1.sinks.log-sink.channel=logger-channel
启动
# Start the agent defined in streaming.conf; --name must match the agent
# prefix used in the config file (agent1). Logs go to the console at INFO.
flume-ng agent \
  --conf $FLUME_HOME/conf \
  --conf-file $FLUME_HOME/conf/streaming.conf \
  --name agent1 \
  -Dflume.root.logger=INFO,console
启动zookeeper
./zkServer.sh start
启动kafka
./kafka-server-start.sh -daemon $KAFKA_HOME/config/server.properties
查看topic
关联zookeeper和kafka
./kafka-topics.sh --list --zookeeper 192.168.122.53:2181
出现的服务
hello-topic
kafka_streaming_topic
my-replicated-topic
./kafka-topics.sh --create --zookeeper 192.168.122.53:2181 --replication-factor 1 --partitions 1 --topic streamingtopic
streaming2.conf
# Flume agent "agent1": avro source -> memory channel -> Kafka sink.
# Every key must start with the same agent name that is passed to
# `flume-ng agent --name`; keys under a misspelled prefix are silently ignored.
agent1.sources=avro-source
agent1.channels=logger-channel
agent1.sinks=kafka-sink

# define source: Avro source bound to all interfaces on port 41414
agent1.sources.avro-source.type=avro
agent1.sources.avro-source.bind=0.0.0.0
agent1.sources.avro-source.port=41414

# define channel: in-memory channel
agent1.channels.logger-channel.type=memory

# define sink: publish events to the Kafka topic "streamingtopic"
# (sink type declared once; property names follow the Flume Kafka sink's
# topic / brokerList / requiredAcks / batchSize keys)
agent1.sinks.kafka-sink.type=org.apache.flume.sink.kafka.KafkaSink
agent1.sinks.kafka-sink.topic=streamingtopic
agent1.sinks.kafka-sink.brokerList=192.168.122.53:9092
agent1.sinks.kafka-sink.requiredAcks=1
agent1.sinks.kafka-sink.batchSize=20

# Wire source and sink to the channel.
# Note: a source takes "channels" (plural), a sink takes "channel" (singular).
agent1.sources.avro-source.channels=logger-channel
agent1.sinks.kafka-sink.channel=logger-channel
# Start the agent defined in streaming2.conf; --name must match the agent
# prefix used in the config file (agent1). Logs go to the console at INFO.
flume-ng agent \
  --conf $FLUME_HOME/conf \
  --conf-file $FLUME_HOME/conf/streaming2.conf \
  --name agent1 \
  -Dflume.root.logger=INFO,console
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.imooc.spsark</groupId>
<artifactId>sparkstreaming</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<scala.version>2.11.8</scala.version>
<kafka.version>0.9.0.0</kafka.version>
<spark.version>2.2.0</spark.version>
<hadoop.version>2.6.0-cdh5.7.0</hadoop.version>
<hbase.version>1.2.0-cdh5.7.0</hbase.version>
</properties>
<!-- Add the Cloudera repository -->
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<!-- Kafka dependency (currently commented out) --><!--
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.11</artifactId>
<version>${kafka.version}</version>
</dependency>
-->
<!-- Hadoop client -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- HBase client -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
</dependency>
<!-- Spark Streaming dependency -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- Spark Streaming / Flume integration dependency -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-flume_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-flume-sink_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.5</version>
</dependency>
<!-- Spark SQL dependency -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_2.11</artifactId>
<version>2.6.5</version>
</dependency>
<dependency>
<groupId>net.jpountz.lz4</groupId>
<artifactId>lz4</artifactId>
<version>1.3.0</version>
</dependency>
<!-- MySQL JDBC driver. Declared exactly once: Maven expects each
     groupId:artifactId to be unique within <dependencies>, and this artifact
     was previously listed twice with different versions (5.1.38 and 5.1.31). -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.38</version>
</dependency>
<dependency>
<groupId>org.apache.flume.flume-ng-clients</groupId>
<artifactId>flume-ng-log4jappender</artifactId>
<version>1.6.0</version>
</dependency>
</dependencies>
<build><!--
<sourceDirectory>src/main/scala</sourceDirectory>
<testSourceDirectory>src/test/scala</testSourceDirectory>
-->
  <plugins>
    <!-- Compiles Scala main and test sources -->
    <plugin>
      <groupId>org.scala-tools</groupId>
      <artifactId>maven-scala-plugin</artifactId>
      <executions>
        <execution>
          <goals>
            <goal>compile</goal>
            <goal>testCompile</goal>
          </goals>
        </execution>
      </executions>
      <configuration>
        <scalaVersion>${scala.version}</scalaVersion>
        <args>
          <arg>-target:jvm-1.5</arg>
        </args>
      </configuration>
    </plugin>
    <!-- Eclipse project generation with the Scala builder/nature -->
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-eclipse-plugin</artifactId>
      <configuration>
        <downloadSources>true</downloadSources>
        <buildcommands>
          <buildcommand>ch.epfl.lamp.sdt.core.scalabuilder</buildcommand>
        </buildcommands>
        <additionalProjectnatures>
          <projectnature>ch.epfl.lamp.sdt.core.scalanature</projectnature>
        </additionalProjectnatures>
        <classpathContainers>
          <classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
          <classpathContainer>ch.epfl.lamp.sdt.launching.SCALA_CONTAINER</classpathContainer>
        </classpathContainers>
      </configuration>
    </plugin>
  </plugins>
</build>
<reporting>
  <plugins>
    <plugin>
      <groupId>org.scala-tools</groupId>
      <artifactId>maven-scala-plugin</artifactId>
      <configuration>
        <scalaVersion>${scala.version}</scalaVersion>
      </configuration>
    </plugin>
  </plugins>
</reporting>
</project>
网友评论