美文网首页我爱编程
sparkstreaming配置文件

sparkstreaming配置文件

作者: 文子轩 | 来源:发表于2018-07-25 18:14 被阅读16次

    streaming.conf

    agent1.sources=avro-source
    agent1.channels=logger-channel
    agent1.sinks=log-sink

    # define source

    agent1.sources.avro-source.type=avro
    agent1.sources.avro-source.bind=0.0.0.0
    agent1.sources.avro-source.port=41414

    # define channel

    agent1.channels.logger-channel.type=memory

    # define sink

    agent1.sinks.log-sink.type=logger

    agent1.sources.avro-source.channels=logger-channel

    agent1.sinks.log-sink.channel=logger-channel

    启动

    flume-ng agent \
    --conf $FLUME_HOME/conf \
    --conf-file $FLUME_HOME/conf/streaming.conf \
    --name agent1 \
    -Dflume.root.logger=INFO,console

    启动zookeeper
    ./zkServer.sh start

    启动kafka
    ./kafka-server-start.sh -daemon $KAFKA_HOME/config/server.properties
    查看topic
    关联zookeeper和kafka
    ./kafka-topics.sh --list --zookeeper 192.168.122.53:2181
    出现的服务
    hello-topic
    kafka_streaming_topic
    my-replicated-topic

    ./kafka-topics.sh --create --zookeeper 192.168.122.53:2181 --replication-factor 1 --partitions 1 --topic streamingtopic

    streaming2.conf

    agent1.sources=avro-source
    agent1.channels=logger-channel
    agent1.sinks=kafka-sink

    # define source

    agent1.sources.avro-source.type=avro
    agent1.sources.avro-source.bind=0.0.0.0
    agent1.sources.avro-source.port=41414

    # define channel

    agent1.channels.logger-channel.type=memory

    # define sink

    agent1.sinks.kafka-sink.type=org.apache.flume.sink.kafka.KafkaSink

    agent1.sources.avro-source.channels=logger-channel

    agent1.sinks.kafka-sink.channel=logger-channel

    agent1.sinks.kafka-sink.type = org.apache.flume.sink.kafka.KafkaSink
    agent1.sinks.kafka-sink.kafka.topic=streamingtopic
    agent1.sinks.kafka-sink.kafka.brokerList=192.168.122.53:9092

    agent1.sinks.kafka-sink.requiredAcks=1
    agent1.sinks.kafka-sink.kafka.BatchSize=20

    flume-ng agent \
    --conf $FLUME_HOME/conf \
    --conf-file $FLUME_HOME/conf/streaming2.conf \
    --name agent1 \
    -Dflume.root.logger=INFO,console

    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.imooc.spsark</groupId>
    <artifactId>sparkstreaming</artifactId>
    <version>1.0-SNAPSHOT</version>
    
    <properties>
        <scala.version>2.11.8</scala.version>
        <kafka.version>0.9.0.0</kafka.version>
        <spark.version>2.2.0</spark.version>
        <hadoop.version>2.6.0-cdh5.7.0</hadoop.version>
        <hbase.version>1.2.0-cdh5.7.0</hbase.version>
    </properties>
    
    <!--添加cloudera的repository-->
    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
        </repository>
    </repositories>
    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
    
    <!-- Kafka 依赖--><!--
    
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>${kafka.version}</version>
        </dependency>
    

    -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>${hbase.version}</version>
    </dependency>

    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>${hbase.version}</version>
    </dependency>
    <!-- Spark Streaming 依赖-->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    
    <!-- Spark Streaming整合Flume 依赖-->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-flume_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-flume-sink_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    
    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-lang3</artifactId>
        <version>3.5</version>
    </dependency>
    
    <!-- Spark SQL 依赖-->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    
    <dependency>
        <groupId>com.fasterxml.jackson.module</groupId>
        <artifactId>jackson-module-scala_2.11</artifactId>
        <version>2.6.5</version>
    </dependency>
    
    <dependency>
        <groupId>net.jpountz.lz4</groupId>
        <artifactId>lz4</artifactId>
        <version>1.3.0</version>
    </dependency>
    
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.38</version>
    </dependency>
    
     <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.31</version>
     </dependency>
    
    <dependency>
        <groupId>org.apache.flume.flume-ng-clients</groupId>
        <artifactId>flume-ng-log4jappender</artifactId>
        <version>1.6.0</version>
    </dependency>
    

    </dependencies>

    <build><!--
    
        <sourceDirectory>src/main/scala</sourceDirectory>
        <testSourceDirectory>src/test/scala</testSourceDirectory>
    

    -->
    <plugins>
    <plugin>
    <groupId>org.scala-tools</groupId>
    <artifactId>maven-scala-plugin</artifactId>
    <executions>
    <execution>
    <goals>
    <goal>compile</goal>
    <goal>testCompile</goal>
    </goals>
    </execution>
    </executions>
    <configuration>
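    <scalaVersion>${scala.version}</scalaVersion>
    <args>
    <arg>-target:jvm-1.5</arg>
    </args>
    </configuration>
    </plugin>
    <plugin>
    <groupId>org.apache.maven.plugins</groupId>
    <artifactId>maven-eclipse-plugin</artifactId>
    <configuration>
    <downloadSources>true</downloadSources>
    <buildcommands>
    <buildcommand>ch.epfl.lamp.sdt.core.scalabuilder</buildcommand>
    </buildcommands>
    <additionalProjectnatures>
    <projectnature>ch.epfl.lamp.sdt.core.scalanature</projectnature>
    </additionalProjectnatures>
    <classpathContainers>
    <classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
    <classpathContainer>ch.epfl.lamp.sdt.launching.SCALA_CONTAINER</classpathContainer>
    </classpathContainers>
    </configuration>
    </plugin>
    </plugins>
    </build>
    <reporting>
    <plugins>
    <plugin>
    <groupId>org.scala-tools</groupId>
    <artifactId>maven-scala-plugin</artifactId>
    <configuration>
    <scalaVersion>${scala.version}</scalaVersion>
    </configuration>
    </plugin>
    </plugins>
    </reporting>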
    </project>

    相关文章

      网友评论

        本文标题:sparkstreaming配置文件

        本文链接:https://www.haomeiwen.com/subject/hthhyftx.html