美文网首页
Java+Scala混合开发Spark应用

Java+Scala混合开发Spark应用

作者: niyiwei | 来源:发表于2020-10-26 10:44 被阅读0次

    Java+Scala混合开发Spark应用

    我主要使用Spark GraphX 的api,但是使用Scala对项目的成员不是很友好,考虑到Scala在市场上的用户没有Java的多,就打算使用Java+Scala混合开发Spark GraphX应用。

    环境:

    Java8

    Scala 2.12.*

    pom.xml

    需要修改一下,把下面这两处占位符替换成自己项目的 groupId 和 artifactId:

    <groupId>xxx</groupId>
    <artifactId>xxx</artifactId>
    
    
    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    
        <modelVersion>4.0.0</modelVersion>
        <!-- Placeholder coordinates: replace with your own project's groupId/artifactId. -->
        <groupId>xxx</groupId>
        <artifactId>xxx</artifactId>
        <inceptionYear>2008</inceptionYear>
    
        <!-- Inheriting from spark-parent gives us Spark's dependency management and the
             scala-maven-plugin setup needed for mixed Java+Scala compilation. The project
             version is inherited from here, and every ${project.version} reference below
             resolves to 3.0.1. -->
        <parent>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-parent_2.12</artifactId>
            <version>3.0.1</version>
        </parent>
    
        <properties>
            <sbt.project.name>examples</sbt.project.name>
            <build.testJarPhase>none</build.testJarPhase>
            <build.copyDependenciesPhase>package</build.copyDependenciesPhase>
            <!--        <hadoop.deps.scope>provided</hadoop.deps.scope>-->
            <!--        <hive.deps.scope>provided</hive.deps.scope>-->
            <!--        <parquet.deps.scope>provided</parquet.deps.scope>-->
            <!-- scala.binary.version must match the parent artifact suffix (_2.12). -->
            <scala.version>2.12.12</scala.version>
            <scala.binary.version>2.12</scala.binary.version>
        </properties>
    
    
        <dependencies>
            <!-- Prevent our dummy JAR from being included in Spark distributions or uploaded to YARN -->
            <dependency>
                <groupId>org.spark-project.spark</groupId>
                <artifactId>unused</artifactId>
                <version>1.0.0</version>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-core_${scala.binary.version}</artifactId>
                <version>${project.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-streaming_${scala.binary.version}</artifactId>
                <version>${project.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-mllib_${scala.binary.version}</artifactId>
                <version>${project.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-hive_${scala.binary.version}</artifactId>
                <version>${project.version}</version>
            </dependency>
            <!-- spark-graphx is the module this article actually targets. -->
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-graphx_${scala.binary.version}</artifactId>
                <version>${project.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-streaming-kafka-0-10_${scala.binary.version}</artifactId>
                <version>${project.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-sql-kafka-0-10_${scala.binary.version}</artifactId>
                <version>${project.version}</version>
            </dependency>
            <!-- Versions below without an explicit <version> come from the parent's
                 dependencyManagement section. -->
            <dependency>
                <groupId>org.apache.commons</groupId>
                <artifactId>commons-math3</artifactId>
            </dependency>
            <dependency>
                <groupId>org.scalacheck</groupId>
                <artifactId>scalacheck_${scala.binary.version}</artifactId>
                <scope>test</scope>
            </dependency>
            <dependency>
                <groupId>org.scala-lang</groupId>
                <artifactId>scala-library</artifactId>
    
            </dependency>
            <dependency>
                <groupId>com.github.scopt</groupId>
                <artifactId>scopt_${scala.binary.version}</artifactId>
                <version>3.7.1</version>
            </dependency>
    
            <dependency>
                <groupId>com.google.guava</groupId>
                <artifactId>guava</artifactId>
                <version>${guava.version}</version>
                <scope>compile</scope>
            </dependency>
    
        </dependencies>
    
        <build>
            <!-- Mirror sbt's output layout so the two build tools can coexist. -->
            <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
            <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
            <plugins>
                <!-- This sample project is never deployed or installed to a repository. -->
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-deploy-plugin</artifactId>
                    <configuration>
                        <skip>true</skip>
                    </configuration>
                </plugin>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-install-plugin</artifactId>
                    <configuration>
                        <skip>true</skip>
                    </configuration>
                </plugin>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-jar-plugin</artifactId>
                    <configuration>
                        <outputDirectory>${jars.target.dir}</outputDirectory>
                    </configuration>
                </plugin>
    
            </plugins>
    
    
        </build>
    
    
        <profiles>
            <profile>
                <id>kinesis-asl</id>
                <dependencies>
                    <dependency>
                        <groupId>org.apache.spark</groupId>
                        <artifactId>spark-streaming-kinesis-asl_${scala.binary.version}</artifactId>
                        <version>${project.version}</version>
                        <scope>provided</scope>
                    </dependency>
                </dependencies>
            </profile>
        </profiles>
    </project>
    

    等待依赖下载

    速度可能有点慢,可以设置maven的代理

    修改 Maven 的 settings.xml 文件,在根元素 settings 里面添加如下代理配置:
    
       <proxies>
            <!-- Route all of Maven's remote downloads through a local HTTP proxy;
                 adjust host/port to match your own proxy before using. -->
            <proxy>
                <id>optional</id>
                <active>true</active>
                <protocol>http</protocol>
                <host>127.0.0.1</host>
                <port>1081</port>
            </proxy>
        </proxies>
    
    
    
    

    然后打开命令行窗口 执行

    mvn compile

    这样就能够通过代理下载依赖了

    相关文章

      网友评论

          本文标题:Java+Scala混合开发Spark应用

          本文链接:https://www.haomeiwen.com/subject/dqhamktx.html