美文网首页开源GeoSpark
GeoSpark-[阶段性总结]

GeoSpark-[阶段性总结]

作者: 一个懒散的人 | 来源:发表于2021-08-26 10:00 被阅读0次

    一、 Geospark安装:

    我采用的是 Gradle 安装,安装环境是 Spark 3.0.3 + Scala 2.12.3

    // Gradle build script for a Spark 3.0.3 / Scala 2.12 project that pulls in
    // Apache Sedona 1.0.1-incubating (the successor of GeoSpark).
    plugins {
        id 'java'
        id 'scala'
    }
    
    group 'org.example'
    version '1.0-SNAPSHOT'
    
    // File name of the jar produced by the build.
    configurations {
        jar.archiveName = 'OutputToDeltaV3.jar'
    }
    
    
    
    repositories {
        // NOTE(review): this mirror is accessed over plain HTTP; newer Gradle
        // releases may reject insecure repositories -- confirm and consider HTTPS.
        maven{ url 'http://maven.aliyun.com/nexus/content/groups/public'}
    }
    
    
    dependencies {
        // NOTE(review): the article text states Scala 2.12.3 while 2.12.2 is
        // pinned here -- confirm which version is intended.
        compile group: 'org.scala-lang', name: 'scala-library', version: '2.12.2'
        compile group: 'org.apache.spark', name: 'spark-core_2.12', version: '3.0.3'
        compile group: 'org.apache.spark', name: 'spark-sql_2.12', version: '3.0.3'
        compile group: 'org.apache.spark', name: 'spark-hive_2.12', version: '3.0.3'
        compile group: 'org.apache.hive',name: 'hive-jdbc',version: '1.2.0'
        compile group: 'io.delta', name: 'delta-core_2.12', version: '0.7.0'
        compile group: 'org.apache.spark', name: 'spark-sql-kafka-0-10_2.12', version: '3.0.3'
        compile group: 'mysql', name: 'mysql-connector-java', version: '5.1.41'
        compile group: 'org.apache.hadoop',name: 'hadoop-common',version: '2.7.7'
        compile group: 'org.apache.hadoop',name: 'hadoop-mapreduce-client-core',version: '2.7.7'
    //    compile group: 'org.apache.logging.log4j', name: 'log4j-api', version: '2.14.1'
    //    compile group: 'org.apache.logging.log4j', name: 'log4j-core', version: '2.14.1'
    
    // GeoSpark (Apache Sedona) dependencies start here
        compile group: 'org.apache.sedona',name: 'sedona-core-3.0_2.12',version: '1.0.1-incubating'
        compile group: 'org.apache.sedona',name: 'sedona-sql-3.0_2.12',version: '1.0.1-incubating'
        compile group: 'org.apache.sedona',name: 'sedona-viz-3.0_2.12',version: '1.0.1-incubating'
        compile group: 'org.locationtech.jts',name: 'jts-core',version: '1.18.0'
        compile group: 'org.datasyslab',name: 'geotools-wrapper',version: 'geotools-24.0'
        compile ('org.wololo:jts2geojson:0.16.1'){
            // The group id must match the jts-core declaration above
            // ('org.locationtech.jts'); the previous value 'org.locationtech.jt'
            // matched nothing, so the exclusion had no effect.
            exclude group: 'org.locationtech.jts',module: 'jts-core'
            exclude group: 'com.fasterxml.jackson.core',module:'*'
        }
    // GeoSpark (Apache Sedona) dependencies end here
        compile('ru.yandex.clickhouse:clickhouse-jdbc:0.1.53')
                {
            exclude group: 'com.fasterxml.jackson.core',module: 'jackson-databind'
            exclude group: 'com.fasterxml.jackson.core',module: 'jackson-core'
        }
    
        // Local jars dropped into ./lib.
        // NOTE(review): the same fileTree is added to both 'compile' and
        // 'implementation'; one of the two declarations looks redundant.
        compile fileTree(dir:'lib',includes:['*jar'])
        implementation fileTree(dir:'lib',includes:['*jar'])
    }
    

    二、 GeoSpark 能够读取的文件格式有:CSV、TSV、Shapefile、GeoJSON。

    S1.CSV结构如下:

    // 单点坐标
    -88.331492,32.324142,hotel
    -88.175933,32.360763,gas
    -88.388954,32.357073,bar
    -88.221102,32.35078,restaurant
    
    // 多点坐标
    -88.331492,32.324142,-88.331492,32.324142,-88.331492,32.324142,-88.331492,32.324142,-88.331492,32.324142,hotel
    -88.175933,32.360763,-88.175933,32.360763,-88.175933,32.360763,-88.175933,32.360763,-88.175933,32.360763,gas
    -88.388954,32.357073,-88.388954,32.357073,-88.388954,32.357073,-88.388954,32.357073,-88.388954,32.357073,bar
    -88.221102,32.35078,-88.221102,32.35078,-88.221102,32.35078,-88.221102,32.35078,-88.221102,32.35078,restaurant
    

    C1.CSV读取方式:

    // Build a PointRDD from a CSV file that holds one point (long/lat) per line.
    // Every value carries the `pointRDD` prefix so that this listing and the
    // polygon listing below can live in the same scope without name clashes.
    val pointRDDInputLocation = "/Download/checkin.csv"
    val pointRDDOffset = 0 // The point long/lat starts from Column 0
    val pointRDDSplitter = FileDataSplitter.CSV
    val pointRDDCarryOtherAttributes = true // Carry Column 2 (hotel, gas, bar...)
    val pointRDD = new PointRDD(sc, pointRDDInputLocation, pointRDDOffset, pointRDDSplitter, pointRDDCarryOtherAttributes)
    
    // Build a PolygonRDD from a CSV file that holds the vertices of one polygon per line.
    val polygonRDDInputLocation = "/Download/checkinshape.csv"
    val polygonRDDStartOffset = 0 // The coordinates start from Column 0
    val polygonRDDEndOffset = 9 // The coordinates end at Column 9
    val polygonRDDSplitter = FileDataSplitter.CSV
    val polygonRDDCarryOtherAttributes = true // Carry Column 10 (hotel, gas, bar...)
    val polygonRDD = new PolygonRDD(sc, polygonRDDInputLocation, polygonRDDStartOffset, polygonRDDEndOffset, polygonRDDSplitter, polygonRDDCarryOtherAttributes)
    

    S2. TSV结构如下:

    POINT (-88.331492 32.324142)    hotel
    POINT (-88.175933 32.360763)    gas
    POINT (-88.388954 32.357073)    bar
    POINT (-88.221102 32.35078)    restaurant
    

    C2. TSV读取方式:

    // Load a tab-separated file whose WKT geometry string sits in column 0
    // into a spatial RDD via WktReader.
    val inputLocation: String = "/Download/checkin.tsv"
    val wktColumn: Int = 0 // index of the column where the WKT string starts
    val allowTopologyInvalidGeometries: Boolean = true // optional argument
    val skipSyntaxInvalidGeometries: Boolean = false // optional argument
    val spatialRDD = WktReader.readToGeometryRDD(
      sparkSession.sparkContext,
      inputLocation,
      wktColumn,
      allowTopologyInvalidGeometries,
      skipSyntaxInvalidGeometries
    )
    

    S3.Shapefile结构如下:

    - shapefile1
    - shapefile2
    - myshapefile
        - myshapefile.shp
        - myshapefile.shx
        - myshapefile.dbf
        - myshapefile...
        - ...
    

    C3. Shapefile 读取方式:

    // Load a shapefile into a spatial RDD. The location is the folder that
    // holds the .shp/.shx/.dbf files (see the directory layout listed above).
    val shapefileInputLocation: String = "/Download/myshapefile"
    val spatialRDD = ShapefileReader.readToGeometryRDD(
      sparkSession.sparkContext,
      shapefileInputLocation
    )
    

    S4. Geojson结构如下:

    { "type": "Feature", "properties": { "STATEFP": "01", "COUNTYFP": "077", "TRACTCE": "011501", "BLKGRPCE": "5", "AFFGEOID": "1500000US010770115015", "GEOID": "010770115015", "NAME": "5", "LSAD": "BG", "ALAND": 6844991, "AWATER": 32636 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -87.621765, 34.873444 ], [ -87.617535, 34.873369 ], [ -87.6123, 34.873337 ], [ -87.604049, 34.873303 ], [ -87.604033, 34.872316 ], [ -87.60415, 34.867502 ], [ -87.604218, 34.865687 ], [ -87.604409, 34.858537 ], [ -87.604018, 34.851336 ], [ -87.603716, 34.844829 ], [ -87.603696, 34.844307 ], [ -87.603673, 34.841884 ], [ -87.60372, 34.841003 ], [ -87.603879, 34.838423 ], [ -87.603888, 34.837682 ], [ -87.603889, 34.83763 ], [ -87.613127, 34.833938 ], [ -87.616451, 34.832699 ], [ -87.621041, 34.831431 ], [ -87.621056, 34.831526 ], [ -87.62112, 34.831925 ], [ -87.621603, 34.8352 ], [ -87.62158, 34.836087 ], [ -87.621383, 34.84329 ], [ -87.621359, 34.844438 ], [ -87.62129, 34.846387 ], [ -87.62119, 34.85053 ], [ -87.62144, 34.865379 ], [ -87.621765, 34.873444 ] ] ] } },
    { "type": "Feature", "properties": { "STATEFP": "01", "COUNTYFP": "045", "TRACTCE": "021102", "BLKGRPCE": "4", "AFFGEOID": "1500000US010450211024", "GEOID": "010450211024", "NAME": "4", "LSAD": "BG", "ALAND": 11360854, "AWATER": 0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -85.719017, 31.297901 ], [ -85.715626, 31.305203 ], [ -85.714271, 31.307096 ], [ -85.69999, 31.307552 ], [ -85.697419, 31.307951 ], [ -85.675603, 31.31218 ], [ -85.672733, 31.312876 ], [ -85.672275, 31.311977 ], [ -85.67145, 31.310988 ], [ -85.670622, 31.309524 ], [ -85.670729, 31.307622 ], [ -85.669876, 31.30666 ], [ -85.669796, 31.306224 ], [ -85.670356, 31.306178 ], [ -85.671664, 31.305583 ], [ -85.67177, 31.305299 ], [ -85.671878, 31.302764 ], [ -85.671344, 31.302123 ], [ -85.668276, 31.302076 ], [ -85.66566, 31.30093 ], [ -85.665687, 31.30022 ], [ -85.669183, 31.297677 ], [ -85.668703, 31.295638 ], [ -85.671985, 31.29314 ], [ -85.677177, 31.288211 ], [ -85.678452, 31.286376 ], [ -85.679236, 31.28285 ], [ -85.679195, 31.281426 ], [ -85.676865, 31.281049 ], [ -85.674661, 31.28008 ], [ -85.674377, 31.27935 ], [ -85.675714, 31.276882 ], [ -85.677938, 31.275168 ], [ -85.680348, 31.276814 ], [ -85.684032, 31.278848 ], [ -85.684387, 31.279082 ], [ -85.692398, 31.283499 ], [ -85.705032, 31.289718 ], [ -85.706755, 31.290476 ], [ -85.718102, 31.295204 ], [ -85.719132, 31.29689 ], [ -85.719017, 31.297901 ] ] ] } },
    { "type": "Feature", "properties": { "STATEFP": "01", "COUNTYFP": "055", "TRACTCE": "001300", "BLKGRPCE": "3", "AFFGEOID": "1500000US010550013003", "GEOID": "010550013003", "NAME": "3", "LSAD": "BG", "ALAND": 1378742, "AWATER": 247387 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -86.000685, 34.00537 ], [ -85.998837, 34.009768 ], [ -85.998012, 34.010398 ], [ -85.987865, 34.005426 ], [ -85.986656, 34.004552 ], [ -85.985, 34.002659 ], [ -85.98851, 34.001502 ], [ -85.987567, 33.999488 ], [ -85.988666, 33.99913 ], [ -85.992568, 33.999131 ], [ -85.993144, 33.999714 ], [ -85.994876, 33.995153 ], [ -85.998823, 33.989548 ], [ -85.999925, 33.994237 ], [ -86.000616, 34.000028 ], [ -86.000685, 34.00537 ] ] ] } },
    { "type": "Feature", "properties": { "STATEFP": "01", "COUNTYFP": "089", "TRACTCE": "001700", "BLKGRPCE": "2", "AFFGEOID": "1500000US010890017002", "GEOID": "010890017002", "NAME": "2", "LSAD": "BG", "ALAND": 1040641, "AWATER": 0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -86.574172, 34.727375 ], [ -86.562684, 34.727131 ], [ -86.562797, 34.723865 ], [ -86.562957, 34.723168 ], [ -86.562336, 34.719766 ], [ -86.557381, 34.719143 ], [ -86.557352, 34.718322 ], [ -86.559921, 34.717363 ], [ -86.564827, 34.718513 ], [ -86.567582, 34.718565 ], [ -86.570572, 34.718577 ], [ -86.573618, 34.719377 ], [ -86.574172, 34.727375 ] ] ] } },
    

    C4. GeoJSON 读取方式:

    // Load a GeoJSON file into a spatial RDD via GeoJsonReader.
    val inputLocation: String = "/Download/polygon.json"
    val allowTopologyInvalidGeometries: Boolean = true // optional argument
    val skipSyntaxInvalidGeometries: Boolean = false // optional argument
    val spatialRDD = GeoJsonReader.readToGeometryRDD(
      sparkSession.sparkContext,
      inputLocation,
      allowTopologyInvalidGeometries,
      skipSyntaxInvalidGeometries
    )
    
    

    相关文章

      网友评论

        本文标题:GeoSpark-[阶段性总结]

        本文链接:https://www.haomeiwen.com/subject/xrbyiltx.html