Apache Avro

作者: spraysss | 来源:发表于2019-10-23 14:43 被阅读0次

    Apache Avro是一个数据序列化框架,它通过定义json风格的schema文件来表示数据的格式

    maven依赖

    添加avro依赖,和avro自动生成代码插件maven依赖

    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro</artifactId>
      <version>1.9.1</version>
    </dependency>
          
    As well as the Avro Maven plugin (for performing code generation):
    
    <plugin>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro-maven-plugin</artifactId>
      <version>1.9.1</version>
      <executions>
        <execution>
          <phase>generate-sources</phase>
          <goals>
            <goal>schema</goal>
          </goals>
          <configuration>
            <sourceDirectory>${project.basedir}/src/main/avro/</sourceDirectory>
            <outputDirectory>${project.basedir}/src/main/java/</outputDirectory>
          </configuration>
        </execution>
      </executions>
    </plugin>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-compiler-plugin</artifactId>
      <configuration>
        <source>1.8</source>
        <target>1.8</target>
      </configuration>
    </plugin>
    

    如果出现snappy相关的报错,可以添加如下依赖

       <dependency>
                <groupId>org.xerial.snappy</groupId>
                <artifactId>snappy-java</artifactId>
                <version>1.1.7.3</version>
            </dependency>
    

    schema 文件

    1. src/main下面创建一个avro的文件夹用于存放.avsc的schema文件
    2. src/main/avro下创建一个user.avsc文件

    user.avsc内容如下

    {"namespace": "example.avro",
     "type": "record",
     "name": "User",
     "fields": [
         {"name": "name", "type": "string"},
         {"name": "favorite_number",  "type": ["int", "null"]},
         {"name": "favorite_color", "type": ["string", "null"]}
     ]
    }
    

    编译时会自动生成代码,生成代码的package路径由schema文件中的namespace指定,类名由name指定,如下图所示:


    序列化和反序列化

    • 使用生成的代码来进行序列化和反序列化
    • 直接通过schema文件进行序列化和反序列化

    使用生成的代码来进行序列化和反序列化demo

    package example.avro;
    
    import org.apache.avro.file.DataFileReader;
    import org.apache.avro.file.DataFileWriter;
    import org.apache.avro.io.DatumReader;
    import org.apache.avro.io.DatumWriter;
    import org.apache.avro.specific.SpecificDatumReader;
    import org.apache.avro.specific.SpecificDatumWriter;
    
    import java.io.File;
    import java.io.IOException;
    
    /**
     * Demo that serializes and deserializes {@code User} records using the
     * class generated from the Avro schema.
     */
    public class SpecificMain {
        /**
         * Builds three {@code User} instances, writes them to
         * {@code /tmp/users.avro}, then reads the file back and prints each record.
         *
         * @param args unused
         * @throws IOException if the Avro data file cannot be written or read
         */
        public static void main(String[] args) throws IOException {
            // Plain constructor + setters.
            User user1 = new User();
            user1.setName("Alyssa");
            user1.setFavoriteNumber(256);
            // Leave favorite color null

            // Alternate constructor
            User user2 = new User("Ben", 7, "red");

            // Construct via builder
            User user3 = User.newBuilder()
                    .setName("Charlie")
                    .setFavoriteColor("blue")
                    .setFavoriteNumber(null)
                    .build();

            // Single File instance shared by the write and the read phase.
            File usersFile = new File("/tmp/users.avro");

            // ***** Serialization *****
            DatumWriter<User> userDatumWriter = new SpecificDatumWriter<>(User.class);
            // try-with-resources closes the writer even if create/append throws.
            try (DataFileWriter<User> dataFileWriter = new DataFileWriter<>(userDatumWriter)) {
                dataFileWriter.create(user1.getSchema(), usersFile);
                dataFileWriter.append(user1);
                dataFileWriter.append(user2);
                dataFileWriter.append(user3);
            }

            // ***** Deserialization *****
            DatumReader<User> userDatumReader = new SpecificDatumReader<>(User.class);
            // The reader holds an open file handle, so it must be closed as well.
            try (DataFileReader<User> dataFileReader = new DataFileReader<>(usersFile, userDatumReader)) {
                User user = null;
                while (dataFileReader.hasNext()) {
                    // Reuse user object by passing it to next(). This saves us from
                    // allocating and garbage collecting many objects for files with
                    // many items.
                    user = dataFileReader.next(user);
                    System.out.println(user);
                }
            }
        }
    }
    
    

    直接通过schema文件进行序列化和反序列化demo

    package example.avro;
    
    import org.apache.avro.Schema;
    import org.apache.avro.file.DataFileReader;
    import org.apache.avro.file.DataFileWriter;
    import org.apache.avro.generic.GenericData;
    import org.apache.avro.generic.GenericDatumReader;
    import org.apache.avro.generic.GenericDatumWriter;
    import org.apache.avro.generic.GenericRecord;
    import org.apache.avro.io.DatumReader;
    import org.apache.avro.io.DatumWriter;
    
    import java.io.File;
    import java.io.IOException;
    
    /**
     * Demo that serializes and deserializes records directly from a parsed
     * Avro schema file, without using generated classes.
     */
    public class GenericMain {
        /**
         * Parses {@code user.avsc}, builds two generic records, writes them to
         * {@code /tmp/myusers.avro}, then reads the file back and prints each record.
         *
         * @param args unused
         * @throws IOException if the schema file cannot be read or the Avro data
         *                     file cannot be written or read
         */
        public static void main(String[] args) throws IOException {
            // Relative path: resolved against the process working directory.
            Schema schema = new Schema.Parser().parse(new File("user.avsc"));

            GenericRecord user1 = new GenericData.Record(schema);
            user1.put("name", "Alyssa");
            user1.put("favorite_number", 256);
            // Leave favorite color null

            GenericRecord user2 = new GenericData.Record(schema);
            user2.put("name", "Ben");
            user2.put("favorite_number", 7);
            user2.put("favorite_color", "red");

            // Serialize users to disk
            File file = new File("/tmp/myusers.avro");
            DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
            // try-with-resources closes the writer even if create/append throws.
            try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
                dataFileWriter.create(schema, file);
                dataFileWriter.append(user1);
                dataFileWriter.append(user2);
            }

            // Deserialize users from disk
            DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
            // The reader holds an open file handle, so it must be closed as well.
            try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(file, datumReader)) {
                GenericRecord user = null;
                while (dataFileReader.hasNext()) {
                    // Reuse user object by passing it to next(). This saves us from
                    // allocating and garbage collecting many objects for files with
                    // many items.
                    user = dataFileReader.next(user);
                    System.out.println(user);
                }
            }
        }
    }
    
    

    相关文章

      网友评论

        本文标题:Apache Avro

        本文链接:https://www.haomeiwen.com/subject/rlhkvctx.html