自定义函数
自定义函数包括三种:UDF、UDAF、UDTF。
UDF(User-Defined-Function) 一进一出
UDAF(User-Defined Aggregation Function) 聚集函数,多进一出(Count\Max\Min)
UDTF(User-Defined Table-Generating Function) 一进多出,如lateral view explode()
使用方式:在Hive会话中add自定义函数的jar文件,然后创建function,继而使用函数。
UDF开发
-
UDF函数可以直接应用于SELECT语句,对查询结果做格式化处理后,再输出内容。
-
编写UDF函数的时候需要注意以下几点:
- 自定义UDF需要继承org.apache.hadoop.hive.ql.exec.UDF
- 需要实现evaluate函数,evaluate函数支持重载
-
步骤
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.avcdata</groupId>
    <artifactId>boot2hive2</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <!-- Single source of truth for every org.apache.hive artifact below. -->
        <hive.version>2.1.1</hive.version>
    </properties>
    <build>
        <plugins>
            <!-- Builds the *-jar-with-dependencies.jar during the package phase. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.4</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <mainClass>com.avcdata.Main</mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.hive/hive-jdbc -->
        <!-- Kept on the same version as hive-exec so mixed Hive releases never share a classpath. -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>${hive.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.0</version>
        </dependency>
        <!-- Provides org.apache.hadoop.hive.ql.exec.UDF, the base class of the custom function. -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
        <!-- NOTE(review): fastjson 1.2.x releases this old have published deserialization
             vulnerabilities; confirm whether it is still needed and consider upgrading. -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.49</version>
        </dependency>
    </dependencies>
    <repositories>
        <repository>
            <id>central</id>
            <name>Central Repository</name>
            <!-- Maven Central only serves HTTPS; plain http:// requests are rejected. -->
            <url>https://repo.maven.apache.org/maven2</url>
            <layout>default</layout>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
    </repositories>
</project>
Java代码
package com.avcdata;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
/**
 * Hive UDF that masks a string value: the first and last characters are kept
 * and everything in between is replaced by {@code ***}.
 *
 * <p>Examples: {@code "zhangsan"} becomes {@code "z***n"}; the one-character
 * value {@code "a"} becomes {@code "a***a"}.
 */
public final class TuoMin extends UDF {
    /** Fixed filler placed between the retained first and last characters. */
    private static final String MASK = "***";

    /**
     * Masks the given value.
     *
     * @param s the value to mask; may be {@code null}
     * @return {@code null} when {@code s} is {@code null}; an empty {@code Text}
     *         when {@code s} is empty; otherwise the first character, {@code ***},
     *         and the last character of {@code s}
     */
    public Text evaluate(final Text s) {
        if (s == null) {
            return null;
        }
        final String str = s.toString();
        if (str.isEmpty()) {
            // There is no first/last character to keep; substring(0, 1) would
            // throw StringIndexOutOfBoundsException here.
            return new Text(str);
        }
        // Cut on code-point boundaries so a supplementary character (surrogate
        // pair) at either end of the value is not split in half.
        final int firstEnd = str.offsetByCodePoints(0, 1);
        final int lastStart = str.offsetByCodePoints(str.length(), -1);
        return new Text(str.substring(0, firstEnd) + MASK + str.substring(lastStart));
    }
}
mvn package
将jar包上传至hive客户端
scp boot2hive2-1.0-SNAPSHOT-jar-with-dependencies.jar root@node3:~
在hive中添加jar包
> add jar /root/boot2hive2-1.0-SNAPSHOT-jar-with-dependencies.jar;
创建临时函数
CREATE TEMPORARY FUNCTION tuomin AS 'com.avcdata.TuoMin';
使用临时函数
-- Show the masked name next to the original columns of psn1.
SELECT
    id,
    tuomin(name),
    name,
    likes,
    address
FROM
    psn1;
删除临时函数
DROP TEMPORARY FUNCTION tuomin;
网友评论