目录
需求背景
解决方案
Ip解析介绍
pom.xml
ConstantsGeoIp.java
IpUtils.java
UDFIp2Region.java
ip2region.db
编译和打包
创建UDF
使用
需求背景
项目中埋点信息需要解析用户(访问者)的地理位置信息。如果有经纬度信息,我们可以通过百度地图或者高德地图的API解析,但此类接口一般都有调用次数限制或者收费。这里我们通过解析IP地址来获取用户的地理位置。
解决方案
通过解析IP地址获取用户(访问者)的地理位置。通过查询资料,一般解析IP地址使用映射和查询库是最优的方案。查询速度快,查询准确。这里使用ip2region项目。详细下面介绍。
Ip解析介绍
- 参考项目:
- 使用ip2region项目[https://github.com/lionsoul2014/ip2region](https://github.com/lionsoul2014/ip2region),
- ip2region - 最自由的ip地址查询库,ip到地区的映射库;
- 数据聚合了一些知名ip到地名查询提供商的数据(淘宝IP地址库,GeoIP,纯真IP库)
- 使用
- 依赖ip2region-1.7.2.jar包
- ip地址库的资源文件=ip2region.db
pom.xml
UTF-8
1.7
UTF-8
UTF-8
0.11.0
2.2.0
4.12
1.10
org.apache.hive
hive-jdbc
${hive.version}
org.apache.hive
hive-exec
${hive.version}
org.apache.hadoop
hadoop-common
${hadoop.version}
junit
junit
${junit.version}
org.lionsoul
ip2region
1.7.2
nl.basjes.parse.useragent
yauaa-hive
udf
5.11
commons-codec
commons-codec
${commons-codec.version}
net.sourceforge.javacsv
javacsv
2.0
com.github.codesorcery
juan
0.2.0
org.apache.hive
hive-jdbc
provided
org.apache.hive
hive-exec
org.apache.hadoop
hadoop-common
provided
commons-codec
commons-codec
org.lionsoul
ip2region
nl.basjes.parse.useragent
yauaa-hive
udf
net.sourceforge.javacsv
javacsv
com.github.codesorcery
juan
junit
junit
test
jdk.tools
jdk.tools
1.6
system
${JAVA_HOME}/lib/tools.jar
src/main/java
org.apache.maven.plugins
maven-shade-plugin
2.4.3
package
shade
true
org.apache.maven.plugins
maven-compiler-plugin
3.1
${project.build.targetJdk}
${project.build.targetJdk}
${project.build.sourceEncoding}
true
org.apache.maven.plugins
maven-resources-plugin
2.6
${project.build.sourceEncoding}
release
org.apache.maven.plugins
maven-source-plugin
2.2.1
package
jar-no-fork
org.apache.maven.plugins
maven-javadoc-plugin
2.9.1
package
jar
org.apache.maven.plugins
maven-gpg-plugin
1.6
verify
sign
snapshots
https://oss.sonatype.org/content/repositories/snapshots/
releases
https://oss.sonatype.org/service/local/staging/deploy/maven2/
ConstantsGeoIp.java
package com.my.hive.udf.ipgeo;
/**
 * String constants shared by the IP-geolocation UDFs: database file names,
 * the ip2region field separator, and the keys of the returned geo map.
 */
public class ConstantsGeoIp {
    /** GeoLite2 city database file name (MaxMind format). */
    public static final String GEOLITE2_CITY_FILE = "GeoLite2-City.mmdb";
    /** Generic field separator, escaped for use as a String.split regex. */
    public static final String SEP = "\\|";
    /** Separator between fields of an ip2region region string, as a split regex. */
    public static final String SEP_IP2REGION = "\\|";
    /** ip2region database file name. */
    public static final String FILE_IP2REGION = "ip2region.db";

    // Keys of the map returned by the geo UDFs.
    public static final String KEY_COUNTRY_ID = "countryID";         // country
    public static final String KEY_COUNTRY_NAME = "countryName";
    public static final String KEY_COUNTRY_NAME_EN = "countryNameEn";
    public static final String KEY_PROVINCE_ID = "provinceID";       // province
    public static final String KEY_PROVINCE_NAME = "provinceName";
    public static final String KEY_PROVINCE_NAME_EN = "provinceNameEn";
    public static final String KEY_CITY_ID = "cityID";               // city
    public static final String KEY_CITY_NAME = "cityName";
    public static final String KEY_CITY_NAME_EN = "cityNameEn";
    public static final String KEY_ISP_ID = "ispID";                 // carrier, e.g. China Telecom
    public static final String KEY_ISP_NAME = "ispName";
    public static final String KEY_ISP_NAME_EN = "ispNameEn";
    public static final String KEY_REGION_ID = "regionID";           // area, e.g. South China
    public static final String KEY_REGION_NAME = "regionName";
    public static final String KEY_REGION_NAME_EN = "regionNameEn";
    public static final String KEY_CONTINENT_ID = "continentID";     // continent
    public static final String KEY_CONTINENT_NAME = "continentName";
    public static final String KEY_CONTINENT_NAME_EN = "continentNameEn";
    public static final String KEY_LATITUDE = "latitude";            // latitude
    /** Preferred, correctly-cased key name for longitude. */
    public static final String KEY_LONGITUDE = "longitude";
    /** @deprecated misspelled alias kept for source compatibility; use {@link #KEY_LONGITUDE}. */
    @Deprecated
    public static final String KEY_LonGITUDE = KEY_LONGITUDE;
}
IpUtils.java
package com.my.utils.ip;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/** Utility methods for validating IP address strings. */
public class IpUtils {
    /**
     * Matches a dotted-quad IPv4 address. First octet 1-255 (no leading zero),
     * remaining octets 0-255. Compiled once: Pattern.compile is expensive and
     * Pattern instances are thread-safe.
     */
    private static final Pattern IPV4_PATTERN = Pattern.compile(
            "([1-9]|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])"
            + "(\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])){3}");

    /**
     * Returns true if the given string is a well-formed IPv4 address.
     *
     * @param ipAddress candidate address; null returns false (null-safe)
     * @return true if the whole string matches the IPv4 pattern
     */
    public static boolean isIpV4(String ipAddress) {
        if (ipAddress == null) {
            return false;
        }
        return IPV4_PATTERN.matcher(ipAddress).matches();
    }
}
UDFIp2Region.java
package com.my.hive.udf.ipgeo;
import java.io.*;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.io.Text;
import org.lionsoul.ip2region.DataBlock;
import org.lionsoul.ip2region.DbConfig;
import org.lionsoul.ip2region.DbMakerConfigException;
import org.lionsoul.ip2region.DbSearcher;
import com.my.utils.ip.IpUtils;
import org.apache.hadoop.hive.ql.session.SessionState;
@Description(name = "ip2geo", value = "_FUNC_(array) - Returns map type of the ip address.n"
+ "based on https://github.com/lionsoul2014/ip2region.nThe ip address database has stored in local resource. n"
+ " > Para1: Ipadressn"
+ "Example:n"
+ " > CREATE TEMPORARY FUNCTION ip2geo AS 'com.jet.hive.udf.ipgeo.UDFIp2Region' n"
+ " > SELECT ip2geo('221.226.1.30' ),ip2geo('221.226.1.30' )['provinceName'] n"
+ "")
public class UDFIp2Region extends GenericUDF{
PrimitiveObjectInspector inputOI;
private static DbSearcher searcher=null;
private static boolean isNonInit = true; //未被init过
private static List fieldNames = null;
private static byte[] data;
private static Configuration conf;
private static FileSystem fs;
private static InputStream in;
static {
//加载数据
ByteArrayOutputStream out = null;
try {
//修改成你的HDFS上的文件ip2region.db地址路径,不用加ip和端口
String uri = "hdfs:///warehouse/dd/auxlib/ip2region.db";
conf = new Configuration();
fs = FileSystem.get(URI.create(uri), conf);
in = fs.open(new Path(uri));
out = new ByteArrayOutputStream();
byte[] b = new byte[1024];
while (in.read(b) != -1) {
out.write(b);
}
// 提高性能,将ip2region.db一次从hdfs中读取出来,缓存到data字节数组中以重用,
// 避免每来一条数据读取一次ip2region.db
data = out.toByteArray();
out.close();
in.close();
} catch (Exception e){
e.printStackTrace();
}
finally {
try {
if(out != null) {
out.close();
}
if(in != null) {
in.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
private static synchronized void constructUDFIp2RegionByOut() throws UDFArgumentException {
try {
if(searcher==null){
DbConfig config = new DbConfig();
searcher = new DbSearcher(config, data);
}
isNonInit=false;
}catch (Exception e) {
throw new UDFArgumentException("Error: read file:"+ConstantsGeoIp.FILE_IP2REGION);
}
}
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if(isNonInit){
constructUDFIp2RegionByOut();
if(fieldNames==null){
fieldNames=new ArrayList();
fieldNames.add(ConstantsGeoIp.KEY_COUNTRY_NAME);
fieldNames.add(ConstantsGeoIp.KEY_REGION_NAME);
fieldNames.add(ConstantsGeoIp.KEY_PROVINCE_NAME);
fieldNames.add(ConstantsGeoIp.KEY_CITY_NAME);
fieldNames.add(ConstantsGeoIp.KEY_ISP_NAME);
}
isNonInit=false;
}
// 存储在全局变量的ObjectInspectors元素的输入
inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
// 返回变量输出类型
return ObjectInspectorFactory.getStandardMapObjectInspector(
PrimitiveObjectInspectorFactory.writableStringObjectInspector,
PrimitiveObjectInspectorFactory.writableStringObjectInspector);
}
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
String ip = PrimitiveObjectInspectorUtils.getString(arguments[0].get(), inputOI).trim();
String reString=null;
if(ip!=null && IpUtils.isIpV4(ip)){
try {
DataBlock dataBlock = searcher.memorySearch(ip);
reString=dataBlock.getRegion();
} catch (IOException e) {
reString="0|0|0|0|0";
}
}
else{
reString="0|0|0|0|0";
}
String[] ipArray = reString.split(ConstantsGeoIp.SEP_IP2REGION);
Map reMap = new HashMap();
for (int i = 0; i < fieldNames.size(); i++) {
Text t = ipArray[i].equals("0")?null:new Text(ipArray[i]);
reMap.put(new Text(fieldNames.get(i)), t);
}
return reMap;
}
@Override
public String getDisplayString(String[] arg0) {
return arg0[0];
}
}
ip2region.db
##将ip2region.db这个文件上传到HDFS一个文件夹中使用
比如上传到/warehouse/dd/auxlib/ip2region.db,使用时将UDFIp2Region.java中的
String uri = "hdfs:///warehouse/dd/auxlib/ip2region.db";修改一下即可。
package com.my.hive.udf.ipgeo;
/**
 * String constants shared by the IP-geolocation UDFs: database file names,
 * the ip2region field separator, and the keys of the returned geo map.
 */
public class ConstantsGeoIp {
    /** GeoLite2 city database file name (MaxMind format). */
    public static final String GEOLITE2_CITY_FILE = "GeoLite2-City.mmdb";
    /** Generic field separator, escaped for use as a String.split regex. */
    public static final String SEP = "\\|";
    /** Separator between fields of an ip2region region string, as a split regex. */
    public static final String SEP_IP2REGION = "\\|";
    /** ip2region database file name. */
    public static final String FILE_IP2REGION = "ip2region.db";

    // Keys of the map returned by the geo UDFs.
    public static final String KEY_COUNTRY_ID = "countryID";         // country
    public static final String KEY_COUNTRY_NAME = "countryName";
    public static final String KEY_COUNTRY_NAME_EN = "countryNameEn";
    public static final String KEY_PROVINCE_ID = "provinceID";       // province
    public static final String KEY_PROVINCE_NAME = "provinceName";
    public static final String KEY_PROVINCE_NAME_EN = "provinceNameEn";
    public static final String KEY_CITY_ID = "cityID";               // city
    public static final String KEY_CITY_NAME = "cityName";
    public static final String KEY_CITY_NAME_EN = "cityNameEn";
    public static final String KEY_ISP_ID = "ispID";                 // carrier, e.g. China Telecom
    public static final String KEY_ISP_NAME = "ispName";
    public static final String KEY_ISP_NAME_EN = "ispNameEn";
    public static final String KEY_REGION_ID = "regionID";           // area, e.g. South China
    public static final String KEY_REGION_NAME = "regionName";
    public static final String KEY_REGION_NAME_EN = "regionNameEn";
    public static final String KEY_CONTINENT_ID = "continentID";     // continent
    public static final String KEY_CONTINENT_NAME = "continentName";
    public static final String KEY_CONTINENT_NAME_EN = "continentNameEn";
    public static final String KEY_LATITUDE = "latitude";            // latitude
    /** Preferred, correctly-cased key name for longitude. */
    public static final String KEY_LONGITUDE = "longitude";
    /** @deprecated misspelled alias kept for source compatibility; use {@link #KEY_LONGITUDE}. */
    @Deprecated
    public static final String KEY_LonGITUDE = KEY_LONGITUDE;
}
IpUtils.java
package com.my.utils.ip;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/** Utility methods for validating IP address strings. */
public class IpUtils {
    /**
     * Matches a dotted-quad IPv4 address. First octet 1-255 (no leading zero),
     * remaining octets 0-255. Compiled once: Pattern.compile is expensive and
     * Pattern instances are thread-safe.
     */
    private static final Pattern IPV4_PATTERN = Pattern.compile(
            "([1-9]|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])"
            + "(\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])){3}");

    /**
     * Returns true if the given string is a well-formed IPv4 address.
     *
     * @param ipAddress candidate address; null returns false (null-safe)
     * @return true if the whole string matches the IPv4 pattern
     */
    public static boolean isIpV4(String ipAddress) {
        if (ipAddress == null) {
            return false;
        }
        return IPV4_PATTERN.matcher(ipAddress).matches();
    }
}
UDFIp2Region.java
package com.my.hive.udf.ipgeo;
import java.io.*;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.io.Text;
import org.lionsoul.ip2region.DataBlock;
import org.lionsoul.ip2region.DbConfig;
import org.lionsoul.ip2region.DbMakerConfigException;
import org.lionsoul.ip2region.DbSearcher;
import com.my.utils.ip.IpUtils;
import org.apache.hadoop.hive.ql.session.SessionState;
@Description(name = "ip2geo", value = "_FUNC_(array) - Returns map type of the ip address.n"
+ "based on https://github.com/lionsoul2014/ip2region.nThe ip address database has stored in local resource. n"
+ " > Para1: Ipadressn"
+ "Example:n"
+ " > CREATE TEMPORARY FUNCTION ip2geo AS 'com.jet.hive.udf.ipgeo.UDFIp2Region' n"
+ " > SELECT ip2geo('221.226.1.30' ),ip2geo('221.226.1.30' )['provinceName'] n"
+ "")
public class UDFIp2Region extends GenericUDF{
PrimitiveObjectInspector inputOI;
private static DbSearcher searcher=null;
private static boolean isNonInit = true; //未被init过
private static List fieldNames = null;
private static byte[] data;
private static Configuration conf;
private static FileSystem fs;
private static InputStream in;
static {
//加载数据
ByteArrayOutputStream out = null;
try {
//修改成你的HDFS上的文件ip2region.db地址路径,不用加ip和端口
String uri = "hdfs:///warehouse/dd/auxlib/ip2region.db";
conf = new Configuration();
fs = FileSystem.get(URI.create(uri), conf);
in = fs.open(new Path(uri));
out = new ByteArrayOutputStream();
byte[] b = new byte[1024];
while (in.read(b) != -1) {
out.write(b);
}
// 提高性能,将ip2region.db一次从hdfs中读取出来,缓存到data字节数组中以重用,
// 避免每来一条数据读取一次ip2region.db
data = out.toByteArray();
out.close();
in.close();
} catch (Exception e){
e.printStackTrace();
}
finally {
try {
if(out != null) {
out.close();
}
if(in != null) {
in.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
private static synchronized void constructUDFIp2RegionByOut() throws UDFArgumentException {
try {
if(searcher==null){
DbConfig config = new DbConfig();
searcher = new DbSearcher(config, data);
}
isNonInit=false;
}catch (Exception e) {
throw new UDFArgumentException("Error: read file:"+ConstantsGeoIp.FILE_IP2REGION);
}
}
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if(isNonInit){
constructUDFIp2RegionByOut();
if(fieldNames==null){
fieldNames=new ArrayList();
fieldNames.add(ConstantsGeoIp.KEY_COUNTRY_NAME);
fieldNames.add(ConstantsGeoIp.KEY_REGION_NAME);
fieldNames.add(ConstantsGeoIp.KEY_PROVINCE_NAME);
fieldNames.add(ConstantsGeoIp.KEY_CITY_NAME);
fieldNames.add(ConstantsGeoIp.KEY_ISP_NAME);
}
isNonInit=false;
}
// 存储在全局变量的ObjectInspectors元素的输入
inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
// 返回变量输出类型
return ObjectInspectorFactory.getStandardMapObjectInspector(
PrimitiveObjectInspectorFactory.writableStringObjectInspector,
PrimitiveObjectInspectorFactory.writableStringObjectInspector);
}
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
String ip = PrimitiveObjectInspectorUtils.getString(arguments[0].get(), inputOI).trim();
String reString=null;
if(ip!=null && IpUtils.isIpV4(ip)){
try {
DataBlock dataBlock = searcher.memorySearch(ip);
reString=dataBlock.getRegion();
} catch (IOException e) {
reString="0|0|0|0|0";
}
}
else{
reString="0|0|0|0|0";
}
String[] ipArray = reString.split(ConstantsGeoIp.SEP_IP2REGION);
Map reMap = new HashMap();
for (int i = 0; i < fieldNames.size(); i++) {
Text t = ipArray[i].equals("0")?null:new Text(ipArray[i]);
reMap.put(new Text(fieldNames.get(i)), t);
}
return reMap;
}
@Override
public String getDisplayString(String[] arg0) {
return arg0[0];
}
}
ip2region.db
##将ip2region.db这个文件上传到HDFS一个文件夹中使用
比如上传到/warehouse/dd/auxlib/ip2region.db,使用时将UDFIp2Region.java中的
String uri = "hdfs:///warehouse/dd/auxlib/ip2region.db";修改一下即可。
package com.my.hive.udf.ipgeo;
import java.io.*;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.io.Text;
import org.lionsoul.ip2region.DataBlock;
import org.lionsoul.ip2region.DbConfig;
import org.lionsoul.ip2region.DbMakerConfigException;
import org.lionsoul.ip2region.DbSearcher;
import com.my.utils.ip.IpUtils;
import org.apache.hadoop.hive.ql.session.SessionState;
@Description(name = "ip2geo", value = "_FUNC_(array) - Returns map type of the ip address.n"
+ "based on https://github.com/lionsoul2014/ip2region.nThe ip address database has stored in local resource. n"
+ " > Para1: Ipadressn"
+ "Example:n"
+ " > CREATE TEMPORARY FUNCTION ip2geo AS 'com.jet.hive.udf.ipgeo.UDFIp2Region' n"
+ " > SELECT ip2geo('221.226.1.30' ),ip2geo('221.226.1.30' )['provinceName'] n"
+ "")
public class UDFIp2Region extends GenericUDF{
PrimitiveObjectInspector inputOI;
private static DbSearcher searcher=null;
private static boolean isNonInit = true; //未被init过
private static List fieldNames = null;
private static byte[] data;
private static Configuration conf;
private static FileSystem fs;
private static InputStream in;
static {
//加载数据
ByteArrayOutputStream out = null;
try {
//修改成你的HDFS上的文件ip2region.db地址路径,不用加ip和端口
String uri = "hdfs:///warehouse/dd/auxlib/ip2region.db";
conf = new Configuration();
fs = FileSystem.get(URI.create(uri), conf);
in = fs.open(new Path(uri));
out = new ByteArrayOutputStream();
byte[] b = new byte[1024];
while (in.read(b) != -1) {
out.write(b);
}
// 提高性能,将ip2region.db一次从hdfs中读取出来,缓存到data字节数组中以重用,
// 避免每来一条数据读取一次ip2region.db
data = out.toByteArray();
out.close();
in.close();
} catch (Exception e){
e.printStackTrace();
}
finally {
try {
if(out != null) {
out.close();
}
if(in != null) {
in.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
private static synchronized void constructUDFIp2RegionByOut() throws UDFArgumentException {
try {
if(searcher==null){
DbConfig config = new DbConfig();
searcher = new DbSearcher(config, data);
}
isNonInit=false;
}catch (Exception e) {
throw new UDFArgumentException("Error: read file:"+ConstantsGeoIp.FILE_IP2REGION);
}
}
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if(isNonInit){
constructUDFIp2RegionByOut();
if(fieldNames==null){
fieldNames=new ArrayList();
fieldNames.add(ConstantsGeoIp.KEY_COUNTRY_NAME);
fieldNames.add(ConstantsGeoIp.KEY_REGION_NAME);
fieldNames.add(ConstantsGeoIp.KEY_PROVINCE_NAME);
fieldNames.add(ConstantsGeoIp.KEY_CITY_NAME);
fieldNames.add(ConstantsGeoIp.KEY_ISP_NAME);
}
isNonInit=false;
}
// 存储在全局变量的ObjectInspectors元素的输入
inputOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
// 返回变量输出类型
return ObjectInspectorFactory.getStandardMapObjectInspector(
PrimitiveObjectInspectorFactory.writableStringObjectInspector,
PrimitiveObjectInspectorFactory.writableStringObjectInspector);
}
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
String ip = PrimitiveObjectInspectorUtils.getString(arguments[0].get(), inputOI).trim();
String reString=null;
if(ip!=null && IpUtils.isIpV4(ip)){
try {
DataBlock dataBlock = searcher.memorySearch(ip);
reString=dataBlock.getRegion();
} catch (IOException e) {
reString="0|0|0|0|0";
}
}
else{
reString="0|0|0|0|0";
}
String[] ipArray = reString.split(ConstantsGeoIp.SEP_IP2REGION);
Map reMap = new HashMap();
for (int i = 0; i < fieldNames.size(); i++) {
Text t = ipArray[i].equals("0")?null:new Text(ipArray[i]);
reMap.put(new Text(fieldNames.get(i)), t);
}
return reMap;
}
@Override
public String getDisplayString(String[] arg0) {
return arg0[0];
}
}
ip2region.db
##将ip2region.db这个文件上传到HDFS一个文件夹中使用
比如上传到/warehouse/dd/auxlib/ip2region.db,使用时将UDFIp2Region.java中的
String uri = "hdfs:///warehouse/dd/auxlib/ip2region.db";修改一下即可。
ip2region.db最新文件下载地址ip2region.db-Hive文档类资源-CSDN下载
编译和打包
cd ${project_home}
mvn clean package -DskipTests
命令执行完成后, 将会在target目录下生成[A=my-hive-udf-${version}-shaded.jar,
B=my-hive-udf-${version}.jar]文件.其中A是包括所有依赖包的jar, B是最小编译jar文件
创建UDF
#dd_database_bigdata为database名称,ipgeo为方法名称
#create temporary function 创建的是临时方法,仅对当前session有效
create temporary function dd_database_bigdata.ipgeo as 'com.my.hive.udf.ipgeo.UDFIp2Region' USING JAR 'hdfs:///warehouse/dd/auxlib/my-hive-udf-1.0.0-shaded.jar';
#create function 创建的是永久方法
create function dd_database_bigdata.ipgeo as 'com.my.hive.udf.ipgeo.UDFIp2Region' USING JAR 'hdfs:///warehouse/dd/auxlib/my-hive-udf-1.0.0-shaded.jar';
使用
select ipgeo('221.226.1.11'), ipgeo('221.226.1.11')['provinceName']
##结果
{"cityName":"南京市","countryName":"中国","ispName":"电信","regionName":null,"provinceName":"江苏省"} 江苏省
#dd_database_bigdata为database名称,ipgeo为方法名称 #create temporary function 创建的是临时方法,仅对当前session有效 create temporary function dd_database_bigdata.ipgeo as 'com.my.hive.udf.ipgeo.UDFIp2Region' USING JAR 'hdfs:///warehouse/dd/auxlib/my-hive-udf-1.0.0-shaded.jar'; #create function 创建的是永久方法 create function dd_database_bigdata.ipgeo as 'com.my.hive.udf.ipgeo.UDFIp2Region' USING JAR 'hdfs:///warehouse/dd/auxlib/my-hive-udf-1.0.0-shaded.jar';



