My getGeo UDF sometimes cannot read the IP mapping file, which is stored in HDFS, when I use it in Hive on MR.
It works only with a simple query:
select getGeo(ip, 'code') from xxxx;
It fails when I use this query:
select a, max(getGeo(ip, 'code')) from xxxx group by a;
It causes a NullPointerException at the line for (Path p : paths).
public class UDFGetGeo extends UDF {
private static String filePath = null;
static {
String dirPath = "/group/avazu/user/avazu/data/raw_log/ip_geo/";
Configuration conf = new Configuration();
Path inputPath = new Path(dirPath);
FileSystem fs = null;
FileStatus[] fss = null;
try {
fs = FileSystem.get(inputPath.toUri(), conf);
fss = fs.listStatus(inputPath);
} catch (Exception e) {
e.printStackTrace();
}
Path[] paths = FileUtil.stat2Paths(fss);
for (Path p : paths) {
try {
fs = FileSystem.get(p.toUri(), conf);
fss = fs.listStatus(p);
if(fss.length > 0) {
filePath = p.toString();
}
} catch (Exception e) {
e.printStackTrace();
}
}