About Code
It often happens that you receive data from many systems, each of which uses a different date format, while your output needs to use one specific date format.
Let's say you are receiving date strings like these:
12-01-2018 12:22:33
2018/12/01 12:22:33
20181201 12:22:33
And you want the output for all of them to be: 2018-12-01 12:22:33. In that case you can use the UDF below, which is written in Java for use in Pig scripts.
You just need to build a jar and then use it from Pig as follows:
Register the jar and call:
com.transformation.udf.DateConversion(input_date_col, 'yyyy-MM-dd HH:mm:ss')
Even if input_date_col contains values in several different formats, all of them are converted into the expected format, which here is 'yyyy-MM-dd HH:mm:ss'.
Below is the complete code:-
package com.transformation.udf;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;

import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;

/**
 * Pig UDF that re-formats a date string of unknown layout into a caller-supplied
 * {@link SimpleDateFormat} pattern.
 *
 * <p>The first tuple field is the date text, the second is the output pattern.
 * {@code -} and {@code /} separators are normalised to {@code .} and the text is then
 * tried against a fixed list of candidate patterns; the first pattern that parses wins.
 * Supported families are {@code yyyy.MM.dd}, {@code MM.dd.yyyy} and the compact
 * {@code yyyyMMdd} form (each with an optional time part), plus {@code yyyyMMddHHmmss}.
 */
public class DateConversion extends EvalFunc<String> {

    /** Length of {@code "yyyy.MM.dd HH:mm:ss"}; longer inputs are cut back to this size. */
    private static final int DATE_TIME_LENGTH = 19;

    /** Index inspected to decide whether the text starts with a four-digit year. */
    private static final int YEAR_SEPARATOR_INDEX = 4;

    /**
     * Time-of-day suffixes appended to every date pattern, most specific first.
     * The 12-hour ({@code hh ... aa}) variant precedes its 24-hour twin so that an
     * AM/PM marker is honoured when present.
     */
    private static final String[] TIME_SUFFIXES = {
        " hh:mm:ss.SSS aa",
        " HH:mm:ss.SSS",
        " hh:mm:ss aa",
        " HH:mm:ss",
        " hh:mm aa",
        " HH:mm",
        " hh aa",
        " HH",
        ""
    };

    /**
     * Converts field 0 of {@code input} into the pattern given in field 1.
     *
     * @param input tuple holding the date text and the output pattern
     * @return the re-formatted date, or {@code null} when the tuple cannot be read or
     *         the date cannot be converted
     */
    @Override
    public String exec(Tuple input) throws ExecException {
        String outputDate = null;
        try {
            outputDate = dateConvertor((String) input.get(0), (String) input.get(1));
        } catch (Exception e) {
            // Deliberate best effort: a bad row yields null instead of propagating.
            e.printStackTrace();
        }
        return outputDate;
    }

    /**
     * Detects the layout of {@code inputDate} and re-formats it.
     *
     * @param inputDate        date text; {@code -} and {@code /} are treated like {@code .}
     * @param outputDateFormat {@link SimpleDateFormat} pattern for the result
     * @return the formatted date, or {@code null} when {@code inputDate} is null, empty,
     *         too short to be a date, or matches none of the candidate patterns
     * @throws Exception kept for signature compatibility; an invalid
     *         {@code outputDateFormat} surfaces as an unchecked exception from
     *         {@link SimpleDateFormat}
     */
    public static String dateConvertor(String inputDate, String outputDateFormat) throws Exception {
        if (inputDate == null || inputDate.isEmpty()) {
            return null;
        }
        String normalized = inputDate.replace('-', '.').replace('/', '.');
        // No candidate pattern can match fewer than five characters, and the
        // charAt below would otherwise throw StringIndexOutOfBoundsException.
        if (normalized.length() <= YEAR_SEPARATOR_INDEX) {
            return null;
        }
        // After normalisation '.' is the only separator that can follow a leading year.
        boolean yearFirst = normalized.charAt(YEAR_SEPARATOR_INDEX) == '.';
        return dateGenerator(candidateFormats(yearFirst), normalized, outputDateFormat);
    }

    /**
     * Parses {@code inputDate} with the first matching entry of {@code formats} and
     * renders it with {@code outputDateFormat}.
     *
     * <p>Text longer than 19 characters is first cut back to 19 characters, keeping a
     * trailing {@code " AM"} / {@code " PM"} marker if one is present.
     *
     * @param formats          candidate parsers, tried in order
     * @param inputDate        date text, already separator-normalised
     * @param outputDateFormat {@link SimpleDateFormat} pattern for the result
     * @return the formatted date, or {@code null} when nothing matches or when
     *         {@code formats} or {@code inputDate} is null
     * @throws Exception kept for signature compatibility
     */
    public static String dateGenerator(SimpleDateFormat[] formats, String inputDate,
            String outputDateFormat) throws Exception {
        if (formats == null || inputDate == null) {
            return null;
        }
        // Trimming does not depend on the format, so do it once up front.
        String candidate = trimToDateTime(inputDate);
        for (SimpleDateFormat format : formats) {
            try {
                Date parsedDate = format.parse(candidate);
                // Built only after a successful parse so an unparseable input still
                // yields null even when the output pattern is invalid.
                return new SimpleDateFormat(outputDateFormat).format(parsedDate);
            } catch (ParseException ignored) {
                // This pattern does not fit; fall through to the next candidate.
            }
        }
        return null;
    }

    /** Builds fresh (non-shared) parsers for the detected date layout, in priority order. */
    private static SimpleDateFormat[] candidateFormats(boolean yearFirst) {
        int variants = TIME_SUFFIXES.length;
        SimpleDateFormat[] formats = new SimpleDateFormat[yearFirst ? variants : 1 + 2 * variants];
        int next = 0;
        if (yearFirst) {
            for (String suffix : TIME_SUFFIXES) {
                formats[next++] = parser("yyyy.MM.dd" + suffix);
            }
        } else {
            formats[next++] = parser("yyyyMMddHHmmss");
            for (String suffix : TIME_SUFFIXES) {
                formats[next++] = parser("MM.dd.yyyy" + suffix);
            }
            // Compact dates followed by a time ("20181201 12:22:33") need their own
            // patterns; a bare "yyyyMMdd" matches the prefix and drops the time.
            for (String suffix : TIME_SUFFIXES) {
                formats[next++] = parser("yyyyMMdd" + suffix);
            }
        }
        return formats;
    }

    /** Creates an input parser with English AM/PM markers, matching the literals used when trimming. */
    private static SimpleDateFormat parser(String pattern) {
        return new SimpleDateFormat(pattern, Locale.ENGLISH);
    }

    /** Cuts over-long text back to 19 characters, re-attaching a " AM" / " PM" marker if present. */
    private static String trimToDateTime(String value) {
        if (value.length() <= DATE_TIME_LENGTH) {
            return value;
        }
        if (!value.contains("AM") && !value.contains("PM")) {
            return value.substring(0, DATE_TIME_LENGTH);
        }
        String trimmed = value;
        if (trimmed.contains(" AM")) {
            trimmed = trimmed.substring(0, DATE_TIME_LENGTH).concat(" AM");
        }
        if (trimmed.contains(" PM")) {
            trimmed = trimmed.substring(0, DATE_TIME_LENGTH).concat(" PM");
        }
        return trimmed;
    }
}