About Code
It often happens that you receive data from many systems, each of which uses a different date format, while the output must use one specific date format.
Let's say you are receiving date strings like:
12-01-2018 12:22:33
2018/12/01 12:22:33
20181201 12:22:33
And you want the output of all of them to be: 2018-12-01 12:22:33. In that case you can use the UDF below, which is written in Java for use in Pig scripts.
![]()
You just need to build a jar and call it from Pig using the command below:
Register the jar and call:
com.transformation.udf.DateConversion(input_date_col,'yyyy-MM-dd HH:mm:ss')
Now, even if input_date_col contains multiple formats, all of them will be converted into the expected format, which is 'yyyy-MM-dd HH:mm:ss'.
Below is the complete code:-
package com.transformation.udf;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;
public class DateConversion extends EvalFunc<String> {
public String exec(Tuple input) throws ExecException {
String outputDate = null;
String inputDate = null;
String outputDateFormat = null;
String inputDateFormat = null;
try {
outputDate = dateConvertor((String) input.get(0),
(String) input.get(1));
} catch (Exception e) {
e.printStackTrace();
}
return outputDate;
}
public static String dateConvertor(String inputDate,String outputDateFormat) throws Exception
{
SimpleDateFormat[] formats=null;
if (inputDate != null && !inputDate.isEmpty())
{
if(inputDate.contains("-")||inputDate.contains("/"))
{
inputDate=inputDate.replace("-", ".");
inputDate=inputDate.replace("/", ".");
}
char yyyyIdentifier=inputDate.charAt(4);
if(yyyyIdentifier=='.'||yyyyIdentifier=='-'||yyyyIdentifier=='/')
{
formats=new SimpleDateFormat[] { new SimpleDateFormat("yyyy.MM.dd hh:mm:ss.SSS aa"),
new SimpleDateFormat("yyyy.MM.dd HH:mm:ss.SSS"),
new SimpleDateFormat("yyyy.MM.dd hh:mm:ss aa"),
new SimpleDateFormat("yyyy.MM.dd HH:mm:ss"),
new SimpleDateFormat("yyyy.MM.dd hh:mm aa"),
new SimpleDateFormat("yyyy.MM.dd HH:mm"),
new SimpleDateFormat("yyyy.MM.dd hh aa"),
new SimpleDateFormat("yyyy.MM.dd HH"),
new SimpleDateFormat("yyyy.MM.dd")};
}
else
{
formats=new SimpleDateFormat[] { new SimpleDateFormat("yyyyMMddHHmmss"),
new SimpleDateFormat("MM.dd.yyyy hh:mm:ss.SSS aa"),
new SimpleDateFormat("MM.dd.yyyy HH:mm:ss.SSS"),
new SimpleDateFormat("MM.dd.yyyy hh:mm:ss aa"),
new SimpleDateFormat("MM.dd.yyyy HH:mm:ss"),
new SimpleDateFormat("MM.dd.yyyy hh:mm aa"),
new SimpleDateFormat("MM.dd.yyyy HH:mm"),
new SimpleDateFormat("MM.dd.yyyy hh aa"),
new SimpleDateFormat("MM.dd.yyyy HH"),
new SimpleDateFormat("MM.dd.yyyy"),
new SimpleDateFormat("yyyyMMdd")};
}
return dateGenerator(formats,inputDate,outputDateFormat);
}
else
return null;
}
public static String dateGenerator(SimpleDateFormat[] formats,String inputDate,String outputDateFormat) throws Exception
{
Date parsedDate = null;
String Output_Date=null;
for (int i = 0; i < formats.length; i++)
{
try
{
if(inputDate.length()>19)
{
if(inputDate.contains("AM")||inputDate.contains("PM"))
{
if(inputDate.contains(" AM"))
{
inputDate=inputDate.substring(0, 19);
inputDate=inputDate.concat(" AM");
}
if(inputDate.contains(" PM"))
{
inputDate=inputDate.substring(0, 19);
inputDate=inputDate.concat(" PM");
}
}
else{
inputDate=inputDate.substring(0, 19);
}
}
parsedDate = formats[i].parse(inputDate);
SimpleDateFormat dt = new SimpleDateFormat(outputDateFormat);
Output_Date=dt.format(parsedDate);
return Output_Date;
}
catch (ParseException e)
{
continue;
}
}
return Output_Date;
}
}