Java UDF to convert String to date in PIG

About Code

It often happens that you receive data from several systems, each of which uses a different date format, while your output must use one specific date format.
Let’s say you are receiving date string like :-
12-01-2018 12:22:33
2018/12/01 12:22:33
20181201 12:22:33

And you want all of them converted to: 2018-12-01 12:22:33. In that case you can use the UDF below, which is written in Java for use in Pig scripts.

You just need to build a jar and call the UDF from Pig as shown below:

Register the jar, then call:

 com.transformation.udf.DateConversion(input_date_col,'yyyy-MM-dd HH:mm:ss')

Even if input_date_col contains values in several different formats, all of them are converted to the expected format, which is 'yyyy-MM-dd HH:mm:ss'.

Below is the complete code:-

 

 package com.transformation.udf;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;

public class DateConversion extends EvalFunc<String> {

      public String exec(Tuple input) throws ExecException {
            String outputDate = null;
            String inputDate = null;
            String outputDateFormat = null;
            String inputDateFormat = null;

                  try {
                        outputDate = dateConvertor((String) input.get(0),
                                    (String) input.get(1));
                  } catch (Exception e) {
                        e.printStackTrace();
                  }

            return outputDate;
      }
      public static String dateConvertor(String inputDate,String outputDateFormat) throws Exception 
      {
            SimpleDateFormat[] formats=null;

            if (inputDate != null && !inputDate.isEmpty())
            {
                  if(inputDate.contains("-")||inputDate.contains("/"))
                  {
                        inputDate=inputDate.replace("-", ".");
                        inputDate=inputDate.replace("/", ".");
                  }
                  char yyyyIdentifier=inputDate.charAt(4);
                  if(yyyyIdentifier=='.'||yyyyIdentifier=='-'||yyyyIdentifier=='/')
                  {
                        formats=new SimpleDateFormat[] { new SimpleDateFormat("yyyy.MM.dd hh:mm:ss.SSS aa"),
                                    new SimpleDateFormat("yyyy.MM.dd HH:mm:ss.SSS"),
                                    new SimpleDateFormat("yyyy.MM.dd hh:mm:ss aa"),
                                    new SimpleDateFormat("yyyy.MM.dd HH:mm:ss"),
                                    new SimpleDateFormat("yyyy.MM.dd hh:mm aa"),
                                    new SimpleDateFormat("yyyy.MM.dd HH:mm"),
                                    new SimpleDateFormat("yyyy.MM.dd hh aa"),
                                    new SimpleDateFormat("yyyy.MM.dd HH"),
                                    new SimpleDateFormat("yyyy.MM.dd")};
                  }
                  else 
                  {
                        formats=new SimpleDateFormat[] { new SimpleDateFormat("yyyyMMddHHmmss"),
                                    new SimpleDateFormat("MM.dd.yyyy hh:mm:ss.SSS aa"),
                                    new SimpleDateFormat("MM.dd.yyyy HH:mm:ss.SSS"),
                                    new SimpleDateFormat("MM.dd.yyyy hh:mm:ss aa"),
                                    new SimpleDateFormat("MM.dd.yyyy HH:mm:ss"),
                                    new SimpleDateFormat("MM.dd.yyyy hh:mm aa"),
                                    new SimpleDateFormat("MM.dd.yyyy HH:mm"),
                                    new SimpleDateFormat("MM.dd.yyyy hh aa"),
                                    new SimpleDateFormat("MM.dd.yyyy HH"),
                                    new SimpleDateFormat("MM.dd.yyyy"),
                                    new SimpleDateFormat("yyyyMMdd")};
                  }

                  return dateGenerator(formats,inputDate,outputDateFormat);
            }
            else
                  return null;
      }

      public static String dateGenerator(SimpleDateFormat[] formats,String inputDate,String outputDateFormat) throws Exception
      {
            Date parsedDate = null;
            String Output_Date=null;
            for (int i = 0; i < formats.length; i++)
            {
                  try
                  { 

                        if(inputDate.length()>19)
                        {
                              if(inputDate.contains("AM")||inputDate.contains("PM"))
                              {

                                    if(inputDate.contains(" AM"))
                                    {
                                          inputDate=inputDate.substring(0, 19);
                                          inputDate=inputDate.concat(" AM");
                                    }
                                    if(inputDate.contains(" PM"))
                                    {
                                          inputDate=inputDate.substring(0, 19);
                                          inputDate=inputDate.concat(" PM");

                                    }
                              }
                              else{
                                    inputDate=inputDate.substring(0, 19);
                              }
                        }
                        parsedDate = formats[i].parse(inputDate);
                        SimpleDateFormat dt = new SimpleDateFormat(outputDateFormat);
                        Output_Date=dt.format(parsedDate);
                        return Output_Date;
                  }
                  catch (ParseException e)
                  {
                        continue; 
                  }
            }
            return Output_Date;

      }
}

Sharing is caring!

Subscribe to our newsletter
Loading

Leave a Reply