/* Copyright (c) 2015 & onwards. MapR Tech, Inc., All rights reserved */
package com.mapr.db.mapreduce;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.ojai.Document;
import org.ojai.Value;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.mapr.db.MapRDB;
import com.mapr.db.Table.TableOption;
import com.mapr.db.TabletInfo;
import com.mapr.db.impl.ConditionImpl;
import com.mapr.db.impl.Constants;
import com.mapr.db.impl.MapRDBTableImpl;
import com.mapr.db.impl.MapRDBTableImpl.TablePrivateOption;
import com.mapr.db.impl.TableDescriptorImpl;
import com.mapr.db.mapreduce.impl.ByteBufWritableComparable;
import com.mapr.db.mapreduce.impl.MapReduceConstants;
import com.mapr.db.mapreduce.impl.MarlinSplitter;
import com.mapr.db.mapreduce.impl.TableSplit;
import com.mapr.db.util.ByteBufs;

/**
 * This class is used to read JSON table data from MapR-DB in Map/Reduce program.
 *
 * <p>The input table and the scan options are taken from the Hadoop
 * {@link Configuration} handed to {@link #setConf(Configuration)}; see the
 * {@code maprdb.*} keys declared as constants on this class.
 */
public class TableInputFormat extends InputFormat<Value, Document>
  implements Configurable {

  static final Logger LOG = LoggerFactory.getLogger(TableInputFormat.class);

  /* Table handle opened in setConf() from the INPUT_TABLE parameter. */
  protected MapRDBTableImpl jTable;
  /* True when the table descriptor reports that the table is a stream. */
  private boolean isMarlin;

  /* Need a condition object to manage user-specified options */
  protected ConditionImpl cond = null;

  /* configuration parameters */
  /* Job parameter that specifies the input table. */
  public static final String INPUT_TABLE = "maprdb.mapreduce.inputtable";
  public static final String FIELD_PATH = "maprdb.mapreduce.fieldpath";
  public static final String COND_OBJ = "maprdb.mapreduce.condition";
  public static final String START_ROW = "maprdb.mapreduce.getall.startrow";
  public static final String STOP_ROW = "maprdb.mapreduce.getall.stoprow";
  public static final String EXCLUDE_EMBEDDEDFAMILY = "maprdb.exclude.embedded";
  public static final String GET_DELETES = "maprdb.get.deletes";
  public static final String READ_ALL_CFS = "maprdb.read.all.cfs";

  /* hadoop configuration */
  private Configuration conf;

  /**
   * Returns the current configuration.
   *
   * @return : Current Configuration.
   * @see org.apache.hadoop.conf.Configurable#getConf()
   */
  @Override
  public Configuration getConf() {
    return conf;
  }

  /**
   * Set up the splits which will be served as inputs to map tasks. The number of splits are same
   * as the number of tablets for regular JSON tables. In case of marlin tables, if there is a
   * topicPartition shared between two tablets, they are considered to be part of same split.
   *
   * @param context  The current job context.
   * @return List of input splits.
   * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(org.apache.hadoop.mapreduce.JobContext)
   */
  @Override
  public List<InputSplit> getSplits(JobContext context) throws IOException,
      InterruptedException {
    /* get TabletInfo handle from table */
    TabletInfo[] tablets = jTable.getTabletInfos();

    if (isMarlin) {
      return MarlinSplitter.getMarlinSplits(jTable.getName(), tablets);
    }

    List<InputSplit> splits = new ArrayList<InputSplit>(tablets.length);

    /* Each tablet info from the array becomes one split, built from the
     * tablet's condition, locations and estimated size.
     */
    for (int i = 0; i < tablets.length; i++) {
      TabletInfo tab = tablets[i];
      ConditionImpl tabCond = (ConditionImpl)tab.getCondition();
      TableSplit split = new TableSplit(jTable.getName(), tabCond, tab.getLocations(), tab.getEstimatedSize());
      splits.add(split);
      /* parameterized so the split is only stringified when debug is enabled */
      LOG.debug("getSplits: split -> {} -> {}", i + 1, split);
    }

    return splits;
  }


  /**
   * Creates a record reader for a given split. A new reader is created on every
   * call, so readers returned for different splits never share state.
   *
   * <p>The reader scans with the split's own condition; when a user condition was
   * supplied through {@link #COND_OBJ}, the two are combined with a logical AND.
   * If {@link #FIELD_PATH} is present in the task configuration it is passed to
   * the reader as well.
   *
   * @param split The input split.
   * @param context Current job context.
   * @return A new record reader.
   * @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(org.apache.hadoop.mapreduce.InputSplit, org.apache.hadoop.mapreduce.TaskAttemptContext)
   */
  @Override
  public RecordReader<Value, Document> createRecordReader(
      InputSplit split, TaskAttemptContext context) throws IOException,
      InterruptedException {

    Configuration jobConf = context.getConfiguration();

    /* create a recordreader */
    TableSplit jSplit = (TableSplit)split;
    TableRecordReader reader = new TableRecordReader();
    reader.setTable(jTable);

    ConditionImpl tabletInfoCond = jSplit.getCondition();
    if (cond == null) {
      reader.setCondition(tabletInfoCond);
    } else {
      ConditionImpl c = (ConditionImpl)MapRDB.newCondition()
          .and()
          .condition(tabletInfoCond)
          .condition(cond).close().build();
      reader.setCondition(c);
    }

    String fieldPath = jobConf.get(FIELD_PATH);
    if (fieldPath != null) {
      reader.setFieldPath(fieldPath);
    }

    return reader;
  }

  /**
   * This function is used to set parameters in the configuration. This is used to
   * set relevant parameters for scanning JSON tables.
   *
   * <p>Opens the table named by {@link #INPUT_TABLE}, applies the scan options found
   * in the configuration, and decodes the optional Base64-encoded condition stored
   * under {@link #COND_OBJ}.
   *
   * @param arg0 Configuration object with parameters for TableInputFormat.
   * @throws IllegalStateException if no table handle is available after processing
   *         the configuration (the input table is not configured or could not be opened).
   *  @see org.apache.hadoop.conf.Configurable#setConf(
   *  org.apache.hadoop.conf.Configuration)
   */
  @Override
  public void setConf(Configuration arg0){
    this.conf = arg0;
    String tableName = conf.get(INPUT_TABLE);
    Exception failure = null;

    try {
      if (tableName != null) {
        jTable = new MapRDBTableImpl(new Path(tableName), conf);

        if (conf.get(MapReduceConstants.PRESERVE_TS) != null) {
          jTable.setPrivateOption(TablePrivateOption.PRESERVE_TIMESTAMP, true);
        }
        jTable.setPrivateOption(TablePrivateOption.GET_DELETES, conf.getBoolean(GET_DELETES, false));
        jTable.setPrivateOption(TablePrivateOption.EXCLUDE_EMBEDDEDFAMILY, conf.getBoolean(EXCLUDE_EMBEDDEDFAMILY, false));
        jTable.setOption(TableOption.EXCLUDEID, conf.getBoolean(Constants.EXCLUDEID_STR, false));
        jTable.setPrivateOption(TablePrivateOption.READ_ALL_CFS, conf.getBoolean(READ_ALL_CFS, true));
      }

    } catch (Exception e) {
      /* keep the stack trace; the message alone is often null or uninformative */
      LOG.error("Failed to set up input table '" + tableName + "': " + e.getMessage(), e);
      failure = e;
    }

    /* Without a table handle the descriptor lookup below would fail with a bare
     * NullPointerException; report the actual reason instead.
     */
    if (jTable == null) {
      String reason = (tableName == null)
          ? "No input table configured; set '" + INPUT_TABLE + "'"
          : "Unable to open input table '" + tableName + "'";
      throw new IllegalStateException(reason, failure);
    }

    isMarlin = ((TableDescriptorImpl)jTable.getTableDescriptor()).isStream();

    /* user may or may not pass a condition object */
    String conditionString = conf.get(COND_OBJ);
    if (conditionString != null) {
      cond = (ConditionImpl)ConditionImpl.parseFrom(ByteBufs.wrap(Base64.decodeBase64(conditionString)));
    }
  }

}


