package com.mapr.db.mapreduce;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.TreeSet;
import java.util.UUID;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapreduce.Job;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.mapr.db.MapRDB;
import com.mapr.db.TabletInfo;
import com.mapr.db.impl.ConditionImpl;
import com.mapr.db.impl.ConditionNode.RowkeyRange;
import com.mapr.db.impl.IdCodec;
import com.mapr.db.rowcol.DBDocumentImpl;
import com.mapr.db.rowcol.IdValueComparator;
import com.mapr.db.rowcol.KeyValue;

/**
 * Utility routines to set up M/R specific job details for Map/Reduce steps.
 */

public class MapRDBMapReduceUtil {
  // Static so that the (all-static) helpers of this class can actually log;
  // an instance field would never be reachable from them.
  static final Logger LOG = LoggerFactory.getLogger(MapRDBMapReduceUtil.class);

  /** Hadoop configuration key that lists the serialization classes available to a job. */
  private static final String IO_SERIALIZATIONS = "io.serializations";

  /**
   * Orders buffers lexicographically over their remaining bytes, treating every
   * byte as an unsigned value (0..255). {@link ByteBuffer#compareTo(ByteBuffer)}
   * compares bytes as signed values, which places any key containing a byte
   * &gt;= 0x80 ahead of plain ASCII keys. Absolute reads are used, so neither
   * buffer's position is modified.
   */
  private static final Comparator<ByteBuffer> UNSIGNED_BYTE_ORDER =
      new Comparator<ByteBuffer>() {
        @Override
        public int compare(ByteBuffer left, ByteBuffer right) {
          int leftPos = left.position();
          int rightPos = right.position();
          int common = Math.min(left.remaining(), right.remaining());
          for (int i = 0; i < common; i++) {
            int l = left.get(leftPos + i) & 0xff;
            int r = right.get(rightPos + i) & 0xff;
            if (l != r) {
              return l - r;
            }
          }
          // All shared bytes are equal: the shorter key sorts first.
          return left.remaining() - right.remaining();
        }
      };

  /**
   * Configures the table input format so that a MapReduce application reads its
   * input from a MapR-DB table. Also appends {@code ValueSerialization} and
   * {@code DocumentSerialization} to the job's {@code io.serializations} list.
   *
   * @param job  MapReduce job object.
   * @param srcTable Name of the source table; when {@code null} the input table
   *                 property is left untouched.
   * @throws IOException
   */
  public static void configureTableInputFormat(
      Job job,
      String srcTable
     ) throws IOException {
    Configuration conf = job.getConfiguration();
    String existing = conf.get(IO_SERIALIZATIONS);
    if (existing == null) {
      // Without this guard the literal text "null" would be written into the list.
      conf.setStrings(IO_SERIALIZATIONS,
          ValueSerialization.class.getName(), DocumentSerialization.class.getName());
    } else {
      conf.setStrings(IO_SERIALIZATIONS, existing,
          ValueSerialization.class.getName(), DocumentSerialization.class.getName());
    }

    if (srcTable != null) {
      conf.set(TableInputFormat.INPUT_TABLE, srcTable);
    }
    job.setInputFormatClass(TableInputFormat.class);
  }

  /**
   * Configures the table output format for a MapReduce application and turns
   * speculative execution off for the job.
   *
   * @param job MapReduce job object.
   * @param destTable Name of the destination table; when {@code null} the output
   *                  table property is left untouched.
   */
  public static void configureTableOutputFormat(
      Job job, String destTable) {
    Configuration conf = job.getConfiguration();
    if (destTable != null) {
      conf.set(TableOutputFormat.OUTPUT_TABLE, destTable);
    }
    job.setOutputFormatClass(TableOutputFormat.class);
    job.setSpeculativeExecution(false);
  }

  /**
   * Sets up TableMutationOutputFormat for jobs that send DocumentMutation
   * objects to update existing documents in a MapR-DB table. Speculative
   * execution is turned off for the job.
   *
   * @param job MapReduce job object.
   * @param destTable Name of the destination table; when {@code null} the output
   *                  table property is left untouched.
   */
  public static void configureMutationOutputFormat(
      Job job, String destTable) {
    Configuration conf = job.getConfiguration();
    if (destTable != null) {
      conf.set(TableMutationOutputFormat.OUTPUT_TABLE, destTable);
    }
    job.setOutputFormatClass(TableMutationOutputFormat.class);
    job.setSpeculativeExecution(false);
  }

  /**
   * Declares the map output types for a stage that emits a value (as key) and a
   * Document (as value): key class {@code KeyValue}, value class
   * {@code DBDocumentImpl}, sorted with {@code IdValueComparator}.
   *
   * @param job MapReduce job object.
   */
  public static void setMapOutputKeyValueClass(Job job) {
    job.setMapOutputKeyClass(KeyValue.class);
    job.setSortComparatorClass(IdValueComparator.class);
    job.setMapOutputValueClass(DBDocumentImpl.class);
  }

  /**
   * Declares the job output types for a stage that emits a value (as key) and a
   * Document (as value): key class {@code KeyValue}, value class
   * {@code DBDocumentImpl}, sorted with {@code IdValueComparator}.
   *
   * @param job MapReduce job object.
   */
  public static void setOutputKeyValueClass(Job job) {
    job.setOutputKeyClass(KeyValue.class);
    job.setSortComparatorClass(IdValueComparator.class);
    job.setOutputValueClass(DBDocumentImpl.class);
  }

  /**
   * Sets up a total-order partitioner for a MapR-DB destination table.
   * This function assumes that the destination table has already been created with
   * partitions created around tablet boundaries. The tablet start keys are written
   * to a uniquely named partition file under {@code hadoop.tmp.dir}, which is then
   * registered with the partitioner and added to the job's cache files.
   *
   * @param job MapReduce job object.
   * @param destPath Destination table for M/R job.
   * @return Number of tablet start keys found for the table (the first, empty, start
   *         key included), which will determine number of reducers.
   * @throws IOException if the tablet information cannot be read or the partition
   *         file cannot be written.
   */
  public static int configurePartitioner(Job job, String destPath)
                 throws IOException {

    String uuid = UUID.randomUUID().toString();
    job.setPartitionerClass(TotalOrderPartitioner.class);
    Configuration config = job.getConfiguration();
    Path partitionFile = new Path(config.get("hadoop.tmp.dir"), "partitions_" + uuid);
    TotalOrderPartitioner.setPartitionFile(config, partitionFile);
    List<ByteBuffer> partitionSplitPoints = getPartitionSplitPoints(destPath);
    writePartitions(job, partitionFile, partitionSplitPoints);

    job.addCacheFile(partitionFile.toUri());
    return partitionSplitPoints.size();
  }

  /**
   * Collects the start row of the first row-key range of every tablet of a table.
   *
   * @param tabName Name of the table whose tablets are inspected.
   * @return One buffer per tablet, in the order the tablets were reported.
   * @throws IOException
   */
  private static List<ByteBuffer> getPartitionSplitPoints(String tabName)
      throws IOException {
    // NOTE(review): the table handle obtained here is never released. If the
    // table type exposes close(), it should be closed once the tablet infos
    // have been read -- confirm against the MapR-DB API.
    TabletInfo[] tabletInfo = MapRDB.getTable(tabName).getTabletInfos();
    assert tabletInfo.length >= 1;

    List<ByteBuffer> startKeys = new ArrayList<ByteBuffer>(tabletInfo.length);
    for (TabletInfo ti : tabletInfo) {
      List<RowkeyRange> range = ((ConditionImpl)ti.getCondition()).getRowkeyRanges();
      startKeys.add(ByteBuffer.wrap(range.get(0).getStartRow()));
    }
    return startKeys;
  }

  /**
   * Writes the partition file: the sorted, de-duplicated start keys minus the
   * first (empty) one, each decoded with {@code IdCodec.decode} and stored as a
   * SequenceFile key with a {@code NullWritable} value.
   *
   * @param job Job supplying the configuration and the map output key class.
   * @param partitionsPath Location of the partition file to create.
   * @param startKeys Encoded start keys, one per tablet.
   * @throws IOException if the file cannot be created or written.
   * @throws IllegalArgumentException if no start keys are given, or if the smallest
   *         start key is not empty.
   */
  private static void writePartitions(Job job, Path partitionsPath,
      List<ByteBuffer> startKeys)
      throws IOException, IllegalArgumentException {
    Configuration conf = job.getConfiguration();
    if (startKeys.isEmpty()) {
      throw new IllegalArgumentException("No regions passed");
    }

    // Split points have to be written in row-key order. ByteBuffer's natural
    // ordering is signed, so an explicit unsigned comparison is used instead.
    // NOTE(review): this assumes the job's sort comparator (IdValueComparator)
    // orders ids the same way as their unsigned encoded bytes -- confirm.
    TreeSet<ByteBuffer> sorted = new TreeSet<ByteBuffer>(UNSIGNED_BYTE_ORDER);
    sorted.addAll(startKeys);

    ByteBuffer first = sorted.first();
    if (first.remaining() != 0) {
      throw new IllegalArgumentException(
      "First region of table should have empty start key. Instead has: "
      + first.toString());
    }
    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    sorted.remove(first);

    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath,
                           job.getMapOutputKeyClass(),
                           NullWritable.class);
    try {
      for (ByteBuffer startKey : sorted) {
        writer.append(IdCodec.decode(startKey), NullWritable.get());
      }
    } finally {
      writer.close();
    }
  }

}
