package com.mapr.db.mapreduce.tools;

import java.io.IOException;
import java.util.ArrayList;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.ojai.Value;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.mapr.db.Admin;
import com.mapr.db.MapRDB;
import com.mapr.db.impl.IdCodec;
import com.mapr.db.impl.TableDescriptorImpl;
import com.mapr.db.mapreduce.TableInputFormat;
import com.mapr.db.mapreduce.impl.ByteBufWritableComparable;
import com.mapr.db.mapreduce.impl.DiffTableUtils;
import com.mapr.db.mapreduce.impl.DocEmptySerialization;
import com.mapr.db.mapreduce.impl.MapReduceUtilMethods;
import com.mapr.db.mapreduce.impl.RangeChecksumInputFormat;
import com.mapr.db.mapreduce.impl.TableSplit;
import com.mapr.db.mapreduce.tools.impl.DiffTableComparator;
import com.mapr.db.mapreduce.tools.impl.DiffTableCounterCollector;
import com.mapr.db.mapreduce.tools.impl.DiffTableNonMR;
import com.mapr.db.mapreduce.tools.impl.DocScanner;
import com.mapr.db.mapreduce.tools.impl.FailureTracker;
import com.mapr.db.mapreduce.tools.impl.RowDiff;
import com.mapr.db.rowcol.DBDocumentImpl;
import com.mapr.db.rowcol.SequenceFileRowColCodec;
import com.mapr.fs.MapRFileSystem;
import com.mapr.org.apache.hadoop.hbase.util.Bytes;

public class DiffTables extends Configured implements Tool{
  private static final Logger LOG = LoggerFactory.getLogger(DiffTables.class);

  /** Tool name; used as the MapReduce job name and in the usage text. */
  public final static String NAME = "difftables";
  // Path of the first (source) table, set by -src.
  String table1Path;
  // Path of the second (destination) table, set by -dst.
  String table2Path;
  // Comma separated field paths from -columns; null means no projection was requested.
  String columnSpec;
  // -mapreduce: when true run() submits a MapReduce job, otherwise it delegates to DiffTableNonMR.
  boolean mapreduce = true;
  // Thread count applied by parseArgs() when -mapreduce is false and -numthreads was not given.
  int defaultNumThreads = 16;
  // -numthreads: 0 means "not specified"; handed to DiffTableNonMR.
  int numThreads = 0;
  // -outdir: mandatory output directory for the diff results.
  String outDir;
  // -first_exit: published to the job configuration under exitOnFirstDiffConf.
  boolean exitOnFirstDiff;
  // -cmpmeta: when true run() runs DiffTablesMeta before comparing rows.
  boolean cmpMeta = true;
  // -exclude_embedded_families; parseArgs() forces it to true when the source
  // table descriptor reports isStream().
  boolean excludedEmbeddedFamily = true;
  // Written to TableInputFormat.GET_DELETES by setupJobWithMR(); there is no command line switch for it.
  boolean getDeletes = true;

  // -split_keyrange: stored under RangeChecksumInputFormat.SPLITFILENAME when set.
  String splitKeyRangeFileName = null;
  // -keyrange_included: stored under RangeChecksumInputFormat.INCLUDEDREGIONFILENAME when set.
  String includeKeyRangeFileName = null;

  /*
   * Configuration keys used to hand the command line parameters to the
   * mapper (written by setupConfParmas(), read in DiffTableMapper.setup()).
   */
  static final String table1PathConf = TableInputFormat.INPUT_TABLE;
  static final String table2PathConf = "table2Pathconf";
  static final String outDirConf = "outdirconf";
  static final String columnSpecConf = "columnspeccconf";
  static final String exitOnFirstDiffConf = "exitonfirstdiffconf";


  /**
   * Job counters maintained by {@link DiffTableMapper}; each one is bumped
   * once per diff record the mapper writes.
   */
  public static enum COUNTERS {
    /** Incremented for every diff record written to the first (opsTable1Dir) named output. */
    NUM_ROWS_MISMATCH_IN_SRC,
    /** Incremented for every diff record written to the second (opsTable2Dir) named output. */
    NUM_ROWS_MISMATCH_IN_DST,
  }

  /**
   * Builds the name used for the operations output of a table.
   *
   * @param table table name; a {@code null} value is rendered as the text "null"
   * @return {@code "OpsForTable_"} followed by {@code table}
   */
  public static String getOpsForTableName(String table) {
    final StringBuilder name = new StringBuilder("OpsForTable_");
    name.append(table);
    return name.toString();
  }

  /**
   * Mapper of the map-only diff job. Each task receives the documents of one
   * split of the first table and compares them, through
   * {@link DiffTableComparator}, with a {@link DocScanner} opened on the second
   * table using the split's condition. Every resulting {@link RowDiff} is
   * written to one of two named outputs via {@link MultipleOutputs} and
   * counted in {@link COUNTERS}.
   */
  public static class DiffTableMapper extends
     Mapper<Value, DBDocumentImpl, ByteBufWritableComparable, ByteBufWritableComparable>
     implements FailureTracker {

    /** Scanner over the second table, restricted by the current split's condition. */
    private DocScanner scanner2 = null;
    private Path opsTable1Dir = null;
    private Path opsTable2Dir = null;
    private DiffTableCounterCollector counter = null;
    private DiffTableComparator comparator = null;
    private boolean excludedEmbeddedFamily = true;
    private boolean exitOnFirstMismatch = false;
    /** Set by {@link #notifyMismatch()} when exit-on-first-mismatch is enabled. */
    private boolean shouldExit = false;
    private MultipleOutputs<ByteBufWritableComparable, ByteBufWritableComparable> mos = null;

    /**
     * Reads the job parameters, opens the scanner on the second table and
     * creates the comparator, the counter collector and the named outputs.
     *
     * @param context task context supplying the configuration and input split
     * @throws IOException if the output directories cannot be prepared
     */
    @Override
    public void setup(Context context) throws IOException {

      Configuration conf = context.getConfiguration();

      String table2Path = conf.get(table2PathConf);
      String cols = conf.get(columnSpecConf);
      String[] fields = (cols != null) ? cols.split(",") : null;

      // These flags must be loaded BEFORE the scanner and the collector are
      // created; previously the scanner was built first and therefore always
      // saw the field default (true) instead of the configured value.
      excludedEmbeddedFamily = conf.getBoolean(TableInputFormat.EXCLUDE_EMBEDDEDFAMILY, true);
      exitOnFirstMismatch = conf.getBoolean(exitOnFirstDiffConf, false);

      TableSplit currentSplit = (TableSplit) context.getInputSplit();
      // scan the second table over the same condition as the current split
      scanner2 = new DocScanner(table2Path, currentSplit.getCondition(), fields, excludedEmbeddedFamily);

      // create required directories below the job output directory
      FileSystem fs = FileSystem.get(conf);
      Path[] paths = DiffTableUtils.createOutputDirs(fs, new Path(conf.get(outDirConf)));
      opsTable1Dir = paths[0];
      opsTable2Dir = paths[1];

      counter = new DiffTableCounterCollector(fs, opsTable1Dir, opsTable2Dir, conf, this);
      comparator = new DiffTableComparator(conf.get(table1PathConf), table2Path, cols,
          excludedEmbeddedFamily, counter);

      if (mos == null) {
        mos = new MultipleOutputs<ByteBufWritableComparable, ByteBufWritableComparable>(context);
      }
    }

    /**
     * Compares one document of the first table with the second table and
     * writes whatever diffs the comparator returns.
     *
     * @param key     document id (not used directly; the diff carries its own key)
     * @param value   document read from the first table
     * @param context task context used for counters
     */
    @Override
    public void map(Value key, DBDocumentImpl value, Context context)
         throws IOException, InterruptedException {
      counter.incTable1Rows();

      ArrayList<RowDiff> diffs = comparator.processNextRowAndReturnDiff(value, scanner2);

      /* write diffs to sequence file */
      writeDiffsToFile(diffs, context);
    }

    /** Writes the source-side and/or destination-side part of every diff. */
    private void writeDiffsToFile(ArrayList<RowDiff> diffs, Context context)
        throws IOException, InterruptedException {

      for (RowDiff d : diffs) {
        if (d.forSrc != null) {
          writeDiff(d.key, d.forSrc, true, context);
        }
        if (d.forDst != null) {
          writeDiff(d.key, d.forDst, false, context);
        }
      }
    }

    /**
     * Writes a single diff record to the named output of the chosen side and
     * increments the matching job counter.
     *
     * @param isSrc true to write under opsTable1Dir's name, false for opsTable2Dir's
     */
    private void writeDiff(ByteBufWritableComparable key, ByteBufWritableComparable value, boolean isSrc, Context context)
        throws IOException, InterruptedException {

      LOG.debug(opsTable1Dir.getName());
      LOG.debug(opsTable2Dir.getName());
      if (isSrc) {
        mos.write(key, value, opsTable1Dir.getName());
        context.getCounter(COUNTERS.NUM_ROWS_MISMATCH_IN_SRC).increment(1);
      } else {
        mos.write(key, value, opsTable2Dir.getName());
        context.getCounter(COUNTERS.NUM_ROWS_MISMATCH_IN_DST).increment(1);
      }
    }

    /** Records that the task should stop, but only if exit-on-first-mismatch was requested. */
    @Override
    public void notifyMismatch() {
      if (exitOnFirstMismatch) {
        shouldExit = true;
      }
    }

    /** @return true once a mismatch was notified while exit-on-first-mismatch is enabled */
    @Override
    public boolean shouldExit() {
      return shouldExit;
    }

    /**
     * Emits the documents still left in the second-table scanner after the
     * split has been consumed, then closes the named outputs.
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
      try {
        ArrayList<RowDiff> remainingRows = dumpRemainingRows();
        LOG.debug("cleanup find "+remainingRows.size()+" remaining rows");
        writeDiffsToFile(remainingRows, context);
      } finally {
        // always close, otherwise the named outputs are left open when the
        // final write fails
        mos.close();
      }
    }

    /**
     * Drains the second-table scanner and wraps every remaining document in a
     * {@link RowDiff} whose {@code forSrc} part holds the encoded document.
     * NOTE(review): presumably these rows exist only in the second table, which
     * is why they become operations for the source side - confirm against
     * DiffTableComparator.
     */
    private ArrayList<RowDiff> dumpRemainingRows() {
      ArrayList<RowDiff> diffs = new ArrayList<RowDiff>();
      for (DBDocumentImpl doc2 = scanner2.getNext(); doc2 != null; doc2 = scanner2.getNext()) {
        RowDiff d = new RowDiff();
        d.key = new ByteBufWritableComparable(IdCodec.encode(doc2.getId()));
        d.forSrc = new ByteBufWritableComparable(SequenceFileRowColCodec.encode(doc2));
        diffs.add(d);
      }
      return diffs;
    }

  }


  /**
   * Creates the map-only job: the first table is split by
   * {@link TableInputFormat}, and {@link DiffTableMapper} processes each split.
   * Output goes to sequence files that are created lazily.
   *
   * @return the configured, not yet submitted job
   * @throws Exception if the job cannot be instantiated
   */
  private Job setupJobWithMR() throws Exception {
    final Configuration jobConf = this.getConf();

    // append the document serialization to whatever is already configured
    final String existingSerializations = jobConf.get("io.serializations");
    final String docSerialization = DocEmptySerialization.class.getName();
    jobConf.setStrings("io.serializations", existingSerializations, docSerialization);
    jobConf.setBoolean(TableInputFormat.GET_DELETES, getDeletes);

    final Job diffJob = Job.getInstance(jobConf, NAME);
    diffJob.setJarByClass(DiffTables.class);

    // map-only, no speculative attempts
    diffJob.setNumReduceTasks(0);
    diffJob.setSpeculativeExecution(false);

    diffJob.setInputFormatClass(TableInputFormat.class);
    diffJob.setMapperClass(DiffTableMapper.class);
    diffJob.setOutputKeyClass(ByteBufWritableComparable.class);
    diffJob.setOutputValueClass(ByteBufWritableComparable.class);

    // "outdirconf" is the same key as the outDirConf constant
    final Path outputPath = new Path(jobConf.get("outdirconf"));
    SequenceFileOutputFormat.setOutputPath(diffJob, outputPath);
    LazyOutputFormat.setOutputFormatClass(diffJob, SequenceFileOutputFormat.class);

    return diffJob;
  }

  /**
   * Tool entry point: parses the arguments, optionally compares the table
   * metadata, then diffs the two tables either with a MapReduce job or with
   * {@link DiffTableNonMR}.
   *
   * @param args command line arguments (see {@link #Usage(String)})
   * @return 0 when the comparison ran successfully, otherwise the non-zero
   *         code of the metadata check, the non-MapReduce run, or 1 for a
   *         failed job
   * @throws Exception on any unexpected failure
   */
  @Override
  public int run(String[] args) throws Exception {
    parseArgs(args);

    int ret = 0;
    if (cmpMeta && ((ret = compareMeta(args)) != 0)) {
      return ret;
    } else if (!cmpMeta) {
      System.out.println("Skip metadata check.");
    }

    Configuration conf = getConf();

    setupConfParmas(conf);

    if (!mapreduce) {
      DiffTableNonMR d = new DiffTableNonMR(
        conf, table1Path, table2Path, columnSpec, numThreads,
        exitOnFirstDiff, excludedEmbeddedFamily, outDir);
      return d.runWithoutMapReduce();
    }

    Job job = setupJobWithMR();
    ret = job.waitForCompletion(true) ? 0 : 1;
    if (ret == 0) {
      // the per-side result files are only complete after a successful job
      Path srcDir = new Path(outDir);
      FileSystem fs = srcDir.getFileSystem(conf);
      boolean mv1 = DiffTableUtils.copyFileWithPrefix(fs, srcDir, new Path(outDir + "/OpsForDstTable"), conf, "OpsForDstTable", LOG);
      boolean mv2 = DiffTableUtils.copyFileWithPrefix(fs, srcDir, new Path(outDir + "/OpsForSrcTable"), conf, "OpsForSrcTable", LOG);
      if (!mv1 || !mv2) {
        LOG.warn("Failed to move output diff results "+outDir+" into its sub folder opsForSrc or opsForDst");
      }
    }

    // Report in both cases. Previously this sat inside the success branch, so
    // a failed job never printed its status or counters.
    reportJobResult(job, ret);
    return ret;
  }

  /**
   * Prints the job status and, when the tables differ or the job failed, the
   * mismatch counters.
   *
   * @param job finished job
   * @param ret 0 if the job succeeded, non-zero otherwise
   * @throws IOException if the counters of a successful job cannot be fetched
   */
  private void reportJobResult(Job job, int ret) throws IOException {
    System.out.print("Mapreduce job " + (ret == 0 ? "completed. " : "failed. "));

    Counters counters;
    try {
      counters = job.getCounters();
    } catch (IOException e) {
      if (ret == 0) {
        throw e;
      }
      // for a failed job the counters are only best-effort information
      LOG.warn("Unable to fetch counters of the failed job", e);
      counters = null;
    }
    if (counters == null) {
      // Job.getCounters() may legitimately return null (e.g. retired job)
      System.out.println("Job counters are not available. Please check diff in " + outDir);
      return;
    }

    Counter srcMismatch = counters.findCounter(COUNTERS.NUM_ROWS_MISMATCH_IN_SRC);
    Counter dstMismatch = counters.findCounter(COUNTERS.NUM_ROWS_MISMATCH_IN_DST);

    boolean printcounter = true;
    if (ret == 0) {
      if (srcMismatch.getValue() == 0 && dstMismatch.getValue() == 0) {
        System.out.println("The tables match.");
        printcounter = false;
      } else {
        System.out.println("The tables mismatch.");
      }
    }
    if (printcounter) {
      System.out.println(srcMismatch.getDisplayName() + ":" + srcMismatch.getValue() + "; "
          + dstMismatch.getDisplayName() + ":" + dstMismatch.getValue()
          + ". Please check diff in " + outDir);
    }
  }

  /**
   * Runs {@link DiffTablesMeta} with the same arguments and reports its
   * verdict. When the metadata differs the JVM is terminated with the
   * returned code.
   *
   * @param args command line arguments passed through unchanged
   * @return the code returned by the metadata tool
   * @throws Exception propagated from {@link ToolRunner#run}
   */
  private int compareMeta(String[] args) throws Exception {
    final int status = ToolRunner.run(getConf(), new DiffTablesMeta(true), args);
    final boolean metadataDiffers = (status == DiffTablesMeta.DIFFERENT_METADATA_RET);

    if (metadataDiffers) {
      System.out.println("ERROR: Metadata is different.");
      System.out.println("To skip metadata comparison, use the option -cmpmeta false.");
      System.exit(status);
    }
    if (!metadataDiffers && status == DiffTablesMeta.SAME_METADATA_RET) {
      System.out.println("DiffTablesMeta completed. Metadata of the two tables is same.");
    }
    return status;
  }

  /**
   * Publishes the parsed command line parameters in the configuration so the
   * input format and the mapper can read them.
   *
   * @param conf configuration to populate
   */
  private void setupConfParmas(Configuration conf) {
    conf.set(table1PathConf, table1Path);
    conf.set(table2PathConf, table2Path);
    conf.set(outDirConf, outDir);

    // the column spec is stored twice: once for the mapper and once as the
    // projection applied in the record reader
    setIfPresent(conf, columnSpecConf, columnSpec);
    setIfPresent(conf, TableInputFormat.FIELD_PATH, columnSpec);

    conf.setBoolean(exitOnFirstDiffConf, exitOnFirstDiff);
    conf.setBoolean(TableInputFormat.EXCLUDE_EMBEDDEDFAMILY, excludedEmbeddedFamily);

    setIfPresent(conf, RangeChecksumInputFormat.SPLITFILENAME, splitKeyRangeFileName);
    setIfPresent(conf, RangeChecksumInputFormat.INCLUDEDREGIONFILENAME, includeKeyRangeFileName);
  }

  /** Stores {@code value} under {@code key} unless the value is null. */
  private static void setIfPresent(Configuration conf, String key, String value) {
    if (value != null) {
      conf.set(key, value);
    }
  }

  /**
   * Parses and validates the command line. Any invalid input is reported
   * through {@link #Usage(String)}, which prints the help text and terminates
   * the JVM.
   *
   * <p>Besides filling the option fields this method checks that both tables
   * exist and are JSON tables, normalizes the column spec, and forces
   * {@code excludedEmbeddedFamily} to true when the source table descriptor
   * reports a stream.
   *
   * @param args command line arguments
   * @throws Exception if the file system or the table descriptor cannot be accessed
   */
  public void parseArgs(String args[]) throws Exception {
    for (int i = 0; i < args.length; ++i) {
      final String opt = args[i];
      if (opt.equalsIgnoreCase("-h")) {
        Usage(null);
      } else if (opt.equalsIgnoreCase("-src")) {
        table1Path = optionValue(args, ++i, opt);
      } else if (opt.equalsIgnoreCase("-dst")) {
        table2Path = optionValue(args, ++i, opt);
      } else if (opt.equalsIgnoreCase("-columns")) {
        columnSpec = optionValue(args, ++i, opt);
      } else if (opt.equalsIgnoreCase("-exclude_embedded_families")) {
        excludedEmbeddedFamily = Boolean.parseBoolean(optionValue(args, ++i, opt));
      } else if (opt.equalsIgnoreCase("-mapreduce")) {
        mapreduce = Boolean.parseBoolean(optionValue(args, ++i, opt));
      } else if (opt.equalsIgnoreCase("-numthreads")) {
        final String raw = optionValue(args, ++i, opt);
        try {
          numThreads = Integer.parseInt(raw);
        } catch (NumberFormatException e) {
          Usage("-numthreads expects an integer but got '" + raw + "'");
        }
      } else if (opt.equalsIgnoreCase("-outdir")) {
        outDir = optionValue(args, ++i, opt);
      } else if (opt.equalsIgnoreCase("-first_exit")){
        exitOnFirstDiff = true;
      } else if (opt.equalsIgnoreCase("-cmpmeta")) {
        cmpMeta = Boolean.parseBoolean(optionValue(args, ++i, opt));
      } else if (opt.equalsIgnoreCase("-split_keyrange")) {
        splitKeyRangeFileName = optionValue(args, ++i, opt);
      } else if (opt.equalsIgnoreCase("-keyrange_included")) {
        includeKeyRangeFileName = optionValue(args, ++i, opt);
      } else {
        Usage("Unrecognized option: " + opt);
      }
    }

    if (table1Path == null || table2Path == null) {
      Usage("missing -src or -dst table path.");
    }

    // TODO - compare abs and relative paths
    if (table1Path.equals(table2Path)) {
      System.out.println("Source table " + table1Path
          + " and destination table " + table2Path
          + " refers to the same table");
      System.exit(-1);
    }

    if (outDir == null) {
      Usage("Missing -outdir");
    }

    if (numThreads < 0) {
      Usage("-numthreads must not be negative");
    }

    if (mapreduce && (numThreads != 0)) {
      System.out.println("numthreads = " + numThreads);
      Usage("-numthreads can't be specified when -mapreduce is true");
    }

    if ((!mapreduce) && (numThreads == 0)) {
      numThreads = defaultNumThreads;
    }

    // Validate against the configuration the tool actually runs with, so
    // generic options handled by ToolRunner are honoured; fall back to a
    // default configuration when none has been injected.
    Configuration conf = getConf();
    if (conf == null) {
      conf = new Configuration();
    }

    MapRFileSystem mfs = (MapRFileSystem)(FileSystem.get(conf));

    Path t1Path = new Path(table1Path);
    Path t2Path = new Path(table2Path);
    if (!mfs.exists(t1Path)) {
      Usage(t1Path + " does not exist");
    }

    if (!mfs.isJsonTable(t1Path)) {
      Usage(t1Path + " is not a JSON table. This tool only supports JSON tables");
    }

    if (!mfs.exists(t2Path)) {
      Usage(t2Path + " does not exist");
    }

    if (!mfs.isJsonTable(t2Path)) {
      Usage(t2Path + " is not a JSON table. This tool only supports JSON tables");
    }

    columnSpec = MapReduceUtilMethods.processColumnSpec(columnSpec, table1Path);
    LOG.info("Comparing {} column families from {} to {}.",
        (columnSpec != null ? columnSpec : "all"), table1Path, table2Path);

    Admin admin = MapRDB.newAdmin();
    try {
      TableDescriptorImpl desc = (TableDescriptorImpl) admin.getTableDescriptor(table1Path);
      if (desc.isStream()) {
        excludedEmbeddedFamily = true;
      }
    } finally {
      // release the admin handle instead of leaking it
      admin.close();
    }
  }

  /**
   * Returns the value that follows an option, or reports a usage error when
   * the option is the last argument.
   *
   * @param args       all command line arguments
   * @param valueIndex index at which the value is expected
   * @param option     option name, used in the error message
   * @return the option value
   */
  private static String optionValue(String[] args, int valueIndex, String option) {
    if (valueIndex >= args.length) {
      Usage("Missing value for option " + option);
    }
    return args[valueIndex];
  }

  /**
   * Prints an optional error message followed by the command line help to
   * standard error and terminates the JVM with exit code 1.
   *
   * @param errorMsg message to print first; null or empty prints only the help
   */
  public static void Usage(String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
        System.err.println("ERROR: " + errorMsg);
    }

    // The stated default of -exclude_embedded_families matches the field
    // initializer (true); the text previously claimed false.
    System.err.println("Usage: " + NAME + " -src <source table path> -dst <destination table path> -outdir <output directory>\n"
                     + "[-first_exit] Exit when first difference is found.\n"
                     + "[-columns <JSON Fieldpaths specified as \"path1,...,pathN\">]\n"
                     + "[-exclude_embedded_families <true|false>] (default: true)\n"
                     +    "  Don't include the  other column families with path embedded in specified columns\n"
                     + "[-mapreduce <true|false> (default: true)]\n"
                     + "[-numthreads <numThreads> (default:16, valid only when -mapreduce is false)]\n"
                     + "[-cmpmeta <true|false> (default: true)]\n");
    System.exit(1);
  }

  /**
   * Command line entry point: runs the tool through {@link ToolRunner} and
   * exits with its return code, or with 1 when an exception escapes.
   *
   * @param args command line arguments
   */
  public static void main(String[] args) throws Exception {
    final Configuration baseConf = new Configuration();

    int exitCode;
    try {
      exitCode = ToolRunner.run(baseConf, new DiffTables(), args);
    } catch (Exception failure) {
      exitCode = 1;
      failure.printStackTrace();
    }
    System.exit(exitCode);
  }
}
