package com.mapr.db.mapreduce.tools;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.ojai.Document;
import org.ojai.Value;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.mapr.db.Admin;
import com.mapr.db.MapRDB;
import com.mapr.db.TableDescriptor;
import com.mapr.db.impl.IdCodec;
import com.mapr.db.impl.MapRDBTableImpl;
import com.mapr.db.impl.MapRDBTableImpl.TablePrivateOption;
import com.mapr.db.impl.TableDescriptorImpl;
import com.mapr.db.mapreduce.BulkLoadOutputFormat;
import com.mapr.db.mapreduce.BulkLoadRecordWriter;
import com.mapr.db.mapreduce.DBDocumentSerialization;
import com.mapr.db.mapreduce.MapRDBMapReduceUtil;
import com.mapr.db.mapreduce.TableOutputFormat;
import com.mapr.db.mapreduce.ValueSerialization;
import com.mapr.db.mapreduce.impl.ByteBufWritableComparable;
import com.mapr.db.mapreduce.impl.DocEmptySerialization;
import com.mapr.db.mapreduce.impl.ImportBulkLoadMapper;
import com.mapr.db.mapreduce.impl.ImportMapper;
import com.mapr.db.mapreduce.impl.MapReduceUtilMethods;
import com.mapr.db.rowcol.DBDocumentImpl;
import com.mapr.db.rowcol.SequenceFileRowColCodec;
import com.mapr.fs.MapRFileSystem;

public class Import extends Configured implements Tool {
  private static final Logger LOG = LoggerFactory.getLogger(Import.class);
  // Tool name: shown in the usage text, prefixed to log messages and to the MapReduce job name.
  private final static String NAME = "importtable";
  // Reducer count handed to the job; assigned in createSubmittableJob().
  private static int NUM_REDUCE_TASKS;
  // Value of the -src option (input file/directory path); assigned in ParseArgs().
  private static String srcPath;
  // Value of the -dst option (destination table path); assigned in ParseArgs().
  private static String dstPath;
  // Set from the -bulkload option; ParseArgs() may override it based on the destination table descriptor.
  private static boolean bulkLoad = false;
  // Set from the -mapreduce option; when false, run() uses the in-process thread pool path instead of a job.
  private static boolean mapReduce = true;
  // Size of the fixed thread pool created by run_NonMR().
  private static int numThreads = 16;

  /**
   * Shared state for the per-file import workers submitted to the thread pool.
   * Subclasses supply {@link Callable#call()} and report their outcome as an
   * {@code Integer}.
   */
  abstract class BaseImporterThread implements Callable<Integer> {
    /** Identifier given to this worker by its creator. */
    protected int myid;
    /** File this worker is responsible for. */
    protected Path filePath;
    /** Configuration handed in by the creator (stored by reference, not copied). */
    protected Configuration config;

    /**
     * Stores the worker's identity, input file and configuration.
     *
     * @param id       identifier for this worker
     * @param filePath file the worker should process
     * @param config   configuration the worker should use
     */
    protected BaseImporterThread(int id, Path filePath, Configuration config) {
      this.config = config;
      this.filePath = filePath;
      this.myid = id;
    }
  }

  /**
   * Worker that imports one sequence file into the destination table, either
   * through direct table writes or, when {@code bulkLoad} is set, through a
   * {@link BulkLoadRecordWriter}.
   */
  class ImporterThread extends BaseImporterThread {
    ImporterThread(int id, Path t, Configuration config) {
      super(id, t, config);
    }

    /**
     * Reads every record of {@code filePath} and writes it to the table at
     * {@code dstPath}.
     *
     * @return 0 when the whole file was imported and all resources closed
     *         cleanly, 1 otherwise (the failure is logged)
     */
    @Override
    public Integer call() {
      SequenceFile.Reader reader = null;
      MapRDBTableImpl tab = null;
      int status = 0;

      try {
        FileSystem fs = filePath.getFileSystem(config);
        // NOTE(review): config is the instance the caller hands to every worker, so each
        // worker appends to io.serializations on that shared object — confirm this is intended.
        config.setStrings("io.serializations", config.get("io.serializations"),
            DBDocumentSerialization.class.getName());

        reader = new SequenceFile.Reader(fs, filePath, config);
        tab = new MapRDBTableImpl(new Path(dstPath), config);
        tab.setPrivateOption(TablePrivateOption.PRESERVE_TIMESTAMP, true);

        if (bulkLoad) {
          RecordWriter<Value, Document> writer =
              new BulkLoadRecordWriter(getConf(), new Path(dstPath));
          boolean loaded;
          try {
            loaded = importBulkload(reader, writer, tab);
          } finally {
            // Close the writer even when the copy failed, as the original flow did.
            writer.close(null);
          }
          if (!loaded) {
            status = 1;
          }
        } else {
          ByteBufWritableComparable key = new ByteBufWritableComparable();
          ByteBufWritableComparable value = new ByteBufWritableComparable();

          int recordCount = 0;
          while (reader.next(key)) {
            reader.getCurrentValue(value);
            Document docValue =
                SequenceFileRowColCodec.decode(value.getByteBuf(), tab.idPathMap());
            tab.insertOrReplace(IdCodec.decode(key.getByteBuf()), docValue);
            // NOTE(review): flushing per record is kept on purpose — key/value buffers are
            // reused across iterations, so deferring the flush may not be safe. Confirm
            // before batching.
            tab.flush();
            recordCount++;
          }

          LOG.debug("recordCount {}", recordCount);
        }
      } catch (Exception e) {
        LOG.error(NAME + " encountered an exception: " + e.getMessage(), e);
        status = 1;
      } finally {
        // Release the table and reader on every path, including bulk load and failures.
        if (!closeResources(tab, reader)) {
          status = 1;
        }
      }

      return status;
    }

    /**
     * Copies every record from {@code reader} to {@code writer}.
     *
     * @param reader source sequence file
     * @param writer destination record writer
     * @param tab    table whose id-path map is used to decode documents
     * @return {@code true} when all records were written; {@code false} when an
     *         exception interrupted the copy (the exception is logged)
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    boolean importBulkload(SequenceFile.Reader reader, RecordWriter writer, MapRDBTableImpl tab) {
      ByteBufWritableComparable key = new ByteBufWritableComparable();
      ByteBufWritableComparable value = new ByteBufWritableComparable();
      try {
        while (reader.next(key)) {
          reader.getCurrentValue(value);
          Document docValue = SequenceFileRowColCodec.decode(value.getByteBuf(), tab.idPathMap());

          Value keyVal = IdCodec.decode(key.getByteBuf());
          writer.write(keyVal, docValue);
        }
        return true;
      } catch (Exception e) {
        LOG.error(NAME + " encountered an exception in bulkload mode: " + e.getMessage(), e);
        // Report the failure to the caller instead of letting it pass as success.
        return false;
      }
    }

    /**
     * Closes the table and then the reader, tolerating {@code null} for either.
     *
     * @return {@code false} if any close attempt failed (each failure is logged)
     */
    private boolean closeResources(MapRDBTableImpl tab, SequenceFile.Reader reader) {
      boolean clean = true;
      if (tab != null) {
        try {
          tab.close();
        } catch (Exception e) {
          LOG.error(NAME + " failed to close table " + dstPath + ": " + e.getMessage(), e);
          clean = false;
        }
      }
      if (reader != null) {
        try {
          reader.close();
        } catch (Exception e) {
          LOG.error(NAME + " failed to close reader for " + filePath + ": " + e.getMessage(), e);
          clean = false;
        }
      }
      return clean;
    }

  }


  /**
   * Builds the MapReduce job that reads sequence files from {@code srcPath}
   * and writes them to the table at {@code dstPath}.
   *
   * <p>Without bulk load the job is map-only and writes through
   * {@link TableOutputFormat}; with bulk load it uses
   * {@link ImportBulkLoadMapper}, {@link BulkLoadOutputFormat} and the reducer
   * count returned by {@link MapRDBMapReduceUtil#configurePartitioner}.
   *
   * @param conf      base configuration the job is created from
   * @param otherArgs remaining command-line arguments (not used here)
   * @return the configured, not yet submitted job
   * @throws IOException if the job cannot be created or configured
   */
  private static Job createSubmittableJob(Configuration conf, String[] otherArgs)
      throws IOException {
    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    Job job = Job.getInstance(conf, NAME + "_" + dstPath);
    job.setJarByClass(Import.class);

    SequenceFileInputFormat.setInputPaths(job, srcPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Get the job's config copy - settings from here on go to it, not to conf.
    Configuration config = job.getConfiguration();

    job.setSpeculativeExecution(false);

    if (!bulkLoad) {
      job.setMapperClass(ImportMapper.class);
      config.setStrings("io.serializations", conf.get("io.serializations"),
          DocEmptySerialization.class.getName());
      job.setOutputKeyClass(Value.class);
      job.setOutputValueClass(DBDocumentImpl.class);
      job.setOutputFormatClass(TableOutputFormat.class);
      config.set(TableOutputFormat.OUTPUT_TABLE, dstPath);
      // Map-only job.
      NUM_REDUCE_TASKS = 0;
    } else {
      config.setStrings("io.serializations", conf.get("io.serializations"),
          DBDocumentSerialization.class.getName(), ValueSerialization.class.getName());
      job.setMapperClass(ImportBulkLoadMapper.class);
      MapRDBMapReduceUtil.setOutputKeyValueClass(job);

      job.setOutputFormatClass(BulkLoadOutputFormat.class);
      config.set(BulkLoadOutputFormat.OUTPUT_TABLE, dstPath);

      // Set up the partitioner; it also determines the number of reducers.
      NUM_REDUCE_TASKS = MapRDBMapReduceUtil.configurePartitioner(job, dstPath);
    }

    job.setNumReduceTasks(NUM_REDUCE_TASKS);
    return job;
  }

  /**
   * Switches the destination table out of bulk-load mode if it is currently
   * in it. Does nothing when the table is not in bulk-load mode.
   */
  public static void doCleanup() {
    Admin maprAdmin = MapRDB.newAdmin();
    try {
      TableDescriptor dstTableDesc = maprAdmin.getTableDescriptor(dstPath);
      if (dstTableDesc.isBulkLoad()) {
        dstTableDesc.setBulkLoad(false);
        maprAdmin.alterTable(dstTableDesc);
      }
    } finally {
      // Release the admin handle even when the descriptor lookup or alter fails.
      maprAdmin.close();
    }
  }

  /**
   * Prints an optional error line followed by the usage text to standard
   * error, then terminates the JVM with exit status 1.
   *
   * @param errorMsg message to print before the usage text; skipped when
   *                 {@code null} or empty
   */
  private static void Usage(final String errorMsg) {
    boolean hasMessage = errorMsg != null && !errorMsg.isEmpty();
    if (hasMessage) {
      System.err.println("ERROR: " + errorMsg);
    }

    StringBuilder help = new StringBuilder();
    help.append("Usage: ").append(NAME)
        .append(" [options] -src <Input binary file/directory path> -dst <MapR-DB Destination table path>\n");
    help.append("Options:\n");
    help.append("[-bulkload : <true|false>, default is false]\n");
    help.append("[-mapreduce : <true|false>, default is true]\n");
    System.err.println(help.toString());

    System.exit(1);
  }

  /**
   * Parses the tool's command-line options into the static settings and
   * validates the destination table.
   *
   * <p>Recognized options (case-insensitive): {@code -src}, {@code -dst},
   * {@code -bulkload}, {@code -mapreduce}. Any problem is reported through
   * {@link #Usage(String)}, which terminates the JVM.
   *
   * <p>The {@code -bulkload} value is validated only after all options have
   * been read and the destination has been checked, so the result does not
   * depend on the order in which {@code -dst} and {@code -bulkload} appear.
   *
   * @param args option arguments left over after generic option parsing
   * @throws Exception if the file system or table descriptor cannot be accessed
   */
  private static void ParseArgs(String args[]) throws Exception {
    // null means -bulkload was not given on the command line.
    Boolean bulkLoadOption = null;

    for (int i = 0; i < args.length; ++i) {
      if (args[i].equalsIgnoreCase("-src")) {
        srcPath = optionValue(args, ++i);
      } else if (args[i].equalsIgnoreCase("-dst")) {
        dstPath = optionValue(args, ++i);
      } else if (args[i].equalsIgnoreCase("-bulkload")) {
        bulkLoadOption = Boolean.valueOf(optionValue(args, ++i));
      } else if (args[i].equalsIgnoreCase("-mapreduce")) {
        mapReduce = Boolean.parseBoolean(optionValue(args, ++i));
      } else {
        System.err.println("PARSE ARGS: " + args[i]);
        Usage(null);
      }
    }

    if (srcPath == null || dstPath == null) {
      Usage("missing -src or -dst.");
    }

    Configuration conf = new Configuration();
    MapRFileSystem mfs = (MapRFileSystem)(FileSystem.get(conf));
    Path dPath = new Path(dstPath);

    if (!mfs.exists(dPath)) {
      Usage(dPath + " does not exist");
    }

    if (!mfs.isJsonTable(dPath)) {
      Usage(dPath + " is not a JSON table. This tool only supports JSON tables");
    }

    if (bulkLoadOption != null) {
      if (!MapReduceUtilMethods.checkBulkloadStatus(bulkLoadOption.booleanValue(), dstPath)) {
        Usage("Table "+dstPath+" is in bulkload mode and can't work with bulkload = false option.");
      }
      bulkLoad = bulkLoadOption.booleanValue();
    }

    Admin maprAdmin = MapRDB.newAdmin();
    try {
      TableDescriptorImpl descriptor = (TableDescriptorImpl)maprAdmin.getTableDescriptor(dstPath);

      // Marlin does NOT support bulkload
      if (descriptor.isStream()) {
        bulkLoad = false;
      }

      // A table already in bulk-load mode forces bulk load regardless of the option.
      if (descriptor.isBulkLoad()) {
        bulkLoad = true;
      }
    } finally {
      maprAdmin.close();
    }

  }

  /**
   * Returns the value that follows an option, or reports a usage error when
   * the option is the last argument and has no value.
   *
   * @param args       all option arguments
   * @param valueIndex index at which the option's value is expected
   * @return the argument at {@code valueIndex}
   */
  private static String optionValue(String[] args, int valueIndex) {
    if (valueIndex >= args.length) {
      // Usage() exits the JVM, so the array access below is not reached in this case.
      Usage("Missing value for option " + args[valueIndex - 1]);
    }
    return args[valueIndex];
  }

  /**
   * Imports the input without MapReduce: expands {@code srcPath} to a list of
   * files and processes each one with an {@link ImporterThread} on a fixed
   * thread pool of {@code numThreads} workers.
   *
   * <p>Directories matched by the path are expanded one level; entries whose
   * name starts with {@code "_"} and nested directories are skipped.
   *
   * @return 0 when every file was imported successfully; 1 when the source
   *         path matched nothing or at least one worker reported a failure
   * @throws Exception if the file system cannot be queried or a worker
   *                   terminates abnormally
   */
  private int run_NonMR() throws Exception {
    Configuration config = getConf();
    Path inputPath = new Path(srcPath);
    FileSystem fs = inputPath.getFileSystem(config);
    FileStatus[] status = fs.globStatus(inputPath);
    if (status == null) {
      System.err.println("Given path " + srcPath + " does not exist. No data to copy.");
      return 1;
    }

    List<Path> filesToBeProcessed = new ArrayList<Path>();
    for (FileStatus f : status) {
      if (f.isDirectory()) {
        FileStatus[] statuses = fs.listStatus(f.getPath());
        for (FileStatus s : statuses) {
          if ((!s.isDirectory()) && (!s.getPath().getName().startsWith("_"))) {
            filesToBeProcessed.add(s.getPath());
          }
        }
      } else {
        filesToBeProcessed.add(f.getPath());
      }
    }

    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
    int numFailures = 0;
    try {
      List<Future<Integer>> futures = new ArrayList<Future<Integer>>(filesToBeProcessed.size());
      for (int i = 0; i < filesToBeProcessed.size(); i++) {
        futures.add(executor.submit(new ImporterThread(i, filesToBeProcessed.get(i), config)));
      }

      // Each worker returns 0 on success and 1 on failure, so the sum counts failed files.
      for (Future<Integer> f : futures) {
        numFailures += f.get();
      }
    } finally {
      // On the normal path every future has been joined, so no task is still running and
      // there is nothing to spin-wait for; shutting down here also covers the case where
      // get() throws.
      executor.shutdown();
    }

    return (numFailures == 0 ? 0 : 1);
  }

  /**
   * Tool entry point: parses the arguments and runs the import either
   * in-process or as a MapReduce job.
   *
   * @param args command-line arguments, including any generic Hadoop options
   * @return 0 on success, 1 on failure
   * @throws Exception if argument validation, job setup or the import itself fails
   */
  @Override
  public int run(String[] args) throws Exception {
    String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
    if (otherArgs.length < 2) {
      Usage("Wrong number of arguments: " + otherArgs.length);
      // Not reached: Usage() terminates the JVM with status 1.
      return 1;
    }
    ParseArgs(otherArgs);

    if (!mapReduce) {
      // NOTE(review): this path never calls doCleanup(), so a table imported with bulk load
      // here appears to stay in bulk-load mode — confirm whether that is intended.
      return run_NonMR();
    }

    Job job = createSubmittableJob(getConf(), otherArgs);
    // Wait once and reuse the result; the exit status is returned so the caller
    // (ToolRunner / main) decides how to terminate.
    boolean isJobSuccessful = job.waitForCompletion(true);
    if (isJobSuccessful) {
      doCleanup();
    }
    return isJobSuccessful ? 0 : 1;
  }

  /**
   * Command-line entry point. Runs the tool through {@link ToolRunner} and
   * exits with its status, or with 1 if the run throws.
   *
   * @param args command-line arguments
   */
  public static void main(String[] args) throws Exception {
    int exitCode;
    try {
      exitCode = ToolRunner.run(new Configuration(), new Import(), args);
    } catch (Exception failure) {
      failure.printStackTrace();
      exitCode = 1;
    }
    System.exit(exitCode);
  }
}
