package com.mapr.db.mapreduce.tools;

import static com.mapr.db.rowcol.DBValueBuilderImpl.KeyValueBuilder;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.ojai.Document;
import org.ojai.DocumentStream;
import org.ojai.Value;
import org.ojai.json.Json;
import org.ojai.json.mapreduce.JSONFileInputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.mapr.db.Admin;
import com.mapr.db.MapRDB;
import com.mapr.db.impl.MapRDBTableImpl;
import com.mapr.db.impl.MapRDBTableImpl.TablePrivateOption;
import com.mapr.db.impl.TableDescriptorImpl;
import com.mapr.db.mapreduce.BulkLoadOutputFormat;
import com.mapr.db.mapreduce.BulkLoadRecordWriter;
import com.mapr.db.mapreduce.DocumentSerialization;
import com.mapr.db.mapreduce.MapRDBMapReduceUtil;
import com.mapr.db.mapreduce.TableOutputFormat;
import com.mapr.db.mapreduce.ValueSerialization;
import com.mapr.db.mapreduce.impl.MapReduceConstants;
import com.mapr.db.mapreduce.impl.MapReduceUtilMethods;
import com.mapr.db.rowcol.DBDocumentImpl;
import com.mapr.db.rowcol.IdValueComparator;
import com.mapr.db.rowcol.KeyValue;

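/**
 * This utility imports JSON text files into a MapR Document-DB table.
 * <p>
 * A table that is in bulkload mode cannot serve data until its bulkload flag is
 * cleared. When the import runs as a MapReduce job, this utility clears the flag
 * on the destination table itself once the job has finished. If the flag is still
 * set after the import (for example after a non-MapReduce import into a table that
 * was already in bulkload mode), the admin must clear it on the server with the
 * command {@code maprcli table edit -path <path> -bulkload false} before the
 * table can begin serving data.
 */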
public class ImportJSON extends Configured implements Tool {
  private static final Logger LOG = LoggerFactory.getLogger(ImportJSON.class);
  private static final String NAME = "importJSON";
  /** Configuration key under which the destination table path is stored for the job. */
  public final static String TABLE_NAME = "import.table.name";
  private static int NUM_REDUCE_TASKS;
  private static String srcPath;
  private static String dstPath;
  private static boolean bulkLoad = false;
  private static boolean mapreduce = true;
  private static int numThreads = 16;
  // Parsed from '-columns' but not read anywhere in this class.
  private static String columnSpec = null;
  private static String keyField = null;
  // Outcome of the non-MapReduce import. run_NonMR() sets it to true before the
  // workers start; a worker may only ever flip it to false, so one failed file
  // can never be masked by another file that succeeded later. Volatile because
  // it is written by pool threads and read by the thread that called run().
  private static volatile boolean isSuccess = false;

  /** Common state for the per-file import workers used in non-MapReduce mode. */
  abstract class BaseImportJSONThread implements Runnable {
    protected Path filePath;
    protected int myid;
    protected Configuration config;

    protected BaseImportJSONThread(int id, Path filePath, Configuration config) {
      this.myid = id;
      this.filePath = filePath;
      this.config = config;
    }
  }

  /**
   * Worker that reads one JSON file and writes every document it contains to the
   * destination table, either through a {@link BulkLoadRecordWriter} (bulkload
   * mode) or through {@code insertOrReplace} calls on the table.
   */
  class ImportJSONThread extends BaseImportJSONThread {
    ImportJSONThread(int id, Path t, Configuration config) {
      super(id, t, config);
    }

    /**
     * Imports the file. Any failure is logged and recorded by setting
     * {@code isSuccess} to false; success leaves the flag untouched.
     */
    @Override
    public void run() {
      FSDataInputStream inputStream = null;
      MapRDBTableImpl tab = null;
      try {
        FileSystem fs = filePath.getFileSystem(config);
        inputStream = fs.open(filePath);

        DocumentStream documentStream = Json.newDocumentStream(inputStream);
        Iterator<Document> iter = documentStream.iterator();

        if (bulkLoad) {
          RecordWriter<Value, Document> writer =
              new BulkLoadRecordWriter(getConf(), new Path(dstPath));
          try {
            importBulkload(iter, writer);
          } finally {
            // Close the writer even when reading the input failed part-way.
            writer.close(null);
          }
          return;
        }

        tab = new MapRDBTableImpl(new Path(dstPath), config);
        tab.setPrivateOption(TablePrivateOption.PRESERVE_TIMESTAMP, true);
        int recordCount = 0;

        while (iter.hasNext()) {
          Document docValue = iter.next();
          if (keyField != null) {
            // Use the configured field of the document as the row key.
            tab.insertOrReplace(docValue.getString(keyField), docValue);
          } else {
            tab.insertOrReplace(docValue);
          }
          recordCount++;
        }

        LOG.debug("recordCount " + recordCount);
        tab.flush();
      } catch (Exception e) {
        LOG.error(NAME + " encountered an exception: " + e.getMessage(), e);
        isSuccess = false;
      } finally {
        if (tab != null) {
          try {
            tab.close();
          } catch (Exception e) {
            // A failed close is still treated as a failed import of this file.
            LOG.error(NAME + " encountered an exception: " + e.getMessage(), e);
            isSuccess = false;
          }
        }
        if (inputStream != null) {
          try {
            inputStream.close();
          } catch (IOException e) {
            LOG.warn(NAME + " could not close input file " + filePath, e);
          }
        }
      }
    }

    /**
     * Writes every remaining document of {@code iter} to {@code writer}, keyed by
     * the value of the configured id field (or {@code "_id"} when none is set).
     * A record that fails to write is logged and skipped, and the import as a
     * whole is marked as failed.
     *
     * @param iter   documents to import
     * @param writer destination for the (key, document) pairs
     */
    void importBulkload(Iterator<Document> iter, RecordWriter<Value, Document> writer) {
      final String idFieldName = (keyField != null) ? keyField : "_id";
      while (iter.hasNext()) {
        Document docValue = iter.next();
        Value key = docValue.getValue(idFieldName);
        try {
          writer.write(key, docValue);
        } catch (Exception e) {
          LOG.error(NAME + " encountered an exception in bulkload mode: " + e.getMessage(), e);
          // Keep going (best effort) but make sure the failure is reported.
          isSuccess = false;
        }
      }
    }
  }

  /**
   * Mapper used by the MapReduce import. For each input document it optionally
   * narrows to the sub-document stored under {@code MapReduceConstants.recordPath},
   * passes it through {@code KeyValueBuilder.initFrom}, and emits it keyed by its
   * id (or by the field named in {@code MapReduceConstants.idField} when set).
   */
  public static class JsonTextImporter extends Mapper<LongWritable, Document, Value, Document> {

    public static int counter = 0;

    @Override
    public void map(LongWritable key, Document record, Context context)
        throws IOException, InterruptedException {
      Configuration conf = context.getConfiguration();
      String idField = conf.get(MapReduceConstants.idField);
      String fieldPath = conf.get(MapReduceConstants.recordPath);

      Document source = (fieldPath != null) ? (Document) record.getValue(fieldPath) : record;
      // NOTE(review): presumably converts the document to the MapR-DB
      // implementation type expected by the output format - confirm.
      Document rec = (Document) KeyValueBuilder.initFrom(source);

      Value docKey = (idField == null) ? rec.getId() : rec.getValue(idField);
      context.write(docKey, rec);
    }
  }

  /**
   * Builds the MapReduce job that imports {@code srcPath} into {@code dstPath}.
   * In bulkload mode the job writes through {@link BulkLoadOutputFormat} with the
   * reducer count returned by the partitioner setup; otherwise it is a map-only
   * job writing through {@link TableOutputFormat}.
   *
   * @param conf base configuration; the job works on its own copy
   * @param args remaining command-line arguments (not used here)
   * @return the configured, not yet submitted job
   */
  public static Job createSubmittableJob(Configuration conf, String[] args)
  throws IOException, ClassNotFoundException, InterruptedException {
    Path inputDir = new Path(srcPath);
    // NOTE(review): TABLE_NAME is the configuration key, not the table path, so
    // every job is named "importJSON_import.table.name" - confirm this is intended.
    Job job = Job.getInstance(conf, NAME + "_" + TABLE_NAME);
    job.setJarByClass(ImportJSON.class);

    JSONFileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(JSONFileInputFormat.class);

    //Get the job's config copy - don't use conf now onwards
    Configuration config = job.getConfiguration();
    config.setStrings("io.serializations",
                      config.get("io.serializations"),
                      DocumentSerialization.class.getName(), ValueSerialization.class.getName());

    config.set(TABLE_NAME, dstPath);
    job.setMapperClass(JsonTextImporter.class);

    job.setOutputKeyClass(KeyValue.class);
    job.setSortComparatorClass(IdValueComparator.class);
    job.setOutputValueClass(DBDocumentImpl.class);
    job.setSpeculativeExecution(false);

    if (keyField != null) {
      config.set(MapReduceConstants.idField, keyField);
    }

    if (!bulkLoad) {
      job.setOutputFormatClass(TableOutputFormat.class);
      config.set(TableOutputFormat.OUTPUT_TABLE, dstPath);

      NUM_REDUCE_TASKS = 0;
    } else {
      job.setOutputFormatClass(BulkLoadOutputFormat.class);
      config.set(BulkLoadOutputFormat.OUTPUT_TABLE, dstPath);

      //Set up the partitioner
      NUM_REDUCE_TASKS = MapRDBMapReduceUtil.configurePartitioner(job, dstPath);
    }
    job.setNumReduceTasks(NUM_REDUCE_TASKS);
    return job;
  }

  /** Creates the destination table at {@code path}, in bulkload mode if requested. */
  private void createTable(Admin maprAdmin, String path) {
    TableDescriptorImpl tableDesc = new TableDescriptorImpl();
    tableDesc.setPath(path);
    tableDesc.setBulkLoad(bulkLoad);
    maprAdmin.createTable(tableDesc);
  }

  /**
   * Prints an optional error followed by the usage text to stderr and terminates
   * the JVM with exit code 1. This method does not return.
   */
  private static void Usage(final String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
      System.err.println("ERROR: " + errorMsg);
    }
    System.err.println("Usage: " + NAME + " [options] -src <Input text file/directory path> -dst <MapR-DB Destination table path>\n"
                       + "Options:\n"
                       + "[-idfield <Name of ID field in JSON Data>]\n"
                       + "[-bulkload <true|false>, default is false]\n"
                       + "[-mapreduce <true|false>, default is true]\n"
                       + "(Can not use bulkload mode with mapreduce = false)\n"
                       + "(If no ID field is specified, an ID field is expected to"
                       + " be present in the JSON Record)\n");
    System.exit(1);
  }

  /**
   * Returns the value that follows an option, or reports a usage error (which
   * exits) when the option is the last argument.
   *
   * @param args       full argument array
   * @param valueIndex index at which the option's value is expected
   */
  private static String optionValue(String[] args, int valueIndex) {
    if (valueIndex >= args.length) {
      Usage("Missing value for option " + args[valueIndex - 1]);
    }
    return args[valueIndex];
  }

  /**
   * Parses the tool-specific command-line options into the static settings of
   * this class. Any invalid combination ends in {@link #Usage(String)}.
   */
  private static void ParseArgs(String args[]) throws Exception {
    boolean bulkLoadSpecified = false;
    for (int i = 0; i < args.length; ++i) {
      String option = args[i];
      if (option.equalsIgnoreCase("-src")) {
        srcPath = optionValue(args, ++i);
      } else if (option.equalsIgnoreCase("-dst")) {
        dstPath = optionValue(args, ++i);
      } else if (option.equalsIgnoreCase("-bulkload")) {
        bulkLoad = Boolean.valueOf(optionValue(args, ++i));
        bulkLoadSpecified = true;
      } else if (option.equalsIgnoreCase("-mapreduce")) {
        mapreduce = Boolean.valueOf(optionValue(args, ++i));
      } else if (option.equalsIgnoreCase("-columns")) {
        columnSpec = optionValue(args, ++i);
      } else if (option.equalsIgnoreCase("-idfield")) {
        keyField = optionValue(args, ++i);
      } else {
        System.err.println("PARSE ARGS: " + option);
        Usage(null);
      }
    }

    if ((!mapreduce) && (bulkLoad)) {
      Usage("Bulkload mode in non M/R require data in file to be sorted");
    }

    if (srcPath == null || dstPath == null) {
      Usage("missing -src or -dst.");
    }

    // Checked only after the loop so the destination path is known even when
    // '-bulkload' appears before '-dst' on the command line.
    if (bulkLoadSpecified && !MapReduceUtilMethods.checkBulkloadStatus(bulkLoad, dstPath)) {
      Usage("Table "+dstPath+" is in bulkload mode and can't work with bulkload = false option.");
    }
  }

  /**
   * Imports the input files without MapReduce, using a fixed pool of
   * {@code numThreads} workers with one task per file. Directories matched by
   * the source path are expanded one level; entries whose name starts with
   * {@code "_"} are skipped. The outcome is left in {@code isSuccess}.
   */
  private void run_NonMR() throws Exception {
    Configuration config = getConf();
    Path inputPath = new Path(srcPath);
    FileSystem fs = inputPath.getFileSystem(config);
    FileStatus[] status = fs.globStatus(inputPath);
    if (status == null) {
      System.err.println("Given path " + srcPath + " does not exist. No data to copy.");
      isSuccess = false;
      return;
    }

    List<Path> filesToBeProcessed = new ArrayList<Path>();
    for (FileStatus f : status) {
      if (!f.isDirectory()) {
        filesToBeProcessed.add(f.getPath());
        continue;
      }
      for (FileStatus s : fs.listStatus(f.getPath())) {
        if ((!s.isDirectory()) && (!s.getPath().getName().startsWith("_"))) {
          filesToBeProcessed.add(s.getPath());
        }
      }
    }

    if (filesToBeProcessed.isEmpty()) {
      // Nothing was imported; report it as a failure, as before, but say why.
      System.err.println("No input files found for " + srcPath + ". No data to copy.");
      isSuccess = false;
      return;
    }

    // Assume success; workers flip the flag to false on any failure.
    isSuccess = true;
    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
    try {
      for (int i = 0; i < filesToBeProcessed.size(); i++) {
        executor.execute(new ImportJSONThread(i, filesToBeProcessed.get(i), config));
      }
    } finally {
      executor.shutdown();
    }

    try {
      // Block until all workers are done instead of spinning on isTerminated().
      executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
    } catch (InterruptedException e) {
      executor.shutdownNow();
      isSuccess = false;
      Thread.currentThread().interrupt();
    }
  }

  /**
   * Entry point invoked by {@link ToolRunner}. Parses the arguments, creates the
   * destination table if it does not exist, and runs the import either as a
   * MapReduce job or with local threads.
   *
   * @param args command-line arguments, including generic Hadoop options
   * @return 0 when the import succeeded, 1 otherwise
   */
  @Override
  public int run(String[] args) throws Exception {
    String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
    if (otherArgs.length < 2) {
      Usage("Wrong number of arguments: " + otherArgs.length);
      return 1; // not reached: Usage() terminates the JVM
    }

    ParseArgs(otherArgs);
    Admin maprAdmin = MapRDB.newAdmin();
    if (!maprAdmin.tableExists(dstPath)) {
      createTable(maprAdmin, dstPath);
    }

    //if the table is created in bulkload mode and bulkload flag is set to false,
    //we should set it back to true.
    TableDescriptorImpl descriptor = (TableDescriptorImpl)maprAdmin.getTableDescriptor(dstPath);
    if ((!bulkLoad) && descriptor.isBulkLoad()) {
      bulkLoad = true;
    }

    if (!mapreduce) {
      run_NonMR();
      return isSuccess ? 0 : 1;
    }

    Job job = createSubmittableJob(getConf(), otherArgs);
    boolean isJobSuccessful = job.waitForCompletion(true);
    //set bulkload back to false if it's specified as true
    if (descriptor.isBulkLoad()) {
      descriptor.setBulkLoad(false);
      maprAdmin.alterTable(descriptor);
    }
    // Report the result to the caller; main() turns it into the exit code.
    return isJobSuccessful ? 0 : 1;
  }

  /** Runs the tool through {@link ToolRunner} and exits with its return code. */
  public static void main(String[] args) throws Exception {
    int ret = 0;
    try {
      ret = ToolRunner.run(new Configuration(), new ImportJSON(), args);
    } catch (Exception e) {
      ret = 1;
      e.printStackTrace();
    }
    System.exit(ret);
  }
}
