/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nifi.processors.poi;

import com.github.pjfanning.xlsx.StreamingReader;
import com.github.pjfanning.xlsx.exceptions.OpenException;
import com.github.pjfanning.xlsx.exceptions.ParseException;
import com.github.pjfanning.xlsx.exceptions.ReadException;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.annotation.behavior.WritesAttribute;
import org.apache.nifi.annotation.behavior.WritesAttributes;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.DeprecationNotice;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.context.PropertyContext;
import org.apache.nifi.csv.CSVUtils;
import org.apache.nifi.expression.ExpressionLanguageScope;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;

@Tags(value={"excel", "csv", "poi"})
@CapabilityDescription(value="Consumes a Microsoft Excel document and converts each worksheet to csv. Each sheet from the incoming Excel document will generate a new Flowfile that will be output from this processor. Each output Flowfile's contents will be formatted as a csv file where the each row from the excel sheet is output as a newline in the csv file. This processor is currently only capable of processing .xlsx (XSSF 2007 OOXML file format) Excel documents and not older .xls (HSSF '97(-2007) file format) documents. This processor also expects well formatted CSV content and will not escape cell's containing invalid content such as newlines or additional commas.")
@WritesAttributes(value={@WritesAttribute(attribute="sheetname", description="The name of the Excel sheet that this particular row of data came from in the Excel document"), @WritesAttribute(attribute="numrows", description="The number of rows in this Excel Sheet"), @WritesAttribute(attribute="sourcefilename", description="The name of the Excel document file that this data originated from"), @WritesAttribute(attribute="convertexceltocsvprocessor.error", description="Error message that was encountered on a per Excel sheet basis. This attribute is only populated if an error was occured while processing the particular sheet. Having the error present at the sheet level will allow for the end user to better understand what syntax errors in their excel doc on a larger scale caused the error.")})
@DeprecationNotice(reason="ConvertExcelToCSVProcessor is no longer needed since there is now the ExcelReader which along with CSVRecordSetWriter can be used in ConvertRecord to achieve the same thing.", classNames={"org.apache.nifi.excel.ExcelReader", "org.apache.nifi.csv.CSVRecordSetWriter", "org.apache.nifi.processors.standard.ConvertRecord"})
public class ConvertExcelToCSVProcessor
extends AbstractProcessor {
    /** MIME type value stamped on every generated CSV FlowFile. */
    private static final String CSV_MIME_TYPE = "text/csv";

    /** Attribute holding the name of the sheet a generated FlowFile was produced from. */
    public static final String SHEET_NAME = "sheetname";

    /** Attribute holding the number of rows counted for the sheet. */
    public static final String ROW_NUM = "numrows";

    /** Attribute holding the filename of the incoming Excel FlowFile. */
    public static final String SOURCE_FILE_NAME = "sourcefilename";

    /** Separator used to split the comma-delimited list properties and to join the names of sheets that were not found. */
    private static final String DESIRED_SHEETS_DELIMITER = ",";

    /** Fallback written to the source-filename attribute when the incoming FlowFile has no filename. */
    private static final String UNKNOWN_SHEET_NAME = "UNKNOWN";

    /** Optional comma-separated list of sheet names to extract; when unset every sheet is converted. */
    public static final PropertyDescriptor DESIRED_SHEETS = new PropertyDescriptor.Builder()
            .name("extract-sheets")
            .displayName("Sheets to Extract")
            .description("Comma separated list of Excel document sheet names that should be extracted from the excel document. If this property is left blank then all of the sheets will be extracted from the Excel document. The list of names is case in-sensitive. Any sheets not specified in this value will be ignored. A bulletin will be generated if a specified sheet(s) are not found.")
            .required(false)
            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .build();

    /** Number of leading rows to ignore on each sheet (defaults to 0). */
    public static final PropertyDescriptor ROWS_TO_SKIP = new PropertyDescriptor.Builder()
            .name("excel-extract-first-row")
            .displayName("Number of Rows to Skip")
            .description("The row number of the first row to start processing.Use this to skip over rows of data at the top of your worksheet that are not part of the dataset.Empty rows of data anywhere in the spreadsheet will always be skipped, no matter what this value is set to.")
            .required(true)
            .defaultValue("0")
            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
            .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
            .build();

    /** Optional comma-delimited list of column numbers to leave out of the output. */
    public static final PropertyDescriptor COLUMNS_TO_SKIP = new PropertyDescriptor.Builder()
            .name("excel-extract-column-to-skip")
            .displayName("Columns To Skip")
            .description("Comma delimited list of column numbers to skip. Use the columns number and not the letter designation. Use this to skip over columns anywhere in your worksheet that you don't want extracted as part of the record.")
            .required(false)
            .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .build();

    /** Toggle passed to the streaming reader's {@code setReadStyles} when the workbook is opened. */
    public static final PropertyDescriptor FORMAT_VALUES = new PropertyDescriptor.Builder()
            .name("excel-format-values")
            .displayName("Format Cell Values")
            .description("Should the cell values be written to CSV using the formatting applied in Excel, or should they be printed as raw values.")
            .allowableValues("true", "false")
            .defaultValue("false")
            .required(true)
            .build();

    /** Toggle controlling whether boolean cells are rewritten as TRUE/FALSE. */
    public static final PropertyDescriptor FORMAT_BOOLEANS = new PropertyDescriptor.Builder()
            .name("excel-format-booleans")
            .displayName("Format Booleans")
            .description("If true, true/false will be represented by TRUE/FALSE. If false, true/false will be represented by 0/1.")
            .allowableValues("true", "false")
            .defaultValue("true")
            .required(true)
            .build();

    /** Receives the incoming Excel FlowFile once it has been read. */
    public static final Relationship ORIGINAL = new Relationship.Builder()
            .name("original")
            .description("Original Excel document received by this processor")
            .build();

    /** Receives one CSV FlowFile per converted sheet. */
    public static final Relationship SUCCESS = new Relationship.Builder()
            .name("success")
            .description("Excel data converted to csv")
            .build();

    /** Receives FlowFiles (document or individual sheet) whose conversion raised an error. */
    public static final Relationship FAILURE = new Relationship.Builder()
            .name("failure")
            .description("Failed to parse the Excel document")
            .build();

    /** Unmodifiable list of supported properties; assigned in {@code init}. */
    private List<PropertyDescriptor> descriptors;

    /** Unmodifiable set of relationships; assigned in {@code init}. */
    private Set<Relationship> relationships;

    /**
     * Assembles the processor's supported property descriptors and relationships
     * and stores them as unmodifiable collections.
     *
     * <p>The shared CSV quote-mode descriptor is re-declared here so that its
     * default becomes {@code CSVUtils.QUOTE_NONE}.</p>
     *
     * @param context initialization context (not used by this implementation)
     */
    protected void init(ProcessorInitializationContext context) {
        PropertyDescriptor quoteModeDefaultingToNone = new PropertyDescriptor.Builder()
                .fromPropertyDescriptor(CSVUtils.QUOTE_MODE)
                .defaultValue(CSVUtils.QUOTE_NONE.getValue())
                .build();

        // Order matters: it is the order in which the properties are exposed.
        List<PropertyDescriptor> supported = Arrays.asList(
                DESIRED_SHEETS,
                ROWS_TO_SKIP,
                COLUMNS_TO_SKIP,
                FORMAT_VALUES,
                FORMAT_BOOLEANS,
                CSVUtils.CSV_FORMAT,
                CSVUtils.VALUE_SEPARATOR,
                CSVUtils.INCLUDE_HEADER_LINE,
                CSVUtils.QUOTE_CHAR,
                CSVUtils.ESCAPE_CHAR,
                CSVUtils.COMMENT_MARKER,
                CSVUtils.NULL_STRING,
                CSVUtils.TRIM_FIELDS,
                quoteModeDefaultingToNone,
                CSVUtils.RECORD_SEPARATOR,
                CSVUtils.TRAILING_DELIMITER);
        this.descriptors = Collections.unmodifiableList(supported);

        // LinkedHashSet keeps the original/success/failure insertion order.
        Set<Relationship> routes = new LinkedHashSet<Relationship>(Arrays.asList(ORIGINAL, SUCCESS, FAILURE));
        this.relationships = Collections.unmodifiableSet(routes);
    }

    /**
     * @return the unmodifiable relationship set built in {@code init}
     *         (original, success, failure)
     */
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    /**
     * @return the unmodifiable list of property descriptors built in {@code init}
     */
    public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return descriptors;
    }

    /**
     * Reads one incoming FlowFile as a streaming .xlsx workbook and hands each
     * selected sheet to {@code handleExcelSheet}, which emits one FlowFile per sheet.
     *
     * <p>When a list of desired sheets is configured, only sheets whose name matches
     * (ignoring case) are converted and a warning is logged for names that matched
     * nothing; otherwise every sheet is converted. On completion the incoming
     * FlowFile goes to {@code original}. Any {@link RuntimeException} escaping the
     * read sends it to {@code failure} with an error attribute instead.</p>
     *
     * @param context processor configuration
     * @param session session used to read the input and create outputs
     * @throws ProcessException if the skip-column configuration is invalid
     */
    public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
        final FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }

        // Resolve all per-FlowFile settings before the content is opened.
        final Map<String, Boolean> desiredSheets = this.getDesiredSheets(context, flowFile);
        final boolean formatValues = context.getProperty(FORMAT_VALUES).asBoolean();
        final boolean formatBooleans = context.getProperty(FORMAT_BOOLEANS).asBoolean();
        final CSVFormat csvFormat = CSVUtils.createCSVFormat(context, flowFile.getAttributes());
        // Rows with an index <= firstRow are skipped, hence the "- 1".
        final int firstRow = context.getProperty(ROWS_TO_SKIP).evaluateAttributeExpressions(flowFile).asInteger() - 1;
        final List<Integer> columnsToSkip = this.getColumnsToSkip(context, flowFile);

        try {
            session.read(flowFile, inputStream -> {
                try (Workbook workbook = StreamingReader.builder()
                        .rowCacheSize(100)
                        .bufferSize(4096)
                        .setReadStyles(formatValues)
                        .open(inputStream)) {
                    if (desiredSheets.isEmpty()) {
                        // No filter configured: convert every sheet.
                        for (Sheet sheet : workbook) {
                            ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow, sheet.getSheetName());
                            this.handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat, formatBooleans);
                        }
                    } else {
                        for (String wantedName : desiredSheets.keySet()) {
                            for (Sheet sheet : workbook) {
                                if (!sheet.getSheetName().equalsIgnoreCase(wantedName)) {
                                    continue;
                                }
                                ExcelSheetReadConfig readConfig = new ExcelSheetReadConfig(columnsToSkip, firstRow, sheet.getSheetName());
                                this.handleExcelSheet(session, flowFile, sheet, readConfig, csvFormat, formatBooleans);
                                // Overwrites an existing key only, so iterating the key set stays safe.
                                desiredSheets.put(wantedName, Boolean.TRUE);
                            }
                        }
                        String missingSheets = this.getSheetsNotFound(desiredSheets);
                        if (!missingSheets.isEmpty()) {
                            this.getLogger().warn("Excel sheet(s) not found: {}", new Object[]{missingSheets});
                        }
                    }
                } catch (OpenException | ParseException | ReadException e) {
                    if (e.getCause() instanceof InvalidFormatException) {
                        String msg = "Only .xlsx Excel 2007 OOXML files are supported";
                        this.getLogger().error(msg, e);
                        throw new UnsupportedOperationException(msg, e);
                    }
                    // Other reader errors are logged only; the FlowFile still goes to "original".
                    this.getLogger().error("Error occurred while processing Excel document metadata", e);
                }
            });
            session.transfer(flowFile, ORIGINAL);
        } catch (RuntimeException ex) {
            this.getLogger().error("Failed to process incoming Excel document. " + ex.getMessage(), (Throwable) ex);
            FlowFile failedFlowFile = session.putAttribute(flowFile, ConvertExcelToCSVProcessor.class.getName() + ".error", ex.getMessage());
            session.transfer(failedFlowFile, FAILURE);
        }
    }

    /**
     * Parses the "Columns To Skip" property into zero-based column indexes.
     *
     * <p>The property holds comma-delimited column numbers; each one is reduced by
     * one so it can be compared with {@code Cell.getColumnIndex()}. Whitespace
     * around a number (for example {@code "1, 3"}) is ignored.</p>
     *
     * @param context  processor configuration
     * @param flowFile FlowFile whose attributes are used to evaluate the property
     * @return the zero-based indexes to skip; an empty list when the property is unset
     * @throws ProcessException if any entry is not an integer
     */
    private List<Integer> getColumnsToSkip(ProcessContext context, FlowFile flowFile) {
        String configured = context.getProperty(COLUMNS_TO_SKIP).evaluateAttributeExpressions(flowFile).getValue();
        // StringUtils.split returns null only for a null input, i.e. an unset property.
        String[] tokens = StringUtils.split(configured, DESIRED_SHEETS_DELIMITER);
        if (tokens == null) {
            return new ArrayList<Integer>();
        }
        try {
            return Arrays.stream(tokens)
                    // trim() so that "1, 2" is accepted instead of failing on " 2"
                    .map(token -> Integer.parseInt(token.trim()) - 1)
                    .collect(Collectors.toList());
        } catch (NumberFormatException e) {
            throw new ProcessException("Invalid column in Columns to Skip list.", (Throwable) e);
        }
    }

    /**
     * Builds the "sheet name to found flag" map from the "Sheets to Extract" property.
     *
     * <p>Every configured name starts out mapped to {@code Boolean.FALSE}; the caller
     * flips the flag when a matching sheet is processed. A name that is listed more
     * than once is kept once rather than aborting with an
     * {@link IllegalStateException} from the collector.</p>
     *
     * @param context  processor configuration
     * @param flowFile FlowFile whose attributes are used to evaluate the property
     * @return a mutable map keyed by configured sheet name; empty when the property is unset
     */
    private Map<String, Boolean> getDesiredSheets(ProcessContext context, FlowFile flowFile) {
        String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions(flowFile).getValue();
        if (desiredSheetsDelimited == null) {
            return new HashMap<String, Boolean>();
        }
        // split() of a non-null string never returns null, so no further null check is needed.
        String[] sheetNames = StringUtils.split(desiredSheetsDelimited, DESIRED_SHEETS_DELIMITER);
        return Arrays.stream(sheetNames).collect(Collectors.toMap(
                name -> name,
                name -> Boolean.FALSE,
                // merge function: tolerate duplicate names in the property
                (first, repeated) -> first,
                // explicit HashMap: the caller mutates the returned map
                HashMap::new));
    }

    /**
     * Converts a single sheet into a new child FlowFile containing CSV.
     *
     * <p>On success the child receives the sheet name, row count, source filename,
     * a {@code .csv} filename and the CSV MIME type, and is sent to {@code success}.
     * If a {@link RuntimeException} is raised, the error is logged, recorded in the
     * {@code <processor class name>.error} attribute and the child is sent to
     * {@code failure}; the exception is not propagated.</p>
     *
     * @param session          session used to create and write the child FlowFile
     * @param originalParentFF the incoming Excel FlowFile (parent of the child)
     * @param sheet            the sheet to convert
     * @param readConfig       per-sheet read settings (rows/columns to skip, sheet name)
     * @param csvFormat        CSV format used for output
     * @param formatBooleans   whether boolean cells are rewritten as TRUE/FALSE
     */
    private void handleExcelSheet(ProcessSession session, FlowFile originalParentFF, Sheet sheet, ExcelSheetReadConfig readConfig, CSVFormat csvFormat, boolean formatBooleans) {
        FlowFile ff = session.create(originalParentFF);
        SheetToCSV sheetHandler = new SheetToCSV(readConfig, csvFormat);
        sheetHandler.setFormatBooleans(formatBooleans);
        try {
            ff = session.write(ff, out -> {
                sheetHandler.setOutput(out);
                sheet.forEach(row -> {
                    sheetHandler.startRow(row.getRowNum());
                    row.forEach(sheetHandler::cell);
                    sheetHandler.endRow();
                });
                sheetHandler.close();
            });
            ff = session.putAttribute(ff, SHEET_NAME, readConfig.getSheetName());
            ff = session.putAttribute(ff, ROW_NUM, Long.toString(sheetHandler.getRowCount()));

            String parentFilename = originalParentFF.getAttribute(CoreAttributes.FILENAME.key());
            ff = session.putAttribute(ff, SOURCE_FILE_NAME, StringUtils.isNotEmpty(parentFilename) ? parentFilename : UNKNOWN_SHEET_NAME);

            ff = session.putAttribute(ff, CoreAttributes.FILENAME.key(), this.updateFilenameToCSVExtension(ff.getAttribute(CoreAttributes.UUID.key()), ff.getAttribute(CoreAttributes.FILENAME.key()), readConfig.getSheetName()));
            ff = session.putAttribute(ff, CoreAttributes.MIME_TYPE.key(), CSV_MIME_TYPE);
            session.transfer(ff, SUCCESS);
        } catch (RuntimeException e) {
            // Log the failure as well, so a sheet routed to failure is visible outside the attribute.
            this.getLogger().error("Failed to convert Excel sheet " + readConfig.getSheetName() + " to CSV. " + e.getMessage(), (Throwable) e);
            ff = session.putAttribute(ff, ConvertExcelToCSVProcessor.class.getName() + ".error", e.getMessage());
            session.transfer(ff, FAILURE);
        }
    }

    /**
     * Lists the configured sheet names that were never matched.
     *
     * @param desiredSheets map of configured sheet name to "was found" flag
     * @return the names whose flag is still false, joined with the list delimiter;
     *         an empty string when every name was found
     */
    private String getSheetsNotFound(Map<String, Boolean> desiredSheets) {
        List<String> missing = new ArrayList<String>();
        for (Map.Entry<String, Boolean> candidate : desiredSheets.entrySet()) {
            if (!candidate.getValue()) {
                missing.add(candidate.getKey());
            }
        }
        return String.join(DESIRED_SHEETS_DELIMITER, missing);
    }

    /**
     * Derives the filename for a generated CSV FlowFile:
     * {@code <base>_<sheetName>.csv}.
     *
     * <p>{@code <base>} is the original filename with its trailing extension
     * removed, or the FlowFile UUID when no original filename is available. Only
     * the final extension is stripped; the same text appearing earlier in the
     * name (for example {@code report.xlsx.copy.xlsx}) is left intact.</p>
     *
     * @param nifiUUID     UUID used as the base name when {@code origFileName} is empty
     * @param origFileName original filename, may be null or empty
     * @param sheetName    name of the sheet the CSV was produced from
     * @return the new filename ending in {@code .csv}
     */
    private String updateFilenameToCSVExtension(String nifiUUID, String origFileName, String sheetName) {
        String baseName;
        if (StringUtils.isNotEmpty((CharSequence) origFileName)) {
            String ext = FilenameUtils.getExtension(origFileName);
            // getExtension returns the text after the last dot, so the name ends with "." + ext;
            // cut exactly that suffix instead of replacing every occurrence of it.
            baseName = StringUtils.isNotEmpty((CharSequence) ext)
                    ? origFileName.substring(0, origFileName.length() - ext.length() - 1)
                    : origFileName;
        } else {
            baseName = nifiUUID;
        }
        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append(baseName);
        stringBuilder.append("_");
        stringBuilder.append(sheetName);
        stringBuilder.append(".");
        stringBuilder.append("csv");
        return stringBuilder.toString();
    }

    /**
     * Stateful row/cell handler that turns the cells of one sheet into CSV records.
     *
     * <p>Expected call sequence per sheet: {@link #setOutput(OutputStream)}, then for
     * every row {@link #startRow(int)}, {@link #cell(Cell)} for each cell and
     * {@link #endRow()}, and finally {@link #close()}. The first non-skipped row
     * fixes the first and last column recorded in the shared read config; cells of
     * later rows outside that range are dropped.</p>
     */
    private class SheetToCSV {
        private final ExcelSheetReadConfig readConfig;
        private final CSVFormat csvFormat;
        /** True until the first cell of the current row has been accepted. */
        private boolean firstCellOfRow;
        /** True while the current row lies within the rows to skip. */
        private boolean skipRow;
        private int currentRow = -1;
        private int currentCol = -1;
        /** Number of records handed to the printer so far. */
        private int rowCount = 0;
        /** Skipped columns seen since the last accepted cell. */
        private int skippedColumns = 0;
        private CSVPrinter printer;
        /** True while handling the first non-skipped row of the sheet. */
        private boolean firstRow = false;
        private List<String> fieldValues;
        private boolean formatBooleans = true;

        /**
         * @param readConfig per-sheet settings; its first/last column are updated by this handler
         * @param csvFormat  format used when the printer is created in {@link #setOutput(OutputStream)}
         */
        public SheetToCSV(ExcelSheetReadConfig readConfig, CSVFormat csvFormat) {
            this.readConfig = readConfig;
            this.csvFormat = csvFormat;
        }

        /** @return the number of rows passed to the CSV printer */
        public int getRowCount() {
            return this.rowCount;
        }

        /**
         * Creates the CSV printer writing UTF-8 text to the given stream.
         *
         * @param output destination stream
         * @throws ProcessException if the printer cannot be created
         */
        public void setOutput(OutputStream output) {
            OutputStreamWriter streamWriter = new OutputStreamWriter(output, StandardCharsets.UTF_8);
            try {
                this.printer = new CSVPrinter((Appendable) streamWriter, this.csvFormat);
            } catch (IOException e) {
                throw new ProcessException("Failed to create CSV Printer.", (Throwable) e);
            }
        }

        /** @param formatBooleans whether boolean cells are rewritten as TRUE/FALSE */
        public void setFormatBooleans(boolean formatBooleans) {
            this.formatBooleans = formatBooleans;
        }

        /**
         * Begins a row. Rows whose index is not greater than the configured
         * override-first-row value are flagged as skipped.
         *
         * @param rowNum row index reported by the sheet
         */
        public void startRow(int rowNum) {
            if (rowNum <= this.readConfig.getOverrideFirstRow()) {
                this.skipRow = true;
                return;
            }
            this.skipRow = false;
            this.firstCellOfRow = true;
            // currentRow is still -1 only for the first row that is not skipped.
            this.firstRow = this.currentRow == -1;
            this.currentRow = rowNum;
            this.currentCol = -1;
            this.fieldValues = new ArrayList<>();
        }

        /**
         * Finishes the current row: records the last column when this was the first
         * row, drops rows with no non-empty value, pads the row with nulls and
         * prints it. An I/O failure while printing is logged as a warning.
         */
        public void endRow() {
            if (this.skipRow) {
                return;
            }
            if (this.firstRow) {
                this.readConfig.setLastColumn(this.currentCol);
            }
            // Rows consisting only of null/empty values are not written.
            if (this.fieldValues.stream().noneMatch(string -> string != null && !string.isEmpty())) {
                return;
            }
            int columnsToAdd = this.readConfig.getLastColumn() - this.currentCol - this.readConfig.getColumnsToSkip().size();
            for (int i = 0; i < columnsToAdd; ++i) {
                this.fieldValues.add(null);
            }
            try {
                this.printer.printRecord(this.fieldValues);
            } catch (IOException e) {
                ConvertExcelToCSVProcessor.this.getLogger().warn("Print Record failed", (Throwable) e);
            }
            ++this.rowCount;
        }

        /**
         * Appends one cell to the current row, inserting nulls for any columns
         * missing between the previous accepted cell and this one.
         *
         * @param cell the cell to add
         */
        public void cell(Cell cell) {
            if (this.skipRow) {
                return;
            }
            int thisCol = cell.getColumnIndex();
            if (this.firstRow && this.firstCellOfRow) {
                this.readConfig.setFirstColumn(thisCol);
            }
            // After the first row, ignore cells outside the recorded column range.
            if (!(this.firstRow || thisCol >= this.readConfig.getFirstColumn() && thisCol <= this.readConfig.getLastColumn())) {
                return;
            }
            if (this.readConfig.getColumnsToSkip().contains(thisCol)) {
                ++this.skippedColumns;
                return;
            }
            int missedCols = thisCol - this.readConfig.getFirstColumn() - (this.currentCol - this.readConfig.getFirstColumn()) - 1;
            if (this.firstCellOfRow) {
                missedCols = thisCol - this.readConfig.getFirstColumn();
            }
            // Deliberately skipped columns must not be padded.
            missedCols -= this.skippedColumns;
            if (this.firstCellOfRow) {
                this.firstCellOfRow = false;
            }
            for (int i = 0; i < missedCols; ++i) {
                this.fieldValues.add(null);
            }
            this.currentCol = thisCol;
            String stringCellValue = cell.getStringCellValue();
            CellType type = cell.getCellType();
            // Null guard: the value is treated as nullable just below, so do not dereference it blindly.
            if (stringCellValue != null && type == CellType.BOOLEAN && this.formatBooleans) {
                stringCellValue = stringCellValue.equals("1") ? "TRUE" : "FALSE";
            }
            this.fieldValues.add(stringCellValue != null && !stringCellValue.isEmpty() ? stringCellValue : null);
            this.skippedColumns = 0;
        }

        /**
         * Closes the CSV printer.
         *
         * @throws IOException if closing the printer fails
         */
        public void close() throws IOException {
            this.printer.close();
        }
    }

    /**
     * Per-sheet read settings plus the column range discovered while reading.
     *
     * <p>The sheet name, override-first-row value and columns-to-skip list are fixed
     * at construction; the first and last column start at 0 and are updated through
     * their setters.</p>
     */
    private static class ExcelSheetReadConfig {
        private final String sheetName;
        private final int overrideFirstRow;
        private final List<Integer> columnsToSkip;
        private int firstColumn;
        private int lastColumn;

        /**
         * @param columnsToSkip    column indexes to leave out (stored by reference, not copied)
         * @param overrideFirstRow rows with an index up to and including this value are skipped by the reader
         * @param sheetName        name of the sheet being read
         */
        public ExcelSheetReadConfig(List<Integer> columnsToSkip, int overrideFirstRow, String sheetName) {
            this.columnsToSkip = columnsToSkip;
            this.overrideFirstRow = overrideFirstRow;
            this.sheetName = sheetName;
        }

        /** @return the sheet name supplied at construction */
        public String getSheetName() {
            return sheetName;
        }

        /** @return the override-first-row value supplied at construction */
        public int getOverrideFirstRow() {
            return overrideFirstRow;
        }

        /** @return the same list instance supplied at construction */
        public List<Integer> getColumnsToSkip() {
            return columnsToSkip;
        }

        /** @return the first column index recorded so far (0 until set) */
        public int getFirstColumn() {
            return firstColumn;
        }

        /** @param value first column index to record */
        public void setFirstColumn(int value) {
            firstColumn = value;
        }

        /** @return the last column index recorded so far (0 until set) */
        public int getLastColumn() {
            return lastColumn;
        }

        /** @param lastColumn last column index to record */
        public void setLastColumn(int lastColumn) {
            this.lastColumn = lastColumn;
        }
    }
}

