package org.apache.spark.ml.feature;

import org.apache.spark.ml.Model;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.ml.linalg.VectorUDT;
import org.apache.spark.ml.util.MLTestingUtils$;
import org.apache.spark.ml.util.SchemaUtils$;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.DataTypes;
import org.scalactic.Bool$;
import org.scalactic.Prettifier$;
import org.scalactic.source.Position;
import org.scalatest.Assertions$;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.immutable.$colon;
import scala.collection.immutable.Nil$;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

/* compiled from: LSHTest.scala */
/* loaded from: input_file:org/apache/spark/ml/feature/LSHTest$.class */
/**
 * Helpers that fit an {@link LSH} estimator and measure how well the resulting
 * {@link LSHModel} behaves on a dataset.
 *
 * <p>This is the JVM view of what appears to be the Scala singleton object
 * {@code LSHTest} (compiled from {@code LSHTest.scala}); the one instance is
 * published through {@link #MODULE$}. Each helper runs a few sanity assertions
 * on the model output and then returns a pair of ratios as a
 * {@code Tuple2} of two doubles.
 */
public final class LSHTest$ {
    /** The singleton instance; assigned by the private constructor during class initialisation. */
    public static LSHTest$ MODULE$;

    static {
        // The constructor stores the new instance in MODULE$.
        new LSHTest$();
    }

    /**
     * Fits {@code lsh} on {@code dataset} and measures how often the hash buckets
     * disagree with the true key distance.
     *
     * <p>The transformed dataset is cross-joined with itself. Every pair gets a
     * {@code same_bucket} flag (hash distance of the two output-column values is
     * exactly {@code 0.0}) and a {@code distance} value (key distance of the two
     * input-column values).
     *
     * <p>Before measuring, the method checks estimator/model copy and UIDs, checks
     * that the output column is an array of vectors, and asserts that the first
     * row holds {@code getNumHashTables()} hash vectors.
     *
     * @param dataset the data to fit and transform
     * @param lsh     the estimator under test
     * @param d       distance above which a same-bucket pair counts as a false positive
     * @param d2      distance below which a different-bucket pair counts as a false negative
     * @param <T>     the concrete model type produced by {@code lsh}
     * @return {@code (falsePositiveRate, falseNegativeRate)}: the share of same-bucket
     *         pairs with {@code distance > d}, and the share of different-bucket pairs
     *         with {@code distance < d2}; a ratio is NaN when its bucket group is empty
     */
    public <T extends LSHModel<T>> Tuple2<Object, Object> calculateLSHProperty(Dataset<?> dataset, LSH<T> lsh, double d, double d2) {
        T model = lsh.fit(dataset);
        String inputCol = model.getInputCol();
        String outputCol = model.getOutputCol();
        Dataset transformed = model.transform(dataset);

        MLTestingUtils$.MODULE$.checkCopyAndUids(lsh, model);

        // The output column must be an array of vectors, with one vector per hash table.
        SchemaUtils$.MODULE$.checkColumnType(transformed.schema(), model.getOutputCol(), DataTypes.createArrayType(new VectorUDT()), SchemaUtils$.MODULE$.checkColumnType$default$4());
        Seq headHashes = (Seq) ((Row) transformed.select(outputCol, Predef$.MODULE$.wrapRefArray(new String[0])).head()).get(0);
        Assertions$.MODULE$.assertionsHelper().macroAssert(Bool$.MODULE$.lengthSizeMacroBool(headHashes, "length", BoxesRunTime.boxToInteger(headHashes.length()), BoxesRunTime.boxToInteger(model.getNumHashTables()), Prettifier$.MODULE$.default()), "", Prettifier$.MODULE$.default(), new Position("LSHTest.scala", "Please set the environment variable SCALACTIC_FILL_FILE_PATHNAMES to yes at compile time to enable this feature.", 76));

        // same_bucket: true when the two rows' hash vectors have hash distance exactly 0.
        Column sameBucket = functions$.MODULE$.udf((vectorArr, vectorArr2) -> {
            return BoxesRunTime.boxToBoolean($anonfun$calculateLSHProperty$2(model, vectorArr, vectorArr2));
        }, package$.MODULE$.universe().TypeTag().Boolean(), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: org.apache.spark.ml.feature.LSHTest$$typecreator3$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticClass("scala.Array"), new $colon.colon(mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor(), Nil$.MODULE$));
            }
        }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: org.apache.spark.ml.feature.LSHTest$$typecreator4$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticClass("scala.Array"), new $colon.colon(mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor(), Nil$.MODULE$));
            }
        })).apply(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("a." + outputCol), functions$.MODULE$.col("b." + outputCol)}));

        // distance: the model's key distance between the two rows' input vectors.
        Column distance = functions$.MODULE$.udf((vector, vector2) -> {
            return BoxesRunTime.boxToDouble(model.keyDistance(vector, vector2));
        }, package$.MODULE$.universe().TypeTag().Double(), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: org.apache.spark.ml.feature.LSHTest$$typecreator1$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
            }
        }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: org.apache.spark.ml.feature.LSHTest$$typecreator2$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
            }
        })).apply(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("a." + inputCol), functions$.MODULE$.col("b." + inputCol)}));

        Dataset pairs = transformed.as("a").crossJoin(transformed.as("b")).withColumn("same_bucket", sameBucket).withColumn("distance", distance);
        Dataset sameBucketPairs = pairs.filter(functions$.MODULE$.col("same_bucket"));
        Dataset otherBucketPairs = pairs.filter(functions$.MODULE$.col("same_bucket").unary_$bang());

        // Widen the numerators to double BEFORE dividing: long / long would truncate
        // every fractional rate and throw ArithmeticException on an empty group.
        double falsePositives = sameBucketPairs.filter(functions$.MODULE$.col("distance").$greater(BoxesRunTime.boxToDouble(d))).count();
        double falseNegatives = otherBucketPairs.filter(functions$.MODULE$.col("distance").$less(BoxesRunTime.boxToDouble(d2))).count();
        return new Tuple2.mcDD.sp(falsePositives / sameBucketPairs.count(), falseNegatives / otherBucketPairs.count());
    }

    /**
     * Fits {@code lsh} on {@code dataset} and compares the model's approximate
     * nearest-neighbour result for {@code vector} against the exact one.
     *
     * <p>The exact result is {@code dataset} sorted by key distance to {@code vector},
     * limited to {@code i} rows. The approximate result comes from
     * {@code approxNearestNeighbors(dataset, vector, i, z, "distCol")}. Its schema
     * is asserted to equal the model's transformed schema plus a double
     * {@code distCol}. When {@code z} is {@code false}, its row count is also
     * asserted to equal {@code i}.
     *
     * @param lsh     the estimator under test
     * @param dataset the data to fit and search
     * @param vector  the query key
     * @param i       number of neighbours requested
     * @param z       flag forwarded to {@code approxNearestNeighbors} (presumably its
     *                single-probe switch — confirm against the {@code LSHModel} API);
     *                when {@code true} the row-count assertion is skipped
     * @param <T>     the concrete model type produced by {@code lsh}
     * @return {@code (precision, recall)}: rows common to both results (joined on the
     *         input column) divided by the approximate row count, and by the exact
     *         row count
     */
    public <T extends LSHModel<T>> Tuple2<Object, Object> calculateApproxNearestNeighbors(LSH<T> lsh, Dataset<?> dataset, Vector vector, int i, boolean z) {
        T model = lsh.fit(dataset);

        // Exact answer: order every row by its true distance to the key and keep the first i.
        Column distanceToKey = functions$.MODULE$.udf(vector2 -> {
            return BoxesRunTime.boxToDouble(model.keyDistance(vector2, vector));
        }, package$.MODULE$.universe().TypeTag().Double(), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: org.apache.spark.ml.feature.LSHTest$$typecreator1$2
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
            }
        })).apply(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col(model.getInputCol())}));
        Dataset expected = dataset.sort(Predef$.MODULE$.wrapRefArray(new Column[]{distanceToKey})).limit(i);

        Dataset actual = model.approxNearestNeighbors(dataset, vector, i, z, "distCol");
        Assertions$.MODULE$.assertionsHelper().macroAssert(Bool$.MODULE$.simpleMacroBool(actual.schema().sameType(model.transformSchema(dataset.schema()).add("distCol", DataTypes.DoubleType)), "actual.schema.sameType(model.transformSchema(dataset.schema).add(\"distCol\", org.apache.spark.sql.types.DataTypes.DoubleType))", Prettifier$.MODULE$.default()), "", Prettifier$.MODULE$.default(), new Position("LSHTest.scala", "Please set the environment variable SCALACTIC_FILL_FILE_PATHNAMES to yes at compile time to enable this feature.", 119));

        if (!z) {
            // Only in this mode is the result asserted to hold exactly i rows.
            long actualCount = actual.count();
            Assertions$.MODULE$.assertionsHelper().macroAssert(Bool$.MODULE$.binaryMacroBool(BoxesRunTime.boxToLong(actualCount), "==", BoxesRunTime.boxToInteger(i), actualCount == ((long) i), Prettifier$.MODULE$.default()), "", Prettifier$.MODULE$.default(), new Position("LSHTest.scala", "Please set the environment variable SCALACTIC_FILL_FILE_PATHNAMES to yes at compile time to enable this feature.", 125));
        }

        // Held as double so both ratios below use floating-point division.
        double correctCount = expected.join(actual, model.getInputCol()).count();
        return new Tuple2.mcDD.sp(correctCount / actual.count(), correctCount / expected.count());
    }

    /**
     * Fits {@code lsh} on {@code dataset} and compares the model's approximate
     * similarity join of {@code dataset} and {@code dataset2} against the exact one.
     *
     * <p>The exact result is the cross join of the two datasets, keeping pairs whose
     * key distance on the input column is below {@code d}. The approximate result
     * comes from {@code approxSimilarityJoin(dataset, dataset2, d)}. Its
     * {@code distCol} is checked to be a double column. Its {@code datasetA} and
     * {@code datasetB} columns are asserted to have the same type as the model's
     * transformed schema of the respective input.
     *
     * @param lsh      the estimator under test
     * @param dataset  left side of the join; also the data the model is fitted on
     * @param dataset2 right side of the join
     * @param d        distance threshold
     * @param <T>      the concrete model type produced by {@code lsh}
     * @return {@code (precision, recall)}: approximate-join rows with
     *         {@code distCol < d} divided by all approximate-join rows, and by the
     *         exact-join row count
     */
    public <T extends LSHModel<T>> Tuple2<Object, Object> calculateApproxSimilarityJoin(LSH<T> lsh, Dataset<?> dataset, Dataset<?> dataset2, double d) {
        T model = lsh.fit(dataset);
        String inputCol = model.getInputCol();

        // Exact answer: every (a, b) pair whose true key distance is below the threshold.
        Column pairDistance = functions$.MODULE$.udf((vector, vector2) -> {
            return BoxesRunTime.boxToDouble(model.keyDistance(vector, vector2));
        }, package$.MODULE$.universe().TypeTag().Double(), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: org.apache.spark.ml.feature.LSHTest$$typecreator1$3
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
            }
        }), package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: org.apache.spark.ml.feature.LSHTest$$typecreator2$2
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
            }
        })).apply(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("a." + inputCol), functions$.MODULE$.col("b." + inputCol)}));
        Dataset expected = dataset.as("a").crossJoin(dataset2.as("b")).filter(pairDistance.$less(BoxesRunTime.boxToDouble(d)));

        Dataset actual = model.approxSimilarityJoin(dataset, dataset2, d);
        SchemaUtils$.MODULE$.checkColumnType(actual.schema(), "distCol", DataTypes.DoubleType, SchemaUtils$.MODULE$.checkColumnType$default$4());
        Assertions$.MODULE$.assertionsHelper().macroAssert(Bool$.MODULE$.simpleMacroBool(actual.schema().apply("datasetA").dataType().sameType(model.transformSchema(dataset.schema())), "actual.schema.apply(\"datasetA\").dataType.sameType(model.transformSchema(datasetA.schema))", Prettifier$.MODULE$.default()), "", Prettifier$.MODULE$.default(), new Position("LSHTest.scala", "Please set the environment variable SCALACTIC_FILL_FILE_PATHNAMES to yes at compile time to enable this feature.", 159));
        Assertions$.MODULE$.assertionsHelper().macroAssert(Bool$.MODULE$.simpleMacroBool(actual.schema().apply("datasetB").dataType().sameType(model.transformSchema(dataset2.schema())), "actual.schema.apply(\"datasetB\").dataType.sameType(model.transformSchema(datasetB.schema))", Prettifier$.MODULE$.default()), "", Prettifier$.MODULE$.default(), new Position("LSHTest.scala", "Please set the environment variable SCALACTIC_FILL_FILE_PATHNAMES to yes at compile time to enable this feature.", 161));

        // Held as double so both ratios below use floating-point division.
        double correctCount = actual.filter(functions$.MODULE$.col("distCol").$less(BoxesRunTime.boxToDouble(d))).count();
        return new Tuple2.mcDD.sp(correctCount / actual.count(), correctCount / expected.count());
    }

    /**
     * Body of the {@code same_bucket} UDF used by {@link #calculateLSHProperty}.
     *
     * @param lSHModel   the fitted model supplying {@code hashDistance}
     * @param vectorArr  hash vectors of the first row
     * @param vectorArr2 hash vectors of the second row
     * @return {@code true} when the model's hash distance between the two arrays is
     *         exactly {@code 0.0}
     */
    public static final /* synthetic */ boolean $anonfun$calculateLSHProperty$2(LSHModel lSHModel, Vector[] vectorArr, Vector[] vectorArr2) {
        return lSHModel.hashDistance(vectorArr, vectorArr2) == 0.0d;
    }

    /** Publishes the newly created instance as {@link #MODULE$}; invoked once from the static initialiser. */
    private LSHTest$() {
        MODULE$ = this;
    }
}
