Demo entry 6682259

Scala method

   

Submitted by anonymous on Dec 07, 2017 at 16:11
Language: Scala. Code size: 1.3 kB.

  /**
   * Reads genotypes stored in Parquet + Avro format and wraps them in a GenotypeRDD.
   *
   * The header lines, sequence dictionary, and samples are always read from
   * the given path first. If neither a predicate nor a projection is supplied,
   * a ParquetUnboundGenotypeRDD over the path is returned. Otherwise the
   * records are loaded through loadParquet with the supplied
   * predicate/projection and wrapped in an RDDBoundGenotypeRDD.
   *
   * @param pathName The path name to load genotypes from.
   *   Globs/directories are supported.
   * @param optPredicate An optional pushdown predicate to apply when reading
   *   Parquet + Avro. Defaults to None.
   * @param optProjection An optional projection schema to apply when reading
   *   Parquet + Avro. Defaults to None.
   * @return Returns a GenotypeRDD.
   */
  def loadParquetGenotypes(
    pathName: String,
    optPredicate: Option[FilterPredicate] = None,
    optProjection: Option[Schema] = None): GenotypeRDD = {

    // metadata stored alongside the records: header lines, then the
    // sequence dictionary, then the samples
    val headerLines = loadHeaderLines(pathName)
    val sequences = loadAvroSequenceDictionary(pathName)
    val sampleMetadata = loadAvroSamples(pathName)

    val readEverything = optPredicate.isEmpty && optProjection.isEmpty

    if (readEverything) {
      // no predicate and no projection: hand back the Parquet-backed variant
      ParquetUnboundGenotypeRDD(sc, pathName, sequences, sampleMetadata, headerLines)
    } else {
      // a predicate and/or projection was requested: load the records from
      // disk with them applied
      val genotypes = loadParquet[Genotype](pathName, optPredicate, optProjection)

      new RDDBoundGenotypeRDD(
        genotypes,
        sequences,
        sampleMetadata,
        headerLines,
        optPartitionMap = extractPartitionMap(pathName))
    }
  }

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).