// Build the SparkSession used by the examples. `getOrCreate()` hands back an
// already-active session when one exists instead of building a new one.
// NOTE(review): "spark.some.config.option" / "some-value" looks like a
// placeholder setting — confirm whether it can be dropped.
val spark = SparkSession
  .builder()
  .appName("Spark SQL basic example")
  .config("spark.some.config.option", "some-value")
  .getOrCreate()

// Run the examples in order. The `finally` clause stops the session even when
// one of them throws, so a failing example no longer leaves it running.
// NOTE(review): statements further down this file still reference `spark`; if
// they are meant to execute as written, the stop has to happen after them — confirm.
try {
  runBasicDataFrameExample(spark)
  runDatasetCreationExample(spark)
  runInferSchemaExample(spark)
  runProgrammaticSchemaExample(spark)
} finally {
  spark.stop()
}


// NOTE(review): this zero-argument call reads like a section heading for the
// snippet below (the driver invokes runBasicDataFrameExample(spark)) — confirm.
runBasicDataFrameExample()

// Load the sample JSON file into a DataFrame.
val df = spark.read.json("examples/src/main/resources/people.json")

// Print the rows, then the DataFrame's schema.
df.show()
df.printSchema()

// NOTE(review): the $"col" syntax needs `import spark.implicits._` in scope;
// that import is not visible in this excerpt — confirm it exists.
df.select("name").show()               // select a single column
df.select($"name", $"age" + 1).show()  // projection: name, and age incremented by 1
df.filter($"age" > 21).show()          // filter: keep rows whose age is greater than 21

// Count rows per distinct age value.
df.groupBy("age").count().show()

// Temporary view: register the DataFrame as "people" so it can be queried with SQL.
df.createOrReplaceTempView("people")
val sqlDF = spark.sql("SELECT * FROM people")
sqlDF.show()

// Global temporary view. The create-or-replace variant is used so that running
// this snippet again does not fail because a global view named "people" already
// exists; it mirrors the create-or-replace call used for the temporary view.
df.createOrReplaceGlobalTempView("people")

// NOTE(review): this zero-argument call reads like a section heading for the
// snippet below (the driver invokes runDatasetCreationExample(spark)) — confirm.
runDatasetCreationExample()


// Build a one-element Dataset from a local Seq of Person and print it.
// Person is declared outside this excerpt.
// NOTE(review): `.toDS()` and `.as[Person]` rely on `import spark.implicits._`,
// which is not visible here — confirm it is in scope.
 val caseClassDS = Seq(Person("Andy", 32)).toDS()
caseClassDS.show()

// Dataset of Ints; add 1 to every element and collect the result to the driver.
// The collected array is neither assigned nor printed here.
val primitiveDS = Seq(1, 2, 3).toDS()
primitiveDS.map(_ + 1).collect()

// Read the JSON file and view its rows as a Dataset[Person], then print it.
val path = "examples/src/main/resources/people.json"
val peopleDS = spark.read.json(path).as[Person]
peopleDS.show()

// NOTE(review): this zero-argument call reads like a section heading for the
// snippet below (the driver invokes runInferSchemaExample(spark)) — confirm.
runInferSchemaExample()

// Read the text file line by line, split each line on commas, build a Person
// from the first field and the integer-parsed second field, and convert the
// result to a DataFrame. Person is declared outside this excerpt.
val peopleDF = spark.sparkContext
  .textFile("examples/src/main/resources/people.txt")
  .map(line => line.split(","))
  .map(cols => Person(cols(0), cols(1).trim.toInt))
  .toDF()

// Register the DataFrame as the temporary view "people".
peopleDF.createOrReplaceTempView("people")

// Rows whose age lies between 13 and 19 inclusive.
val teenagersDF = spark.sql("SELECT name, age FROM people WHERE age BETWEEN 13 AND 19")

// Read the first column by position.
teenagersDF.map(row => s"Name: ${row(0)}").show()

// Read the same column by field name.
teenagersDF.map { row =>
  val name = row.getAs[String]("name")
  s"Name: $name"
}.show()



// NOTE(review): this zero-argument call reads like a section heading for the
// snippet below (the driver invokes runProgrammaticSchemaExample(spark)) — confirm.
runProgrammaticSchemaExample()

// Raw lines of the input file.
val peopleRDD = spark.sparkContext.textFile("examples/src/main/resources/people.txt")

// The schema is described as a space-separated list of column names.
val schemaString = "name age"

// One nullable string-typed field per name in the schema string.
val fields = for {
  fieldName <- schemaString.split(" ")
} yield StructField(fieldName, StringType, nullable = true)
val schema = StructType(fields)

// Turn each comma-separated line into a Row of two strings; the second value
// is trimmed but stays a String, in line with the all-StringType schema.
val rowRDD = peopleRDD.map { line =>
  val cols = line.split(",")
  Row(cols(0), cols(1).trim)
}

// Apply the schema to the rows and expose the result as the view "people".
// NOTE(review): `peopleDF` is also declared earlier in this file; if both
// declarations share one scope this is a redefinition — confirm.
val peopleDF = spark.createDataFrame(rowRDD, schema)
peopleDF.createOrReplaceTempView("people")

// Query the view and print each name with a "Name: " prefix.
val results = spark.sql("SELECT name FROM people")
results.map(row => s"Name: ${row(0)}").show()