Getting Started with Spark SQL

package cn.my.sparksql

import cn.my.sparkStream.LogLevel
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

/**
  * Created by lq on 2017/8/10.
  */
object SqlDemo {
  def main(args: Array[String]): Unit = {
    LogLevel.setStreamingLogLevels()
    val conf = new SparkConf().setAppName("sql").setMaster("local[2]")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    // Access HDFS as the "hadoop" user.
    System.setProperty("user.name","hadoop")
    // Parse each line of the file (name,course,score) into a Student record.
    val personRdd = sc.textFile("hdfs://mini1:9000/spark/student/studentCourse.dat")
      .map(line=>{
        val fields = line.split(",")
        Student(fields(0),fields(1),fields(2).toInt)
      })

    import sqlContext.implicits._
    val personDf = personRdd.toDF()

    // Register the DataFrame as a temporary table so it can be queried with SQL.
    personDf.registerTempTable("student")
    /**
      * 1. Query all of Lily's scores.
      * 2. Query Lily's total score.
      * 3. Query the students whose score is above 90.
      */
    sqlContext.sql("select * from student where name=\"Lily\" ").show()
    sqlContext.sql("select sum(score) from student where name=\"Lily\" ").show()
    sqlContext.sql("select * from student where score > 90 ").show()


  }
}



case class Student(name: String, course: String, score: Int)
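The three SQL statements above can also be written with the DataFrame API instead of SQL strings. The following is a rough equivalent sketch, not part of the original post; it assumes the personDf DataFrame built in the example (the column names come from the Student case class), and the $"..." syntax relies on the import sqlContext.implicits._ already shown above.

import org.apache.spark.sql.functions.sum

// 1. All of Lily's scores (DataFrame-API equivalent of the first SQL query).
personDf.filter($"name" === "Lily").show()
// 2. Lily's total score.
personDf.filter($"name" === "Lily").agg(sum($"score").as("totalScore")).show()
// 3. Students whose score is above 90.
personDf.filter($"score" > 90).show()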

How to use Spark SQL from the command line
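The original post ends here without filling in this section. As a minimal sketch (my addition, not from the original article): one common approach is to start the interactive spark-shell, which in Spark 1.x already provides sc and a SQLContext named sqlContext, and run the same statements there. The HDFS path and host below are simply the ones used in the example above.

// Start the shell first, e.g.  bin/spark-shell --master local[2]
// Inside the shell (Spark 1.x), sc and sqlContext are already defined.
case class Student(name: String, course: String, score: Int)
import sqlContext.implicits._

val studentDf = sc.textFile("hdfs://mini1:9000/spark/student/studentCourse.dat")
  .map(_.split(","))
  .map(f => Student(f(0), f(1), f(2).toInt))
  .toDF()

studentDf.registerTempTable("student")
sqlContext.sql("select * from student where score > 90").show()

Spark also ships a bin/spark-sql command-line tool for running SQL directly against Hive tables, which may be what this heading was originally referring to.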

    Original author: 牵牛花
    Original article: https://www.cnblogs.com/rocky-AGE-24/p/7356796.html
    This article was reposted from the web and is shared only for the purpose of sharing knowledge; if it infringes any rights, please contact the blogger to have it removed.