
020 Spark SQL


1. Using Spark SQL from IDEA

163. Advanced Spark SQL development in practice: using the CLI
[Hive] beeline connection error: root is not allowed to impersonate root (state=08S01,code=0)

resources

log4j.properties

log4j.rootCategory=ERROR, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Set the default spark-shell log level to ERROR. When running the spark-shell, the
# log level for this class is used to overwrite the root logger's log level, so that
# the user can have different defaults for the shell and regular Spark apps.
log4j.logger.org.apache.spark.repl.Main=ERROR

# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark_project.jetty=ERROR
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=ERROR
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=ERROR
log4j.logger.org.apache.parquet=ERROR
log4j.logger.parquet=ERROR

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR

/opt/module/hadoop-3.1.3/etc/hadoop
core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop102:9820</value>
    </property>

    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/module/hadoop-3.1.3/data</value>
    </property>

    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>atguigu</value>
    </property>

    <!-- Proxyuser (impersonation) settings that fix the beeline error above -->
    <property>
        <name>hadoop.proxyuser.atguigu.hosts</name>
        <value>*</value>
    </property>

    <property>
        <name>hadoop.proxyuser.atguigu.groups</name>
        <value>*</value>
    </property>

    <property>
        <name>hadoop.proxyuser.atguigu.users</name>
        <value>*</value>
    </property>
</configuration>
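After adding the proxyuser properties to core-site.xml, the running NameNode can pick up the new impersonation settings without a full cluster restart, after which beeline should connect. A minimal sketch, assuming Hadoop's bin directory is on PATH and HiveServer2 is already running on hadoop102:10000 (the user name atguigu matches the hadoop.proxyuser.* keys above):

```shell
# Push the refreshed proxyuser settings to the running NameNode
hdfs dfsadmin -refreshSuperUserGroupsConfiguration

# If YARN is running, refresh the ResourceManager as well
yarn rmadmin -refreshSuperUserGroupsConfiguration

# Reconnect with beeline as the proxied user
beeline -u jdbc:hive2://hadoop102:10000 -n atguigu
```

Alternatively, restarting HDFS (stop-dfs.sh / start-dfs.sh) also applies the new settings.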

/opt/module/hadoop-3.1.3/etc/hadoop
hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>dfs.namenode.http-address</name>
        <value>hadoop102:9870</value>
    </property>

    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>hadoop104:9868</value>
    </property>
</configuration>

/opt/module/apache-hive-3.1.2-bin/conf
hive-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <!-- Execution engine -->
    <property>
        <name>hive.execution.engine</name>
        <value>tez</value>
    </property>
    <property>
        <name>hive.tez.container.size</name>
        <value>1024</value>
    </property>

    <!-- CLI display -->
    <property>
        <name>hive.cli.print.header</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.cli.print.current.db</name>
        <value>true</value>
    </property>

    <!-- HiveServer2 thrift bind host -->
    <property>
        <name>hive.server2.thrift.bind.host</name>
        <value>hadoop102</value>
    </property>

    <!-- HiveServer2 thrift port -->
    <property>
        <name>hive.server2.thrift.port</name>
        <value>10000</value>
    </property>

    <!-- Metastore JDBC connection (MySQL) -->
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://hadoop102:3306/metastore?useSSL=false</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>root</value>
    </property>

    <!-- Warehouse location on HDFS -->
    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/user/hive/warehouse</value>
    </property>

    <!-- Skip metastore schema version verification -->
    <property>
        <name>hive.metastore.schema.verification</name>
        <value>false</value>
    </property>

    <!-- Disable metastore event notification API authorization -->
    <property>
        <name>hive.metastore.event.db.notification.api.auth</name>
        <value>false</value>
    </property>
</configuration>
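With hive-site.xml in place, the Spark SQL CLI can talk to the Hive metastore directly; for that, hive-site.xml must also be copied into $SPARK_HOME/conf. A minimal sketch of bringing up the services and running the CLI, assuming HIVE_HOME and SPARK_HOME are set and the log paths are arbitrary choices:

```shell
# Make hive-site.xml visible to Spark
cp $HIVE_HOME/conf/hive-site.xml $SPARK_HOME/conf/

# Start the Hive metastore and HiveServer2 in the background
nohup hive --service metastore  > /tmp/metastore.log   2>&1 &
nohup hive --service hiveserver2 > /tmp/hiveserver2.log 2>&1 &

# Launch the Spark SQL CLI and run a quick sanity check
spark-sql --master local[2] -e "show databases;"
```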

Operating Hive from Spark in IDEA

package com.atguigu.spark.sql

import org.apache.spark.sql.{DataFrame, SparkSession}

object SparkSQL06_Hive {
  def main(args: Array[String]): Unit = {
    System.setProperty("HADOOP_USER_NAME", "atguigu")

    val sparkSession: SparkSession = SparkSession.builder()
      .master("local[2]")
      .enableHiveSupport()
      .appName("SparkSQL")
      .getOrCreate()
    import sparkSession.implicits._

    // Read Hive metadata: list the tables in the current database
    sparkSession.sql("show tables").show()

    sparkSession.close()
  }
}
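For the program above to compile with enableHiveSupport(), the spark-hive module and a MySQL driver matching the hive-site.xml settings must be on the classpath. A hedged Maven fragment; the version numbers are assumptions and should be aligned with your cluster's Spark and MySQL versions:

```xml
<!-- Spark SQL with Hive support (Scala 2.12 build; version is an assumption) -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive_2.12</artifactId>
    <version>3.0.0</version>
</dependency>
<!-- JDBC driver matching com.mysql.jdbc.Driver in hive-site.xml -->
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.48</version>
</dependency>
```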
Please credit when reprinting: reprinted from www.mshxw.com
Original article: https://www.mshxw.com/it/757844.html