栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 前沿技术 > 大数据 > 大数据系统

study SQL by using pyspark(part one)

study SQL by using pyspark(part one)

#%%
from pyspark import SparkConf,SparkContext
from pyspark import sql
from pyspark.sql import SparkSession
spark = SparkSession.builder.config(conf=SparkConf()).getOrCreate()
sc = spark.sparkContext

# %%
rdd1 = sc.parallelize([(22,'xiaobao'),(23,'chuyu'),(23,'caffe')])
df1 = spark.createDataframe(rdd1,['age','name'])
# %%
df1.createOrReplaceTempView("mytable")
df1.show()
# %%
spark.sql("select * from mytable where name='xiaobao'").show()
# %%
jdbcDF = spark.read.format("jdbc")
    .option("driver","com.mysql.jdbc.Driver")
    .option("url","jdbc:mysql://***.***.**.***:3306/Gadaite")
    .option("dbtable","audi")
    .option("user","root")
    .option("password","*******")
    .load()
# %%
jdbcDF.createOrReplaceTempView("temptable")
spark.sql("select * from temptable").show(10)
# %%
#所有奥迪A1的数量
spark.sql("select * from temptable").show()
# %%
spark.sql("select * from temptable where engineSize = 2.0").show()
#%%
spark.sql("select * from temptable where year=2018").show()
# %%
spark.sql("select * from temptable where model='A4'").show()
# %%
spark.sql("select count(*) from temptable where trim(model)='A4'").show()

# %%
spark.sql("select * from temptable where trim(model)='A4'").show()

# %%
spark.sql("""
    select * 
    from temptable 
    where trim(model)='A4' 
    order by 
    price limit 15
    """).show()
# %%

#判断是否区分大小写
#结果区分大小写
spark.sql("""
    select * from temptable 
    where trim(transmission)='Manual' 
    limit 15
    """).show()
spark.sql("""
    select * from temptable 
    where trim(transmission)='manual' 
    limit 15
    """).show()
# %%
#-------%%%通配符
spark.sql("""
    select * from temptable 
    where fuelType like 'Pet%'
    """).show()
# %%
spark.sql("""
    select * from temptable 
    where mpg like '%1.4%'
    """).show()
# %%
spark.sql("""
    select * from temptable 
    where not engineSize like '%2.0%'
    """).show()
# %%
spark.sql("""
    select * from temptable 
    where ((engineSize like '%2.0%') 
    and (fuelType = 'Petrol'))
    """).show()
# %%
#组合计算
#此处明显验证出model前面有空格
spark.sql("""
    select Concat(transmission,model) 
    from temptable 
    order by price desc 
    limit 10
    """).show()
# %%
spark.sql("""
    select Concat(transmission,ltrim(model)) 
    from temptable 
    order by price desc 
    limit 10
    """).show()
#%%
spark.sql("""
    select Concat(transmission,'------',ltrim(model)) 
    from temptable 
    order by price desc 
    limit 10
    """).show()
# %%
spark.sql("""
    select Concat(transmission,'------',ltrim(model)) 
    as car_info 
    from temptable 
    order by price desc 
    limit 10
    """).show()
# %%
spark.sql("""
    select model,year,price,mileage,engineSize, 
    price / engineSize as per_engine_price 
    from temptable 
    order by price desc 
    limit 10
    """).show()
# %%
#测试计算
spark.sql("select 3*2,trim(' riven  ')").show()
# %%
"""
    以下不可用,会报错
    spark.sql("select Curdate()")
"""
# %%
spark.sql("select * from temptable").show()
# %%
#转换成大写字符串
spark.sql("""select  
    transmission ,fuelType,
    upper(transmission) as TRANSMISSION,
    UPPER(fuelType) AS FUELTYPE  
    from temptable 
    LIMIT 15 
    """).show()
# %%
#转换成小写字符串
spark.sql("""select  
    transmission ,fuelType,
    lower(transmission) as transmission,
    lower(fuelType) AS fueltype  
    from temptable 
    LIMIT 15 
    """).show()
# %%
#返回字符串的长度
spark.sql("""select  
    transmission ,fuelType,
    length(transmission) as transmission_lens,
    length(fuelType) AS fuelType_lens  
    from temptable 
    LIMIT 15 
    """).show()
#%%
#返回字符串左起的的n个字符
spark.sql("""select  
    transmission ,fuelType,
    left(transmission,2) as transmission_two,
    left(fuelType,3) AS fueltype_3  
    from temptable 
    LIMIT 15 
    """).show()
# %%
#返回字符串右起的n个字符
spark.sql("""select  
    transmission ,fuelType,
    right(transmission,3) as transmission_two,
    right(fuelType,3) AS fueltype_3  
    from temptable 
    LIMIT 15 
    """).show()
#%%
#提取字符串的组成部分,类似于python的切片操作
#substr(a,b,c),y也可用substring
spark.sql("""select  
    transmission ,fuelType,
    substr(transmission,2,2) as transmission_res,
    substr(fuelType,-2,3) AS fueltype_res   
    from temptable 
    LIMIT 15 
    """).show()
spark.sql("""select  
    transmission ,fuelType,
    substring(transmission,2,2) as transmission_res,
    substring(fuelType,-2,3) AS fueltype_res   
    from temptable 
    LIMIT 15 
    """).show()
# %%
# #返回字符串长度的其它函数写法,pyspark不支持
# spark.sql("""select  
#     transmission ,fuelType,
#     len(transmission) as transmission_lens,
#     len(fuelType) AS fuelType_lens  
#     from temptable 
#     LIMIT 15 
#     """).show()
# %%
#soundex描述其语音表示字母数字模式的算法
spark.sql("""select  
    fuelType 
    from temptable 
    where soundex(fuelType) = soundex("Petol") 
    LIMIT 15 
    """).show()
# %%
spark.sql("""
    select * from temptable limit 10
    """).show()
#%%
from pyspark.sql.functions import *
#%%
#日期和时间处理函数
spark.sql("""
    select * from temptable limit 5
    """).show()
# %%
#换个带时间的表
jdbcdf = spark.read.format("jdbc")
    .option("driver","com.mysql.jdbc.Driver")
    .option("url","jdbc:mysql://***.***.**.***:3306/Gadaite")
    .option("dbtable","footbshootouts")
    .option("user","root")
    .option("password","*******")
    .load()
# %%
jdbcdf.createOrReplaceTempView("mytable")
# %%
spark.sql("""select 
    * from mytable limit 20   
    """).show()
# %%
#字段需要时date,不是varchar
# spark.sql("""select 
#     winner from mytable 
#     where datepart(yy,date)=1973
#     """).show()

# %%
spark.sql("""
    select winner,away_team,date,home_team,
    upper(left(away_team,2) || left(winner,3)) 
    as winer_info from mytable 
    limit 15
    """).show()
# %%
spark.sql("""
    select winner,away_team,
    left(away_team,2) || left(winner,3) as winer_info 
    from mytable 
    limit 15
    """).show()
# %%
spark.sql("""
    select winner,away_team,
    left(away_team,2) as winer_info 
    from mytable 
    limit 15
    """).show()
# %%
spark.sql("""
    select winner,away_team,
    left(winner,3) as winer_info 
    from mytable 
    limit 15
    """).show()
# %%
spark.sql("""
    select winner,away_team,
    (away_team || winner) as winer_info 
    from mytable 
    limit 15
    """).show()
# %%
spark.sql("""
    select winner,away_team,
    (away_team || winner) as winer_info 
    from mytable 
    limit 15
    """).show()
# %%
spark.sql("select count(*) as count from mytable").show()
# %%
spark.sql("""
    select winner,away_team,
    (away_team || winner) as winer_info 
    from mytable 
    limit 15
    """).show()
# %%
spark.sql("""
    select winner,away_team,date,home_team,
    upper(left(away_team,2) || "---" || left(winner,3)) 
    as winer_info from mytable 
    limit 15
    """).show()
# %%

就不截图了,仅作记录

转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/280909.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号