from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext

# create the SparkContext and SQLContext (the PySpark shell already provides
# these as sc and sqlContext; they are built here so the script runs standalone;
# the app name is chosen only for illustration)
conf = SparkConf().setAppName("StudentSQLExample")
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

# list of student records as tuples: (Id, Name, Salary, Dept)
Student=[(1,"jonny","2000","Physics"),
(2,"jack","4000","Bio"),
(3,"dinarys","3000","Chemistry"),
(4,"john","5000","Bio"),
(5,"theon","3000","English"),
(6,"bran","5000","Finance"),
(7,"aarya","2000","Physics"),
(8,"sansa","2000","Physics"),
(9,"Shital","1000","Bio")]
# create an RDD (Resilient Distributed Dataset) from the local list; at this point the rows carry no schema
StudentRDD=sc.parallelize(Student)
# create a DataFrame from the RDD, attaching a structure with column names
StudentDF=sqlContext.createDataFrame(StudentRDD).toDF("Id","Name","Salary","Dept")
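
# A quick sanity check on the inferred structure; printSchema is a standard
# DataFrame method. Note that Salary is inferred as a string because the
# source tuples hold quoted values:
StudentDF.printSchema()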
# register the dataframe as a temp table so it can be queried with SQL
StudentDF.registerTempTable('StudentTB')
# run SQL queries against the temp table
result=sqlContext.sql("select * from StudentTB")
result.show()
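
# The temp table accepts any Spark SQL statement. As a further sketch against
# the same StudentTB table, an aggregate query that counts students and
# averages salary per department (Spark implicitly casts the string Salary
# column to a numeric type for AVG):
deptStats = sqlContext.sql(
    "select Dept, count(*) as NumStudents, avg(Salary) as AvgSalary "
    "from StudentTB group by Dept")
deptStats.show()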
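
# On Spark 2.x and later the same flow goes through SparkSession, and
# registerTempTable is superseded by createOrReplaceTempView. A minimal
# equivalent sketch, assuming the same Student list from above:
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("StudentSQLExample").getOrCreate()
# createDataFrame accepts a local list plus a list of column names directly
StudentDF2 = spark.createDataFrame(Student, ["Id", "Name", "Salary", "Dept"])
StudentDF2.createOrReplaceTempView("StudentTB")
spark.sql("select Name, Dept from StudentTB where Dept = 'Bio'").show()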