PySpark – getting started – useful tips

By | September 10, 2017
  1. Example: creating a DataFrame

    from pyspark import SparkConf, SparkContext
    from pyspark.sql import SparkSession
    
    # Get the active SparkSession or create a new one (Spark 2.x+ entry point).
    spark = SparkSession.builder.getOrCreate()
    # Underlying SparkContext handle — presumably kept for RDD-level APIs
    # used elsewhere; not referenced in this snippet.
    sc = spark.sparkContext
    
    def create_dataframe():
        """Build a small two-column DataFrame and print it two ways.

        Demonstrates ``spark.createDataFrame`` with an explicit column-name
        sequence, then ``DataFrame.show`` first limited to one row and then
        sized to the full row count, both with truncation disabled.
        """
        columns = ("id", "name")
        rows = [
            (1, "puneetha"),
            (2, "bhoomika"),
        ]
        df = spark.createDataFrame(rows, columns)

        # Display only the first row, without truncating cell values.
        df.show(1, False)
        # Output:
        #     |id |name    |
        #     +---+--------+
        #     |1  |puneetha|
        #     +---+--------+
        #     only showing top 1 row

        # Display every record (count() sizes the limit), without truncating.
        df.show(df.count(), False)
        # Output:
        #     |id |name    |
        #     +---+--------+
        #     |1  |puneetha|
        #     |2  |bhoomika|
        #     +---+--------+
        
    def main():
        """Entry point: run the DataFrame-creation example."""
        create_dataframe()


    # Run the example only when executed as a script, not on import.
    if __name__ == "__main__":
        main()
    
  2. 
    

Leave a Reply

Your email address will not be published. Required fields are marked *