Test ด้วยคำสั่ง assert

ใช้ไฟล์ JSON ตัวอย่างจากหัวข้อ "Read data in JSON format"

# Required for StructField, StringType, IntegerType, etc.
from pyspark.sql.types import *

# Path of the sample JSON file that the DataFrame is built from.
jsonFile = "/mnt/training/sample.json"

# Explicit schema for the file: three scalar columns plus an array of strings.
# Every field is declared nullable.
jsonSchema = StructType([
  StructField("id", LongType(), nullable=True),
  StructField("father", StringType(), nullable=True),
  StructField("mother", StringType(), nullable=True),
  StructField("children", ArrayType(StringType()), nullable=True),
])

# Build the DataFrame from the JSON file through the DataFrameReader,
# handing it the schema declared above.
testDF = spark.read.json(jsonFile, schema=jsonSchema)

ดู Schema และ column type

# (column name, type string) pairs reported by the DataFrame; the assertion
# cell further down checks these.
columns = testDF.dtypes

# Print the schema tree first, then the flat list of pairs.
testDF.printSchema()
print(columns)
root
 |-- id: long (nullable = true)
 |-- father: string (nullable = true)
 |-- mother: string (nullable = true)
 |-- children: array (nullable = true)
 |    |-- element: string (containsNull = true)

[('id', 'bigint'), ('father', 'string'), ('mother', 'string'), ('children', 'array<string>')]

ทดสอบด้วย assert

# Expected (column name, type string) pairs, in schema order. `columns` is the
# list of (name, type) tuples taken from testDF.dtypes earlier in the notebook.
expected_columns = [
  ("id", "bigint"),
  ("father", "string"),
  ("mother", "string"),
  ("children", "array<string>"),
]

# Check the column count first so the per-column lookups below cannot run
# past the end of `columns`.
assert len(columns) == len(expected_columns), (
    "Expected " + str(len(expected_columns)) + " columns but found " + str(len(columns))
)

# Compare each column against its expectation. Each failure message is built
# from the index, expected value and actual value of the column being checked,
# so it always describes the column that actually failed.
for index, (expected_name, expected_type) in enumerate(expected_columns):
    actual_name, actual_type = columns[index]
    assert actual_name == expected_name, (
        'Expected column {} to be "{}" but found "{}".'.format(index, expected_name, actual_name)
    )
    assert actual_type == expected_type, (
        'Expected column {} to be of type "{}" but found "{}".'.format(index, expected_type, actual_type)
    )

print("Congratulations, all tests passed... that is if no jobs were triggered :-)\n")
Congratulations, all tests passed... that is if no jobs were triggered :-)