>

root
|-- channelGrouping: string (nullable = true)
|-- clientId:string (nullable = true)
|-- customDimensions: array (nullable = true)
|    |-- element: struct (containsNull = true)
|    |    |--index: Long (nullable = true)
|    |    |-- value: string (nullable = true)
|-- date: string (nullable = true)
|-- device: struct (nullable =true)
|    |-- browser:string(nullable = true)
|    |-- browserSize: Int (nullable = true)
|    |-- browserVersion:string (nullable = true)
|    |-- deviceCategory: string (nullable = true)
|    |-- flashVersion: string (nullable = true)
|    |--isMobile: boolean (nullable = true)
|    |-- javaEnabled: boolean (nullable = true)

// Casts the top-level columns of `df` to string: struct columns stay structs
// with each direct field retyped to string, every other column is cast to a
// plain string. The resulting schema is printed.

// One SQL cast expression per top-level struct column; the target type lists
// every direct field of the struct as `<field>:string`.
val structCastExpression1 = df.schema
  .map(field => (field.name, field.dataType))
  .collect { case (col, struct: StructType) =>
    val fieldTypes = struct.map(f => s"${f.name}:string").mkString("<", ",", ">")
    s"""cast($col as struct$fieldTypes ) as $col"""
  }
// e.g. List(cast(s1 as struct<x:string,y:string> ) as s1,
//           cast(s2 as struct<u:string,v:string> ) as s2)

// Every non-struct column is cast to string as a whole. This includes array
// columns, so an array of structs becomes a single string column instead of
// an array whose element fields were retyped.
val otherColumns = df.schema
  .filterNot(_.dataType.isInstanceOf[StructType])
  .map(c => s""" cast(${c.name} as string) as ${c.name} """)
// e.g. List(" cast(id as string) as id ", " cast(d as string) as d ")

// Original column order, used to restore it after the casts.
val originalColumns = df.columns

// Combine both expression lists into one (non-struct columns first).
val finalExpression = otherColumns ++ structCastExpression1
// e.g. List(" cast(id as string) as id ",
//           " cast(d as string) as d ",
//           cast(s1 as struct<x:string,y:string> ) as s1,
//           cast(s2 as struct<u:string,v:string> ) as s2)

// Apply the casts with `selectExpr`, then re-select in the original order.
// NOTE: `originalColumns.head` throws if `df` has no columns.
df.selectExpr(finalExpression: _*)
  .select(originalColumns.head, originalColumns.tail: _*)
  .printSchema

위 코드를 실행한 후의 스키마는 다음과 같습니다.

root
|-- channelGrouping: string (nullable = true)
|-- clientId:string (nullable = true)
|-- customDimensions: string (nullable = true)
|-- date: string (nullable = true)
|-- device: struct (nullable = true)
|    |-- browser: string (nullable = true)
|    |-- browserSize: string (nullable = true)
|    |-- browserVersion:string (nullable = true)
|    |-- deviceCategory: string (nullable = true)
|    |-- flashVersion: string (nullable = true)
|    |--isMobile: string (nullable = true)
|    |-- javaEnabled: string (nullable = true)
|    |-- language: string (nullable = true)

기대한 결과는 다음과 같습니다.

root
|-- channelGrouping: string (nullable = true)
|-- clientId:string (nullable = true)
|-- customDimensions: array (nullable = true)
|    |-- element: struct (containsNull = true)
|    |    |--index: String (nullable = true)
|    |    |-- value: string (nullable = true)
|-- date: string (nullable = true)
|-- device: struct (nullable =true)
|    |-- browser:string(nullable = true)
|    |-- browserSize: String (nullable = true)
|    |-- browserVersion:string (nullable = true)
|    |-- deviceCategory: string (nullable = true)
|    |-- flashVersion: string (nullable = true)
|    |--isMobile: boolean (nullable = true)
|    |-- javaEnabled: boolean (nullable = true)

  • 답변 # 1

    모든 열을 문자열로 변환하려면 다음과 같은 간단한 해결책이 있습니다:

    import org.apache.spark.sql.types.{StringType}
    import org.apache.spark.sql.functions._
    // Cast each column of `ex` to StringType, in the order given by `ex.columns`,
    // and select the casted columns into a new DataFrame.
    val exNew = {
      val asStrings = ex.columns.map(name => ex(name).cast(StringType))
      ex.select(asStrings: _*)
    }
    
    

관련 자료

  • 이전 python - Plotly Express 호버 옵션
  • 다음 angular - 크기 조정 이벤트가 호출되면 NVD3에서 오래된 차트를 svg에 추가합니다