// Load the CSV text file into a single partition and turn every line into a
// (label, features) pair:
//   - label    : the raw string found in column index 2152
//   - features : a dense vector built from the first 2151 columns (indices 0..2150)
// NOTE(review): column index 2151 is not used as a feature or as the label —
// confirm this gap is intentional.
val fileword = sc.textFile(path, 1).map { line =>
  // Split once and reuse the result instead of re-splitting the same line.
  val columns = line.split(",")
  // Truncate to the feature columns BEFORE converting to Double, so that the
  // (string-typed) label column is never pushed through `toDouble`.
  val features = Vectors.dense(columns.take(2151).map(_.toDouble))
  (columns(2152), features)
}

// Print a ~10% sample (without replacement, fixed seed 2) as a quick sanity check.
fileword.sample(withReplacement = false, fraction = 0.1, seed = 2).foreach(println)

// Define the field names and types of the resulting DataFrame.
val schema = StructType(
  Seq(
    StructField("label", StringType),
    StructField("features", VectorType)
  )
)

// Convert every (label, features) pair into a Row that matches `schema`.
val rowRDD = fileword.map { case (label, features) => Row(label, features) }

// Build the DataFrame from the rows plus the explicit schema.
val wordsDataFrame = sqlContext.createDataFrame(rowRDD, schema)

// Display up to the first 100 rows.
wordsDataFrame.show(100)