[SPARK-16231][PYSPARK][ML][EXAMPLES] dataframe_example.py fails to convert ML style vectors
## What changes were proposed in this pull request?

ML-style vectors need to be converted to the old MLlib style before performing `Statistics.colStats` operations on the DataFrame.

## How was this patch tested?

Ran the example and local tests.

Author: Bryan Cutler <cutlerb@gmail.com>

Closes #13928 from BryanCutler/pyspark-ml-example-vector-conv-SPARK-16231.
This commit is contained in:
parent
c17b1abff8
commit
1aa191e58e
|
@ -28,6 +28,7 @@ import shutil
|
|||
|
||||
from pyspark.sql import SparkSession
|
||||
from pyspark.mllib.stat import Statistics
|
||||
from pyspark.mllib.util import MLUtils
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) > 2:
|
||||
|
@ -55,7 +56,8 @@ if __name__ == "__main__":
|
|||
labelSummary.show()
|
||||
|
||||
# Convert features column to an RDD of vectors.
|
||||
features = df.select("features").rdd.map(lambda r: r.features)
|
||||
features = MLUtils.convertVectorColumnsFromML(df, "features") \
|
||||
.select("features").rdd.map(lambda r: r.features)
|
||||
summary = Statistics.colStats(features)
|
||||
print("Selected features column with average values:\n" +
|
||||
str(summary.mean()))
|
||||
|
|
Loading…
Reference in a new issue