[SPARK-2612] [mllib] Fix data skew in ALS

Author: peng.zhang <peng.zhang@xiaomi.com>

Closes #1521 from renozhang/fix-als and squashes the following commits:

b5727a4 [peng.zhang] Remove unneeded argument
1a4f7a0 [peng.zhang] Fix data skew in ALS
This commit is contained in:
peng.zhang 2014-07-22 02:39:07 -07:00 committed by Xiangrui Meng
parent 81fec9922c
commit 75db1742ab

View file

@ -252,14 +252,14 @@ class ALS private (
val YtY = Some(sc.broadcast(computeYtY(users)))
val previousProducts = products
products = updateFeatures(numProductBlocks, users, userOutLinks, productInLinks,
userPartitioner, rank, lambda, alpha, YtY)
rank, lambda, alpha, YtY)
previousProducts.unpersist()
logInfo("Re-computing U given I (Iteration %d/%d)".format(iter, iterations))
products.setName(s"products-$iter").persist()
val XtX = Some(sc.broadcast(computeYtY(products)))
val previousUsers = users
users = updateFeatures(numUserBlocks, products, productOutLinks, userInLinks,
productPartitioner, rank, lambda, alpha, XtX)
rank, lambda, alpha, XtX)
previousUsers.unpersist()
}
} else {
@ -267,11 +267,11 @@ class ALS private (
// perform ALS update
logInfo("Re-computing I given U (Iteration %d/%d)".format(iter, iterations))
products = updateFeatures(numProductBlocks, users, userOutLinks, productInLinks,
userPartitioner, rank, lambda, alpha, YtY = None)
rank, lambda, alpha, YtY = None)
products.setName(s"products-$iter")
logInfo("Re-computing U given I (Iteration %d/%d)".format(iter, iterations))
users = updateFeatures(numUserBlocks, products, productOutLinks, userInLinks,
productPartitioner, rank, lambda, alpha, YtY = None)
rank, lambda, alpha, YtY = None)
users.setName(s"users-$iter")
}
}
@ -464,7 +464,6 @@ class ALS private (
products: RDD[(Int, Array[Array[Double]])],
productOutLinks: RDD[(Int, OutLinkBlock)],
userInLinks: RDD[(Int, InLinkBlock)],
productPartitioner: Partitioner,
rank: Int,
lambda: Double,
alpha: Double,
@ -477,7 +476,7 @@ class ALS private (
}
}
toSend.zipWithIndex.map{ case (buf, idx) => (idx, (bid, buf.toArray)) }
}.groupByKey(productPartitioner)
}.groupByKey(new HashPartitioner(numUserBlocks))
.join(userInLinks)
.mapValues{ case (messages, inLinkBlock) =>
updateBlock(messages, inLinkBlock, rank, lambda, alpha, YtY)