2010-03-29 19:17:55 -04:00
|
|
|
package spark
|
|
|
|
|
|
|
|
import java.io._
|
2010-11-09 16:46:30 -05:00
|
|
|
import java.net.InetAddress
|
2011-04-28 01:13:01 -04:00
|
|
|
import java.util.concurrent.{Executors, ThreadFactory, ThreadPoolExecutor}
|
2010-03-29 19:17:55 -04:00
|
|
|
|
2010-10-04 15:01:05 -04:00
|
|
|
import scala.collection.mutable.ArrayBuffer
|
2010-11-04 01:45:44 -04:00
|
|
|
import scala.util.Random
|
2012-05-19 09:13:20 -04:00
|
|
|
import java.util.{Locale, UUID}
|
2010-10-04 15:01:05 -04:00
|
|
|
|
2010-10-16 19:14:13 -04:00
|
|
|
/**
 * Various utility methods used by Spark.
 */
object Utils {
  /**
   * Names of the JVM primitive types (and `void`) mapped to their `Class` objects.
   * `Class.forName` cannot resolve these names, so `deserialize` consults this table
   * when the supplied class loader reports a class as missing.
   */
  private val primitiveClasses = Map[String, Class[_]](
    "boolean" -> classOf[Boolean],
    "byte" -> classOf[Byte],
    "char" -> classOf[Char],
    "short" -> classOf[Short],
    "int" -> classOf[Int],
    "long" -> classOf[Long],
    "float" -> classOf[Float],
    "double" -> classOf[Double],
    "void" -> classOf[Unit]
  )

  /**
   * Serialize an object into a byte array using Java serialization.
   *
   * @param o the object to write; it must be accepted by `ObjectOutputStream.writeObject`
   * @return the serialized form of `o`
   */
  def serialize[T](o: T): Array[Byte] = {
    val bos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(bos)
    try {
      oos.writeObject(o)
    } finally {
      // Closing also flushes any data still buffered by the object stream into bos.
      oos.close()
    }
    bos.toByteArray
  }

  /**
   * Deserialize an object from a byte array using Java serialization and the default
   * class resolution of `ObjectInputStream`.
   *
   * @param bytes data previously produced by Java serialization (e.g. by `serialize`)
   * @return the object read from `bytes`, cast (unchecked) to `T`
   */
  def deserialize[T](bytes: Array[Byte]): T = {
    val ois = new ObjectInputStream(new ByteArrayInputStream(bytes))
    try {
      ois.readObject().asInstanceOf[T]
    } finally {
      ois.close()
    }
  }

  /**
   * Deserialize an object from a byte array using Java serialization, resolving classes
   * through the given class loader.
   *
   * Primitive type descriptors ("int", "boolean", ...) cannot be looked up through
   * `Class.forName`; they are resolved from a fixed table instead so that payloads
   * containing primitive `Class` objects can be read.
   *
   * @param bytes data previously produced by Java serialization (e.g. by `serialize`)
   * @param loader class loader used to look up the classes named in the stream
   * @return the object read from `bytes`, cast (unchecked) to `T`
   */
  def deserialize[T](bytes: Array[Byte], loader: ClassLoader): T = {
    val ois = new ObjectInputStream(new ByteArrayInputStream(bytes)) {
      override def resolveClass(desc: ObjectStreamClass): Class[_] = {
        try {
          Class.forName(desc.getName, false, loader)
        } catch {
          case e: ClassNotFoundException =>
            // Only primitives get a fallback; any other missing class is still an error.
            primitiveClasses.getOrElse(desc.getName, throw e)
        }
      }
    }
    try {
      ois.readObject().asInstanceOf[T]
    } finally {
      ois.close()
    }
  }

  /**
   * Check whether a character is an ASCII letter (A-Z or a-z).
   * Non-ASCII letters are not considered alphabetic by this method.
   */
  def isAlpha(c: Char): Boolean = {
    (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
  }

  /**
   * Split a string into its words, where a word is a maximal run of ASCII letters
   * (see `isAlpha`). Every other character acts as a separator and is dropped.
   *
   * @param s the text to split
   * @return the words of `s` in order of appearance; empty if `s` contains no letters
   */
  def splitWords(s: String): Seq[String] = {
    val words = new ArrayBuffer[String]
    var i = 0
    while (i < s.length) {
      // Consume the run of letters (possibly empty) that starts at i.
      var j = i
      while (j < s.length && isAlpha(s.charAt(j))) {
        j += 1
      }
      if (j > i) {
        words += s.substring(i, j)
      }
      // Skip the run of separator characters that follows.
      i = j
      while (i < s.length && !isAlpha(s.charAt(i))) {
        i += 1
      }
    }
    words.toSeq
  }

  /**
   * Create a uniquely named temporary directory ("spark-" followed by a random UUID)
   * inside the given root directory, and register a JVM shutdown hook that deletes it
   * together with its contents.
   *
   * @param root directory in which to create the new one; defaults to the
   *             `java.io.tmpdir` system property
   * @return the newly created directory
   * @throws IOException if no directory could be created within 10 attempts
   */
  def createTempDir(root: String = System.getProperty("java.io.tmpdir")): File = {
    val maxAttempts = 10
    // The iterator is lazy: a new candidate name is only generated when the previous one
    // already existed or could not be created.
    val dir = Iterator.continually(new File(root, "spark-" + UUID.randomUUID().toString))
      .take(maxAttempts)
      .find(candidate => !candidate.exists() && candidate.mkdirs())
      .getOrElse(throw new IOException(
        "Failed to create a temp directory after " + maxAttempts + " attempts!"))

    // Add a shutdown hook to delete the temp dir when the JVM exits
    Runtime.getRuntime.addShutdownHook(new Thread("delete Spark temp dir " + dir) {
      override def run(): Unit = {
        // The caller may already have removed the directory; deleting a missing file
        // would otherwise be reported as a failure at shutdown.
        if (dir.exists()) {
          Utils.deleteRecursively(dir)
        }
      }
    })
    dir
  }

  /**
   * Copy all data from an InputStream to an OutputStream.
   *
   * @param in stream to read from, until end of stream
   * @param out stream to write to
   * @param closeStreams if true, both streams are closed before this method returns,
   *                     whether the copy succeeded or failed
   */
  def copyStream(in: InputStream,
                 out: OutputStream,
                 closeStreams: Boolean = false): Unit = {
    try {
      val buf = new Array[Byte](8192)
      var n = in.read(buf)
      while (n != -1) {
        out.write(buf, 0, n)
        n = in.read(buf)
      }
    } finally {
      if (closeStreams) {
        // Nested finally so that a failure closing `in` does not leak `out`.
        try {
          in.close()
        } finally {
          out.close()
        }
      }
    }
  }

  /**
   * Shuffle the elements of a collection into a random order, returning the
   * result in a new collection. Unlike scala.util.Random.shuffle, this method
   * uses a local random number generator, avoiding inter-thread contention.
   *
   * @param seq the elements to shuffle; traversed once
   * @return a new sequence holding the same elements in random order
   */
  def randomize[T](seq: TraversableOnce[T]): Seq[T] = {
    val buf = new ArrayBuffer[T]()
    buf ++= seq
    val rand = new Random()
    // Fisher-Yates shuffle. The bound is i + 1 because nextInt excludes its argument and
    // an element must be allowed to stay where it is; otherwise only cyclic permutations
    // are ever produced.
    for (i <- (buf.size - 1) to 1 by -1) {
      val j = rand.nextInt(i + 1)
      val tmp = buf(j)
      buf(j) = buf(i)
      buf(i) = tmp
    }
    buf.toSeq
  }

  /**
   * Get the local host's IP address as reported by `InetAddress.getHostAddress`
   * (e.g. 1.2.3.4).
   */
  def localIpAddress(): String = InetAddress.getLocalHost.getHostAddress

  /** Host name override set through `setCustomHostname`; None when no override is active. */
  private var customHostname: Option[String] = None

  /**
   * Allow setting a custom host name because when we run on Mesos we need to use the same
   * hostname it reports to the master.
   *
   * @param hostname the name `localHostName` should return from now on; passing null
   *                 clears the override
   */
  def setCustomHostname(hostname: String): Unit = {
    // Option(...) maps null to None so that localHostName never returns null.
    customHostname = Option(hostname)
  }

  /**
   * Get the local machine's hostname: the custom host name if one has been set,
   * otherwise the name reported by `InetAddress.getLocalHost`.
   */
  def localHostName(): String = {
    customHostname.getOrElse(InetAddress.getLocalHost.getHostName)
  }

  /**
   * Returns a standard ThreadFactory except all threads are daemons.
   */
  private def newDaemonThreadFactory: ThreadFactory = {
    new ThreadFactory {
      def newThread(r: Runnable): Thread = {
        val t = Executors.defaultThreadFactory().newThread(r)
        t.setDaemon(true)
        t
      }
    }
  }

  /**
   * Wrapper over newCachedThreadPool whose worker threads are daemon threads.
   */
  def newDaemonCachedThreadPool(): ThreadPoolExecutor = {
    Executors.newCachedThreadPool(newDaemonThreadFactory).asInstanceOf[ThreadPoolExecutor]
  }

  /**
   * Return a string reporting how many milliseconds have elapsed since the given start
   * time, in the form " N ms " (with a leading and a trailing space).
   *
   * @param startTimeMs start time in milliseconds, on the `System.currentTimeMillis` clock
   */
  def getUsedTimeMs(startTimeMs: Long): String = {
    " " + (System.currentTimeMillis() - startTimeMs) + " ms "
  }

  /**
   * Wrapper over newFixedThreadPool whose worker threads are daemon threads.
   *
   * @param nThreads the number of threads in the pool
   */
  def newDaemonFixedThreadPool(nThreads: Int): ThreadPoolExecutor = {
    Executors.newFixedThreadPool(nThreads, newDaemonThreadFactory)
      .asInstanceOf[ThreadPoolExecutor]
  }

  /**
   * Delete a file or directory and its contents recursively.
   *
   * @param file the file or directory to delete
   * @throws IOException if `file` or anything below it could not be deleted
   */
  def deleteRecursively(file: File): Unit = {
    if (file.isDirectory) {
      // listFiles returns null when the directory cannot be listed; in that case fall
      // through to delete(), which reports the failure as an IOException.
      val children = file.listFiles()
      if (children != null) {
        children.foreach(child => deleteRecursively(child))
      }
    }
    if (!file.delete()) {
      throw new IOException("Failed to delete: " + file)
    }
  }

  /**
   * Convert a size in bytes to a human-readable string with one decimal place and a
   * unit suffix (B, KB, MB, GB or TB), for example "3.8 MB" for 4,000,000.
   *
   * Units are binary (1 KB = 1024 B). A unit is only used once the size is at least
   * twice that unit, so 1500 is rendered as "1500.0 B". The number is formatted with
   * Locale.US, so the decimal separator is always a period.
   *
   * @param size the size in bytes
   */
  def memoryBytesToString(size: Long): String = {
    val TB = 1L << 40
    val GB = 1L << 30
    val MB = 1L << 20
    val KB = 1L << 10

    val (value, unit) = {
      if (size >= 2 * TB) {
        (size.toDouble / TB, "TB")
      } else if (size >= 2 * GB) {
        (size.toDouble / GB, "GB")
      } else if (size >= 2 * MB) {
        (size.toDouble / MB, "MB")
      } else if (size >= 2 * KB) {
        (size.toDouble / KB, "KB")
      } else {
        (size.toDouble, "B")
      }
    }
    "%.1f %s".formatLocal(Locale.US, value, unit)
  }
}
|