// One PageRank iteration over a whitespace-delimited edge list.
// Input file "in": each line is "<srcUrl> <destUrl>".

// Lazy: this only builds the lineage graph; no Spark job runs until an
// action (collect, count, ...) is invoked.
// FIX: removed a stray `lines.collect()` that materialized the whole input
// on the driver and discarded the result.
val lines = spark.read.textFile("in").rdd

// Parse each line into a (src, dest) edge.
// NOTE(review): assumes every line has at least two whitespace-separated
// tokens — a malformed line throws ArrayIndexOutOfBoundsException here.
val links1 = lines.map { s =>
  val parts = s.split("\\s+")
  (parts(0), parts(1))
}

// Drop duplicate edges, then gather each page's outgoing links:
// yields (src, Iterable[dest]).
val links2 = links1.distinct()
val links3 = links2.groupByKey()

// cache() = persist(StorageLevel.MEMORY_ONLY), not disk: the link graph is
// reused by the join below (and by every iteration if this were looped),
// so keep it in memory instead of recomputing the lineage each time.
val links4 = links3.cache()

// Initialize every page's rank to 1.0.
var ranks = links4.mapValues(v => 1.0)

// Pair each page's out-links with its current rank:
// (page, (Iterable[dest], rank)).
val jj = links4.join(ranks)

// Each page contributes rank/outDegree to every page it links to.
// FIX: the pattern variable was misspelled "usls" while the body used
// "urls" — bind the intended name.
val contribs = jj.values.flatMap { case (urls, rank) =>
  urls.map(url => (url, rank / urls.size))
}

// Sum contributions per page and apply the damping factor (d = 0.85).
// FIX: `0.15 + 0.85 * _` expands the placeholder only inside `0.85 * _`,
// producing `0.15 + (Double => Double)` — a type error. Name the parameter.
ranks = contribs.reduceByKey(_ + _).mapValues(rank => 0.15 + 0.85 * rank)

// Action: pull the final ranks to the driver and print them.
// FIX: was `rank.collect()` — `rank` is undefined; the RDD is `ranks`.
val output = ranks.collect()
output.foreach(tup => println(s"${tup._1} has rank: ${tup._2} ."))