做了一个关于mask函数的使用记录,以方便记忆
创建用于实验的图
import org.apache.spark._
import org.apache.spark.graphx._
// To make some of the examples work we will also need RDD
import org.apache.spark.rdd.RDD
// Vertex RDD: each element is (vertex id, (user name, role)).
val users: RDD[(VertexId, (String, String))] = {
  val vertexRows: Array[(VertexId, (String, String))] = Array(
    (3L, ("rxin", "student")),
    (7L, ("jgonzal", "postdoc")),
    (5L, ("franklin", "prof")),
    (2L, ("istoica", "prof")),
    (4L, ("peter", "student"))
  )
  sc.parallelize(vertexRows)
}

// Edge RDD: each edge carries a relationship label. Two of the edges point at
// vertex 0, which has no entry in `users`.
val relationships: RDD[Edge[String]] = {
  val edgeRows: Array[Edge[String]] = Array(
    Edge(3L, 7L, "collab"),
    Edge(5L, 3L, "advisor"),
    Edge(2L, 5L, "colleague"),
    Edge(5L, 7L, "pi"),
    Edge(4L, 0L, "student"),
    Edge(5L, 0L, "colleague")
  )
  sc.parallelize(edgeRows)
}

// Attribute given to any vertex that is referenced by an edge but absent from `users`.
val defaultUser: (String, String) = ("John Doe", "Missing")

// Assemble the property graph from the vertex RDD, the edge RDD and the default attribute.
val graph = Graph(vertices = users, edges = relationships, defaultVertexAttr = defaultUser)
// 将生成的图的顶点输出展示一下
scala> graph.vertices.collect.foreach(println(_))
(0,(John Doe,Missing))
(2,(istoica,prof))
(3,(rxin,student))
(4,(peter,student))
(5,(franklin,prof))
(7,(jgonzal,postdoc))
// 将生成的图的边输出展示一下
scala> graph.edges.collect.foreach(println(_))
Edge(3,7,collab)
Edge(5,3,advisor)
Edge(2,5,colleague)
Edge(5,7,pi)
Edge(4,0,student)
Edge(5,0,colleague)
接下来进行mask操作
// Run Connected Components
val ccGraph = graph.connectedComponents() // No longer contains missing field
scala> ccGraph.vertices.collect.foreach(println(_))
(0,0)
(2,0)
(3,0)
(4,0)
(5,0)
(7,0)
scala> ccGraph.edges.collect.foreach(println(_))
Edge(3,7,collab)
Edge(5,3,advisor)
Edge(2,5,colleague)
Edge(5,7,pi)
Edge(4,0,student)
Edge(5,0,colleague)
// Remove missing vertices as well as the edges connected to them
val validGraph = graph.subgraph(vpred = (id, attr) => attr._2 != "Missing")
scala> validGraph.vertices.collect.foreach(println(_))
(2,(istoica,prof))
(3,(rxin,student))
(4,(peter,student))
(5,(franklin,prof))
(7,(jgonzal,postdoc))
scala> validGraph.edges.collect.foreach(println(_))
Edge(3,7,collab)
Edge(5,3,advisor)
Edge(2,5,colleague)
Edge(5,7,pi)
// Restrict the answer to the valid subgraph
val validCCGraph = ccGraph.mask(validGraph)
scala> validCCGraph.vertices.collect.foreach(println(_)) // 由下面输出可以看出,vertices是以ccGraph为主体进行操作
(2,0)
(3,0)
(4,0)
(5,0)
(7,0)
scala> validCCGraph.edges.collect.foreach(println(_))
Edge(3,7,collab)
Edge(5,3,advisor)
Edge(2,5,colleague)
Edge(5,7,pi)
// 咱们进行相反的mask操作
val ccvalidGrap=validGraph.mask(ccGraph)
scala> ccvalidGrap.vertices.collect.foreach(println(_)) // 由输出可看出这次的vertices是以validGraph为主体进行操作
(2,(istoica,prof))
(3,(rxin,student))
(4,(peter,student))
(5,(franklin,prof))
(7,(jgonzal,postdoc))
scala> ccvalidGrap.edges.collect.foreach(println(_))
Edge(3,7,collab)
Edge(5,3,advisor)
Edge(2,5,colleague)
Edge(5,7,pi)
记录完成



