Spark Graph X Spark Graph X import org

  • Slides: 12
Download presentation
Spark GraphX

Spark GraphX

Модель данных Spark Graph. X import org. apache. spark. graphx. {Edge, Graph, Vertex. Id}

// Spark GraphX data model
import org.apache.spark.graphx.{Edge, Graph, VertexId}

/** Vertex property used in this example: a label plus an integer payload. */
case class Props(name: String, value: Int)

// Vertices are (id, property) pairs; the ids are Long literals, hence the `L` suffix.
// `sparkContext` is expected to be in scope already (it is not defined on this slide).
val verticesRDD = sparkContext.parallelize(List(
  (1L, Props("A", 10)),
  (2L, Props("B", 20)),
  (3L, Props("C", 30)),
  (4L, Props("D", 40))))

// Directed edges Edge(srcId, dstId, attr); together they form the cycle 1 -> 2 -> 3 -> 4 -> 1.
val edgesRDD = sparkContext.parallelize(List(
  Edge(1L, 2L, 1),
  Edge(2L, 3L, 2),
  Edge(3L, 4L, 3),
  Edge(4L, 1L, 4)))

// Property graph with `Props` on the vertices and `Int` on the edges.
val graph: Graph[Props, Int] = Graph(verticesRDD, edgesRDD)

Graph - методы class Graph[VD, ED] { // Information about the Graph val num.

// Graph methods (part 1)
/**
 * Signature-only summary of part of the GraphX `Graph` API, as shown on the slide.
 *
 * Every member is left unimplemented, so the class is declared `abstract`;
 * this is a reading aid, not a working implementation.
 *
 * @tparam VD vertex attribute type
 * @tparam ED edge attribute type
 */
abstract class Graph[VD, ED] {
  // Information about the Graph
  val numEdges: Long
  val numVertices: Long

  // Functions for caching graphs
  def persist(newLevel: StorageLevel = StorageLevel.MEMORY_ONLY): Graph[VD, ED]
  def cache(): Graph[VD, ED]
  def unpersistVertices(blocking: Boolean = true): Graph[VD, ED]

  // Change the partitioning heuristic
  def partitionBy(partitionStrategy: PartitionStrategy): Graph[VD, ED]

  // Modify the graph structure
  def reverse: Graph[VD, ED]
  def subgraph(
      epred: EdgeTriplet[VD, ED] => Boolean = (x => true),
      vpred: (VertexId, VD) => Boolean = ((v, d) => true)): Graph[VD, ED]
  def mask[VD2, ED2](other: Graph[VD2, ED2]): Graph[VD, ED]
  def groupEdges(merge: (ED, ED) => ED): Graph[VD, ED]
}

Graph - методы class Graph[VD, ED] { // Join RDDs with the graph def

// Graph methods (part 2)
/**
 * Signature-only summary of part of the GraphX `Graph` API, as shown on the slide:
 * joins with RDDs, neighbourhood collection and the built-in graph algorithms.
 *
 * Every member is left unimplemented, so the class is declared `abstract`;
 * this is a reading aid, not a working implementation.
 *
 * @tparam VD vertex attribute type
 * @tparam ED edge attribute type
 */
abstract class Graph[VD, ED] {
  // Join RDDs with the graph
  def joinVertices[U](table: RDD[(VertexId, U)])
      (mapFunc: (VertexId, VD, U) => VD): Graph[VD, ED]
  def outerJoinVertices[U, VD2](other: RDD[(VertexId, U)])
      (mapFunc: (VertexId, VD, Option[U]) => VD2): Graph[VD2, ED]

  // Aggregate information about adjacent triplets
  def collectNeighborIds(edgeDirection: EdgeDirection): VertexRDD[Array[VertexId]]
  def collectNeighbors(edgeDirection: EdgeDirection): VertexRDD[Array[(VertexId, VD)]]

  // Basic graph algorithms
  def pageRank(tol: Double, resetProb: Double = 0.15): Graph[Double, Double]
  def connectedComponents(): Graph[VertexId, ED]
  def triangleCount(): Graph[Int, ED]
  def stronglyConnectedComponents(numIter: Int): Graph[VertexId, ED]
}

Pregel: breadth first search //поиск в ширину def all. Paths[ED](graph: Graph[ED, Double], source. Id:

// Pregel: breadth first search
/**
 * Breadth-first search from `sourceId` implemented with the Pregel API.
 *
 * Each vertex ends up with a pair `(distance, path)`:
 *  - `distance` is the sum of edge attributes along the path by which the vertex
 *    was first reached (`Double.PositiveInfinity` if it was never reached);
 *  - `path` is the list of vertex ids on that path, starting at `sourceId`
 *    (empty if the vertex was never reached).
 *
 * A vertex only receives messages while its distance is still infinite, so the
 * value it keeps comes from the first superstep that reaches it. When several
 * messages arrive in that superstep, the one with the smallest distance wins.
 *
 * @param graph    input graph; edge attributes are used as edge lengths
 * @param sourceId id of the vertex the search starts from
 * @tparam ED      type of the input graph's vertex attribute (it is discarded)
 * @return graph with the same structure and `(distance, path)` vertex attributes
 */
def allPaths[ED](graph: Graph[ED, Double], sourceId: VertexId): Graph[(Double, List[VertexId]), Double] = {
  // State of a vertex that has not been reached yet; also used as the initial Pregel message.
  val unreached = (Double.PositiveInfinity, List.empty[VertexId])

  // Initialise the graph: the source is at distance 0 with a path consisting of itself.
  val initialGraph = graph.mapVertices((id, _) =>
    if (id == sourceId) (0.0, List(id)) else unreached)

  initialGraph.pregel(unreached)(
    // Vertex program: keep whichever of the current state and the incoming message is closer.
    (_, dist, newDist) => if (dist._1 < newDist._1) dist else newDist,
    // Send a message only across edges whose source is already reached and whose
    // destination is not: this is the BFS frontier.
    triplet => {
      if (triplet.srcAttr._1 != Double.PositiveInfinity &&
          triplet.dstAttr._1 == Double.PositiveInfinity) {
        Iterator((triplet.dstId,
          (triplet.srcAttr._1 + triplet.attr, triplet.srcAttr._2 :+ triplet.dstId)))
      } else {
        Iterator.empty
      }
    },
    // Merge multiple messages addressed to the same vertex: keep the shortest.
    (a, b) => if (a._1 < b._1) a else b
  )
}

BFS на aggregate. Messages • //поиск в ширину с остановкой def calculate. Distance[ED](graph: Graph[ED,

// BFS on aggregateMessages
/**
 * Breadth-first search from `sourceId` that stops as soon as `targetId` is reached.
 *
 * Each iteration pushes distances one hop further: every edge whose source already
 * has a finite distance and whose destination does not sends `srcDistance + edgeAttr`
 * to the destination, and competing messages are merged with `min`.
 *
 * The loop ends when either
 *  - `targetId` has received a finite distance, or
 *  - no edge leads from a reached vertex to an unreached one (nothing more can be
 *    discovered, so `targetId` is unreachable if it is still infinite).
 *
 * The second condition guarantees termination. Because of the early stop, vertices
 * that lie further from the source than `targetId` may keep
 * `Double.PositiveInfinity` as their distance.
 *
 * @param graph    input graph; edge attributes are used as edge lengths
 * @param sourceId id of the vertex the search starts from
 * @param targetId id of the vertex the search is looking for
 * @tparam ED      type of the input graph's vertex attribute (it is preserved)
 * @return the input graph with every vertex attribute paired with its computed distance
 */
def calculateDistance[ED](graph: Graph[ED, Double], sourceId: VertexId, targetId: VertexId): Graph[(ED, Double), Double] = {
  val unreached = Double.PositiveInfinity

  // Initialise vertices: 0 for the source, infinity for everything else.
  var g2 = graph.mapVertices((vid, _) => if (vid == sourceId) 0.0 else unreached)
  var stop = false

  while (!stop) {
    // Frontier edges: the source already has a distance, the destination does not.
    val newDistances = g2.aggregateMessages[Double](
      ctx => {
        if (ctx.srcAttr < unreached && ctx.dstAttr == unreached) {
          ctx.sendToDst(ctx.srcAttr + ctx.attr)
        }
      },
      (a, b) => math.min(a, b)
    )

    // Vertices that received no message keep their current distance.
    g2 = g2.outerJoinVertices(newDistances)((_, vd, newSum) =>
      math.min(vd, newSum.getOrElse(unreached)))

    // Stop when the target has a finite distance ...
    val targetReached = !g2.vertices
      .filter { case (vid, dist) => vid == targetId && dist < unreached }
      .isEmpty()
    // ... or when there is no frontier edge left to expand.
    val frontierExhausted = g2.triplets
      .filter(t => t.srcAttr != unreached && t.dstAttr == unreached)
      .isEmpty()
    stop = targetReached || frontierExhausted
  }

  // Attach the computed distance to the original vertex attributes.
  graph.outerJoinVertices(g2.vertices)((vid, vd, dist) => (vd, dist.getOrElse(unreached)))
}

Ссылки • Официальная документация: https://spark.apache.org/docs/latest/graphx-programming-guide.html

Ссылки • Официальная документация: https://spark.apache.org/docs/latest/graphx-programming-guide.html