Source: motifFinder/broadcastGatherFinder.js

/**
 * @classdesc
 * 
 * <p>
 * This class is responsible for finding broadcast or gather motif.
 * </p>
 * 
 * <p>
 * Intuitively, a broadcast motif is a communication pattern where a host sends
 * messages to other hosts in quick succession. A gather motif then is where
 * multiple hosts send messages to a single one in quick succession.
 * </p>
 * 
 * <p>
 * More formally, a broadcast motif is sequence S of k nodes n_1, n_2, n_3 ...
 * n_k
 * <ul>
 * <li>S is a sequence of consecutive nodes</li>
 * <li>No nodes in S except n_1 may have any parents. n_1 can have any number
 * of parents.</li>
 * <li>Each node in T is the child of a node in S</li>
 * <li>Define valid-parent as follows: a node n_i is a valid-parent if it is a
 * parent of a node n_c such that n_c's host is not equal to the host of any of
 * the children of nodes n_1 to n_(i-1) inclusive.
 * <li>For all nodes n_i in S that are valid-parents let n_j be the node in S
 * with the smallest j such that j > i that is also a valid-parent. The number
 * of nodes between n_i and n_j must be less than or equal to the parameter
 * maxInBetween</li>
 * <li>Let Hosts be the set of hosts of all nodes in the set of nodes that are
 * a child of a node in S. The cardinality of Hosts must be greater than or
 * equal to the parameter minBroadcastGather.
 * </ul>
 * </p>
 * 
 * <p>
 * The actual motif itself comprises all nodes in S and all nodes that are
 * children of a node in S. It also contains all edges that connect any two
 * nodes in S and all edges that connect any node in S its children.
 * </p>
 * 
 * <p>
 * The formal definition of a gather motif is analogous to broadcast. In the
 * formal definition above, replace child(ren) with parent(s) and parent(s) with
 * child(ren). One difference for gather is that no nodes in S INCLUDING n_1 may
 * have any parents.
 * </p>
 * 
 * @constructor
 * @extends MotifFinder
 * @param {int} minBroadcastGather Minimum amount of broadcasts or gathers
 *            needed to accept a motif
 * @param {int} maxInBetween Maximum number of non-broadcasting or gathering
 *            nodes allowed since previous broadcasting or gathering one. This
 *            number includes the last broadcasting or gathering node itself.
 *            For example, if you set this to 1, the finder will allow ZERO
 *            nodes between broadcast/gather nodes (in other words, ONE node
 *            since the previous broadcast/gather node including that one)
 * @param {Boolean} broadcast Set to true to find broadcasts. Set to false to
 *            find gathers
 */
function BroadcastGatherFinder(minBroadcastGather, maxInBetween, broadcast) {
    
    /** @private */
    this.minBroadcastGather = minBroadcastGather;
    
    /** @private */
    this.maxInBetween = maxInBetween;
    
    /** @private */
    this.broadcast = broadcast;
};

// BroadcastGatherFinder extends MotifFinder
BroadcastGatherFinder.prototype = Object.create(MotifFinder.prototype);
BroadcastGatherFinder.prototype.constructor = BroadcastGatherFinder;

/**
 * 
 * @private
 * @static
 */
BroadcastGatherFinder.GREEDY_THRESHOLD = 300;

/**
 * Overrides {@link MotifFinder#find}
 */
BroadcastGatherFinder.prototype.find = function(graph) {

    var context = this; // Used by inner functions

    var motifGroup = new MotifGroup();
    var disjoints = findDisjoint();

    for (var d = 0; d < disjoints.length; d++) {
        var disjoint = disjoints[d];

        if (disjoint.length <= BroadcastGatherFinder.GREEDY_THRESHOLD) {
            var score = findAll(disjoint);
            var groups = findBestGroups(disjoint, score);
            motifGroup.addMotif(toMotif(groups));
        }
        else {
            findGreedy(disjoint, finalMotif);
        }
    }

    return motifGroup;

    // Finds disjoint groups of potential broadcasts/gathers
    function findDisjoint() {
        var ret = [];

        var hosts = graph.getHosts();
        for (var h = 0; h < hosts.length; h++) {
            var host = hosts[h];
            var group = []; // The current disjoint group
            var inBetween = 0;
            var inPattern = false;

            var curr = graph.getHead(host).getNext();
            while (curr != null) {

                var hasBadLink = context.broadcast ? curr.hasParents() : curr.hasChildren();
                var hasGoodLink = context.broadcast ? curr.hasChildren() : curr.hasParents();

                // If curr can't be part of the current group, push group and
                // start a new group for curr
                if (inBetween > context.maxInBetween || curr.isTail() || hasBadLink) {
                    if (inPattern && group.length != 0) {
                        ret.push(group);
                        group = [];
                        inPattern = false;
                    }

                }

                if (hasGoodLink) {
                    // if not currently in broadcast/gather pattern, clear group
                    if (!inPattern) {
                        inPattern = true;
                        group = [];
                    }
                    inBetween = 1 - curr.getLogEventCount();
                }

                group.push(curr);
                inBetween += curr.getLogEventCount();
                curr = curr.getNext();
            }
        }
        return ret;
    }

    /*
     * Finds a broadcasts/gathers within the group. O(n^2) returns score array
     * where score[i][j] = number of broadcasts between ith and jth node in
     * group inclusive ONLY IF they form a valid broadcast motif
     */
    function findAll(group) {

        var score = [];

        for (var i = 0; i < group.length; i++) {

            var count = 0; // current broadcast/gather count
            var inBetween = 0; // Nodes since last valid broadcasting/gathering
            // node
            var seenHosts = {};
            score[i] = [];

            for (var j = i; j < group.length; j++) {
                var curr = group[j];

                // Check if curr has a valid link (a link to a host that hasn't
                // been seen yet)
                var hasValidLink = false;
                var links = context.broadcast ? curr.getChildren() : curr.getParents();
                for (var k = 0; k < links.length; k++) {
                    if (!seenHosts[links[k].getHost()]) {
                        hasValidLink = true;
                        break;
                    }
                }

                var hasBadLink = context.broadcast ? curr.hasParents() : curr.hasChildren();
                var hasGoodLink = context.broadcast ? curr.hasChildren() : curr.hasParents();
                // we allow parent links for the first node of a broadcast
                var allowBadLink = context.broadcast && j == i;

                // (hasGoodLink && !hasValidLink) == has children/parents, but
                // all of them are to hosts already seen
                if (inBetween > context.maxInBetween || (hasBadLink && !allowBadLink) || (hasGoodLink && !hasValidLink)) {
                    break;
                }

                if (hasGoodLink) {
                    for (var k = 0; k < links.length; k++) {
                        var childHost = links[k].getHost();
                        if (!seenHosts[childHost]) {
                            count++;
                            seenHosts[childHost] = true;
                        }
                    }
                    inBetween = 1 - curr.getLogEventCount();
                    score[i][j] = count;
                }

                inBetween += curr.getLogEventCount();
            }
        }

        return score;
    }

    // Finds the best partition of nodes into broadcast motifs
    function findBestGroups(nodes, score) {
        var best = []; // best[i] = max score using nodes 0 to i inclusive
        var parent = [];

        // Find max score using O(n^2) dynamic programming
        // dp recurrence: best[i] = max(best[j-1] + score[j][i]) for all 0<=j<=i
        for (var i = 0; i < nodes.length; i++) {
            var max = -1;
            for (var j = 0; j <= i; j++) {
                var newScore = 0;
                var ownScore = score[j][i];
                if (!!ownScore && ownScore >= context.minBroadcastGather) {
                    newScore += ownScore;
                }
                if (j > 0) {
                    newScore += best[j - 1];
                }
                if (newScore > max) {
                    max = newScore;
                    parent[i] = j - 1;
                }
            }
            best[i] = max;
        }

        // backtrack dp to recover the actual groups of nodes
        var groups = [];
        var loc = nodes.length - 1;
        while (loc != -1) {
            var ploc = parent[loc];
            var groupStart = nodes[ploc + 1];
            var groupEnd = nodes[loc];
            var currScore = score[ploc + 1][loc];
            if (!!currScore && currScore >= context.minBroadcastGather) {
                groups.push([ groupStart, groupEnd ]);
            }
            loc = parent[loc];
        }

        return groups;
    }

    // Creates a motif out of the groups
    function toMotif(groups) {
        var motif = new Motif();
        
        for (var j = 0; j < groups.length; j++) {
            var curr = groups[j][0];
            var groupEnd = groups[j][1].getNext();
            var prev = null;
            var seenHosts = {};

            while (curr != groupEnd) {
                motif.addNode(curr);
                if (prev != null) {
                    motif.addEdge(curr, prev);
                }

                var links = context.broadcast ? curr.getChildren() : curr.getParents();
                for (var i = 0; i < links.length; i++) {
                    motif.addEdge(curr, links[i]);
                    motif.addNode(links[i]);
                    var linkHost = links[i].getHost();
                    if (!seenHosts[linkHost]) {
                        seenHosts[linkHost] = true;
                    }
                }

                prev = curr;
                curr = curr.getNext();
            }
        }
        
        return motif;
    }

    // Alternate greedy solution. Used when O(n^2) dp is too slow
    function findGreedy(group, motif) {
        var bcCount = 0;
        var inBetween = 0;
        var inPattern = false;
        var currMotif = new Motif();
        var queued = [];
        var nodes = [];
        var seenHosts = {};
        group = group.concat([ new ModelNode([]) ]);

        for (var g = 0; g < group.length; g++) {
            var curr = group[g];
            queued.push(curr);

            var hasValidLink = false;
            var links = context.broadcast ? curr.getChildren() : curr.getParents();
            for (var i = 0; i < links.length; i++) {
                if (!seenHosts[links[i].getHost()]) {
                    hasValidLink = true;
                    break;
                }
            }

            var hasBadLink = context.broadcast ? curr.hasParents() : curr.hasChildren();
            var hasGoodLink = context.broadcast ? curr.hasChildren() : curr.hasParents();

            if (inBetween > context.maxInBetween || (g == group.length - 1) || hasBadLink || (hasGoodLink && !hasValidLink)) {
                if (bcCount >= context.minBroadcastGather) {
                    for (var i = 1; i < nodes.length; i++) {
                        currMotif.addEdge(nodes[i - 1], nodes[i]);
                    }
                    motif.merge(currMotif);
                }
                inPattern = false;
            }

            if (hasGoodLink && (context.broadcast || !hasBadLink)) {
                if (!inPattern) {
                    inPattern = true;
                    bcCount = 0;
                    inBetween = 0;
                    currMotif = new Motif();
                    queued = [ curr ];
                    seenHosts = {};
                    nodes = [];
                }

                currMotif.addAllNodes(links);
                for (var i = 0; i < links.length; i++) {
                    currMotif.addEdge(curr, links[i]);
                    var linkHost = links[i].getHost();
                    if (!seenHosts[linkHost]) {
                        bcCount++;
                        seenHosts[linkHost] = true;
                    }
                }
                currMotif.addAllNodes(queued);
                nodes = nodes.concat(queued);
                queued = [];
                inBetween = 1 - curr.getLogEventCount();
            }

            inBetween += curr.getLogEventCount();
        }
    }

};