Skip to content

DAG

Source code in fpcmci/graph/DAG.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
class DAG():
    def __init__(self, var_names, min_lag, max_lag, neglect_autodep = False, scm = None):
        """
        Builds a graph holding one node per variable, optionally pre-filled from an SCM

        Args:
            var_names (list): names of the variables; a Node is created for each of them
            min_lag (int): minimum time lag, stored as given
            max_lag (int): maximum time lag, stored as given
            neglect_autodep (bool, optional): flag forwarded to every Node. Defaults to False.
            scm (dict, optional): maps each target to an iterable of (source, lag) pairs.
                Every pair is registered as a link with score 0.3 and p-value 0. Defaults to None.
        """
        nodes = {}
        for name in var_names:
            nodes[name] = Node(name, neglect_autodep)
        self.g = nodes
        self.neglect_autodep = neglect_autodep
        self.sys_context = {}
        self.min_lag = min_lag
        self.max_lag = max_lag

        if scm is None:
            return

        # Pre-populate the links described by the SCM
        for target in scm:
            for link in scm[target]:
                self.add_source(target, link[0], 0.3, 0, link[1])


    @property
    def features(self) -> list:
        """
        Names of the variables stored in the graph

        Returns:
            list: keys of the graph dictionary, in insertion order
        """
        return [*self.g]


    @property
    def autodep_nodes(self) -> list:
        """
        Names of the nodes flagged as autodependent

        Returns:
            list: names of the nodes whose `is_autodependent` is true, in graph order
        """
        # NOTE: the result is deliberately not restricted to intervention nodes,
        # so that every auto-dependent node can be checked with observational data
        return [name for name, node in self.g.items() if node.is_autodependent]


    @property
    def interventions_links(self) -> list:
        """
        Links whose source is an intervention node

        Returns:
            list: (source, lag, target) tuples, one for each link starting from a node
                flagged as `intervention_node`
        """
        return [(key[0], key[1], target)
                for target, node in self.g.items()
                for key in node.sources
                if self.g[key[0]].intervention_node]


    def fully_connected_dag(self):
        """
        Links every node to every node (itself included) at each lag from 1 to max_lag.
        Each link is registered with score 1 and p-value 0.
        """
        lags = range(1, self.max_lag + 1)
        for target in self.g:
            for source in self.g:
                for lag in lags:
                    self.add_source(target, source, 1, 0, lag)


    def add_source(self, t, s, score, pval, lag):
        """
        Adds source node to a target node

        The link is stored under the key (s, abs(lag)). Adding a link that already
        exists only refreshes its score and p-value.

        Args:
            t (str): target node name
            s (str): source node name
            score (float): dependency score
            pval (float): dependency p-value
            lag (int): dependency lag (its sign is ignored)
        """
        link = (s, abs(lag))
        is_new_link = link not in self.g[t].sources
        self.g[t].sources[link] = {SCORE: score, PVAL: pval}

        # Keep one child entry per link: del_source removes exactly one entry per link,
        # so re-adding an existing link must not leave a stale duplicate behind
        if is_new_link:
            self.g[s].children.append(t)


    def del_source(self, t, s, lag):
        """
        Removes source node from a target node

        Args:
            t (str): target node name
            s (str): source node name
            lag (int): dependency lag (its sign is ignored)

        Raises:
            KeyError: if the link (s, abs(lag)) is not a source of t
            ValueError: if t is not listed among the children of s
        """
        # add_source stores links under abs(lag): normalise the lag the same way so that
        # signed lags (e.g. the negative ones returned by get_SCM) can be removed too
        del self.g[t].sources[(s, abs(lag))]
        self.g[s].children.remove(t)


    def remove_unneeded_features(self):
        """
        Removes isolated nodes

        When an isolated node is an intervention node, its associated context node
        (if any) is removed as well. The graph dictionary is replaced by a new one
        holding the surviving nodes.
        """
        to_drop = set()
        for name, node in self.g.items():
            if not node.is_isolated:
                continue
            to_drop.add(name)
            # The context may be None (remove_context resets it while leaving the
            # intervention flag set) or already scheduled for removal: both are fine here
            if node.intervention_node and node.associated_context is not None:
                to_drop.add(node.associated_context)

        self.g = {name: node for name, node in self.g.items() if name not in to_drop}


    def add_context(self):
        """
        Adds the context variables listed in `sys_context` to the graph

        For every (system variable, context variable) pair whose system variable is
        in the graph, a node is created for the context variable and linked to the
        system variable with score 1, p-value 0 and lag 1.
        """
        for sys_var, context_var in self.sys_context.items():
            if sys_var not in self.features:
                continue

            # New node for the context variable
            self.g[context_var] = Node(context_var, self.neglect_autodep)

            # Mark the system variable as intervened and attach its context
            sys_node = self.g[sys_var]
            sys_node.intervention_node = True
            sys_node.associated_context = context_var
            self.add_source(sys_var, context_var, 1, 0, 1)


    def remove_context(self):
        """
        Removes the context variables listed in `sys_context` from the graph

        For every system variable still in the graph, the lag-1 link from its context
        variable is deleted and the context node is dropped. The `intervention_node`
        flag of the system variable is left untouched.
        """
        for sys_var, context_var in self.sys_context.items():
            if sys_var not in self.g:
                continue

            # Detach the context from the system variable
            self.g[sys_var].associated_context = None
            self.del_source(sys_var, context_var, 1)

            # Drop the context node itself
            self.g.pop(context_var)


    def get_link_assumptions(self, autodep_ok = False) -> dict:
        """
        Returns the link assumption dictionary

        Nodes are identified by their position in the graph. Each link of the graph
        is reported as {target_idx: {(source_idx, -lag): mark}} where mark is:
          - '-->' for an autodependency link when autodep_ok is True
          - '-?>' for a link whose source is not a context variable
          - '-->' for the link between a system variable and its own context variable
        Links from a context variable to any other target are not reported.

        Args:
            autodep_ok (bool, optional): If true, autodependency link assumption = -->. Otherwise -?>. Defaults to False.

        Returns:
            dict: link assumption dictionary
        """
        # Built once: positions of the nodes and set of the context variables
        index = {name: i for i, name in enumerate(self.g)}
        context_vars = set(self.sys_context.values())

        link_assump = {i: dict() for i in index.values()}
        for t, node in self.g.items():
            for s in node.sources:
                source = s[0]
                if autodep_ok and source == t: # NOTE: avoids controlling twice the autodependency links
                    mark = '-->'
                elif source not in context_vars:
                    mark = '-?>'
                elif t in self.sys_context and source == self.sys_context[t]:
                    mark = '-->'
                else:
                    continue
                link_assump[index[t]][(index[source], -abs(s[1]))] = mark

        return link_assump


    def get_SCM(self) -> dict:
        """
        Returns the SCM described by the graph

        Returns:
            dict: maps every variable to the list of its (source, -lag) pairs
        """
        return {target: [(key[0], -abs(key[1])) for key in node.sources]
                for target, node in self.g.items()}


    def get_parents(self) -> dict:
        """
        Returns the parents of every node, with nodes identified by position

        Returns:
            dict: maps every node index to the list of its (source index, -lag) pairs
        """
        names = self.features
        parents = {names.index(v): [] for v in names}
        for target, node in self.g.items():
            bucket = parents[names.index(target)]
            for key in node.sources:
                bucket.append((names.index(key[0]), -abs(key[1])))
        return parents


    def make_pretty(self) -> dict:
        """
        Builds a copy of the graph whose variable names are wrapped as $ varname $

        Node names, children names and source names are all wrapped; scores and
        p-values are carried over unchanged. The original graph is not modified.

        Returns:
            dict: pretty DAG
        """
        def wrap(name):
            return '$' + name + '$'

        pretty = {}
        for name, node in self.g.items():
            clone = copy.deepcopy(node)
            clone.name = wrap(name)
            clone.children = [wrap(child) for child in node.children]

            # Re-key every source of the clone with the wrapped source name
            for key, info in node.sources.items():
                del clone.sources[key]
                clone.sources[(wrap(key[0]), key[1])] = {SCORE: info[SCORE], PVAL: info[PVAL]}

            pretty[wrap(name)] = clone
        return pretty


    def dag(self,
            node_layout = 'dot',
            min_width = 1, max_width = 5,
            min_score = 0, max_score = 1,
            node_size = 8, node_color = 'orange',
            edge_color = 'grey',
            bundle_parallel_edges = True,
            font_size = 12,
            label_type = LabelType.Lag,
            save_name = None,
            img_extention = ImageExt.PNG):
        """
        Draws the causal graph, merging all the lags of a link into a single edge

        Autodependencies are drawn as node borders, the other links as edges. Border
        and edge widths are obtained by scaling the link scores. Nothing is drawn
        when the graph has no link between distinct nodes.

        Args:
            node_layout (str, optional): Node layout. Defaults to 'dot'.
            min_width (int, optional): minimum linewidth. Defaults to 1.
            max_width (int, optional): maximum linewidth. Defaults to 5.
            min_score (int, optional): minimum score range. Defaults to 0.
            max_score (int, optional): maximum score range. Defaults to 1.
            node_size (int, optional): node size. Defaults to 8.
            node_color (str, optional): node color. Defaults to 'orange'.
            edge_color (str, optional): edge color. Defaults to 'grey'.
            bundle_parallel_edges (str, optional): bundle parallel edge bit. Defaults to True.
            font_size (int, optional): font size. Defaults to 12.
            label_type (LabelType, optional): enum to set whether to show the lag time (LabelType.Lag) or the strength (LabelType.Score) of the dependencies on each link/node or not showing the labels (LabelType.NoLabels). Default LabelType.Lag.
            save_name (str, optional): Filename path. If None, plot is shown and not saved. Defaults to None.
            img_extention (ImageExt, optional): enum whose value is appended to save_name. Defaults to ImageExt.PNG.
        """
        pretty = copy.deepcopy(self)
        pretty.g = pretty.make_pretty()
        nodes = pretty.g

        with_lag = label_type == LabelType.Lag
        with_labels = with_lag or label_type == LabelType.Score

        def to_width(score):
            return self.__scale(score, min_width, max_width, min_score, max_score)

        # NODES DEFINITION
        G = nx.DiGraph()
        G.add_nodes_from(nodes.keys())

        # BORDER LINE: width of the strongest autodependency, 0 when there is none
        border = {}
        for name, node in nodes.items():
            border[name] = 0
            if node.is_autodependent:
                strongest = node.get_max_autodependent
                border[name] = max(to_width(node.sources[strongest][SCORE]), 0)

        # BORDER LABEL: lag or score of the strongest autodependency
        node_label = None
        if with_labels:
            node_label = {}
            for name, node in nodes.items():
                parts = []
                if node.is_autodependent:
                    strongest = node.get_max_autodependent
                    if with_lag:
                        parts.append(strongest[1])
                    else:
                        parts.append(round(node.sources[strongest][SCORE], 3))
                node_label[name] = ",".join(str(p) for p in parts)

        # EDGE DEFINITION: links between distinct nodes only
        cross_links = [(key, target)
                       for target, node in nodes.items()
                       for key in node.sources
                       if target != key[0]]
        edges = [(key[0], target) for key, target in cross_links]
        G.add_edges_from(edges)

        # EDGE LINE: widest link among the lags sharing the same edge
        edge_width = {}
        for key, target in cross_links:
            edge = (key[0], target)
            edge_width[edge] = max(to_width(nodes[target].sources[key][SCORE]), edge_width.get(edge, 0))

        # EDGE LABEL: comma-separated lags or scores of the links sharing the same edge
        edge_label = None
        if with_labels:
            collected = {}
            for key, target in cross_links:
                if with_lag:
                    value = key[1]
                else:
                    value = round(nodes[target].sources[key][SCORE], 3)
                collected.setdefault((key[0], target), []).append(value)
            edge_label = {edge: ",".join(str(v) for v in values) for edge, values in collected.items()}

        plt.subplots(figsize=(8,6))

        if edges:
            drawn = Graph(G,
                          node_layout = node_layout,
                          node_size = node_size,
                          node_color = node_color,
                          node_labels = node_label,
                          node_edge_width = border,
                          node_label_fontdict = dict(size=font_size),
                          node_edge_color = edge_color,
                          node_label_offset = 0.1,
                          node_alpha = 1,

                          arrows = True,
                          edge_layout = 'curved',
                          edge_label = label_type != LabelType.NoLabels,
                          edge_labels = edge_label,
                          edge_label_fontdict = dict(size=font_size),
                          edge_color = edge_color,
                          edge_width = edge_width,
                          edge_alpha = 1,
                          edge_zorder = 1,
                          edge_label_position = 0.35,
                          edge_layout_kwargs = dict(bundle_parallel_edges = bundle_parallel_edges, k = 0.05))

            # Variable names are drawn on top of the nodes
            nx.draw_networkx_labels(G,
                                    pos = drawn.node_positions,
                                    labels = {n: n for n in G},
                                    font_size = font_size)

        if save_name is None:
            plt.show()
        else:
            plt.savefig(save_name + img_extention.value, dpi = 300)


    def ts_dag(self,
               tau,
               min_width = 1, max_width = 5,
               min_score = 0, max_score = 1,
               node_size = 8,
               node_proximity = 2,
               node_color = 'orange',
               edge_color = 'grey',
               font_size = 12,
               save_name = None,
               img_extention = ImageExt.PNG):
        """
        Draws the timeseries graph: one column per time step, one row per variable

        Every link is drawn from the source at time t-lag to the target at time t and
        then repeated backwards in time, moving both ends by the link lag at each
        repetition, until the source falls before the first column.

        Args:
            tau (int): max time lag
            min_width (int, optional): minimum linewidth. Defaults to 1.
            max_width (int, optional): maximum linewidth. Defaults to 5.
            min_score (int, optional): minimum score range. Defaults to 0.
            max_score (int, optional): maximum score range. Defaults to 1.
            node_size (int, optional): node size. Defaults to 8.
            node_proximity (int, optional): node proximity. Defaults to 2.
            node_color (str, optional): node color. Defaults to 'orange'.
            edge_color (str, optional): edge color. Defaults to 'grey'.
            font_size (int, optional): font size. Defaults to 12.
            save_name (str, optional): Filename path. If None, plot is shown and not saved. Defaults to None.
            img_extention (ImageExt, optional): enum whose value is appended to save_name. Defaults to ImageExt.PNG.
        """

        r = copy.deepcopy(self)
        r.g = r.make_pretty()

        # Row of each variable in the grid: the first variable gets the highest row index
        names = list(r.g.keys())
        n_vars = len(names)
        row = {name: n_vars - 1 - i for i, name in enumerate(names)}

        # add nodes
        G = nx.grid_2d_graph(tau + 1, n_vars)
        pos = {n : (n[0], n[1]/node_proximity) for n in G.nodes()}
        scale = max(pos.values())
        # The edge view is copied first so that it is not consumed while being modified
        G.remove_edges_from(list(G.edges()))

        # edges definition
        edges = list()
        edge_width = dict()
        for t, node in r.g.items():
            t_index = row[t]
            for s, info in node.sources.items():
                s_index = row[s[0]]
                width = self.__scale(info[SCORE], min_width, max_width, min_score, max_score)

                # A zero lag would never move the link backwards in time and the loop
                # below would never end: such links are repeated at every time step
                step = s[1] if s[1] > 0 else 1

                s_lag = tau - s[1]
                t_lag = tau
                while s_lag >= 0:
                    s_node = (s_lag, s_index)
                    t_node = (t_lag, t_index)
                    edges.append((s_node, t_node))
                    edge_width[(s_node, t_node)] = width
                    s_lag -= step
                    t_lag -= step

        G.add_edges_from(edges)

        # label definition: variable names are shown on the first column only
        labeldict = {n: names[n_vars - 1 - n[1]] for n in G.nodes() if n[0] == 0}

        fig, ax = plt.subplots(figsize=(8,6))

        # time line text drawing
        pos_tau = {p[0] for p in pos.values()}
        max_y = max(p[1] for p in pos.values())
        for x in pos_tau:
            steps_back = abs(int(x) - tau)
            if steps_back == 0:
                text = r"$t$"
            else:
                text = r"$t-" + str(steps_back) + "$"
            ax.text(x, max_y + .3, text, horizontalalignment='center', fontsize=font_size)

        Graph(G,
            node_layout = {p : np.array(pos[p]) for p in pos},
            node_size = node_size,
            node_color = node_color,
            node_labels = labeldict,
            node_label_offset = 0,
            node_edge_width = 0,
            node_label_fontdict = dict(size=font_size),
            node_alpha = 1,

            arrows = True,
            edge_layout = 'curved',
            edge_label = False,
            edge_color = edge_color,
            edge_width = edge_width,
            edge_alpha = 1,
            edge_zorder = 1,
            scale = (scale[0] + 2, scale[1] + 2))

        if save_name is not None:
            plt.savefig(save_name + img_extention.value, dpi = 300)
        else:
            plt.show()


    def __scale(self, score, min_width, max_width, min_score = 0, max_score = 1):
        """
        Maps a score linearly from [min_score, max_score] to [min_width, max_width]

        Args:
            score (float): score to scale
            min_width (float): minimum linewidth
            max_width (float): maximum linewidth
            min_score (int, optional): minimum score range. Defaults to 0.
            max_score (int, optional): maximum score range. Defaults to 1.

        Returns:
            (float): scaled score
        """
        fraction = (score - min_score) / (max_score - min_score)
        return fraction * (max_width - min_width) + min_width


    def get_skeleton(self) -> np.array:
        """
        Returns skeleton matrix.
        One layer per lag from min_lag to max_lag; each layer is indexed [target, source]:
        1 <- if there is a link from source to target at that lag
        0 <- otherwise

        Returns:
            np.array: skeleton matrix
        """
        names = self.features
        lags = range(self.min_lag, self.max_lag + 1)
        layers = [np.zeros(shape=(len(names), len(names)), dtype = np.int32) for _ in lags]
        for layer, lag in zip(layers, lags):
            for target, node in self.g.items():
                for key in node.sources:
                    if key[1] == lag:
                        layer[names.index(target), names.index(key[0])] = 1
        return np.array(layers)


    def get_val_matrix(self) -> np.ndarray:
        """
        Returns val matrix.
        val matrix contains information about the strength of the links composing the causal model.
        One layer per lag from min_lag to max_lag; each layer is indexed [target, source]
        and holds the score of the link, 0 where there is no link. Links whose lag is
        outside [min_lag, max_lag] are ignored.

        Returns:
            np.ndarray: val matrix
        """
        names = self.features
        index = {name: i for i, name in enumerate(names)}
        lags = range(self.min_lag, self.max_lag + 1)
        layer_of = {lag: k for k, lag in enumerate(lags)}

        r = [np.zeros(shape=(len(names), len(names))) for _ in lags]
        for t, node in self.g.items():
            for s, info in node.sources.items():
                k = layer_of.get(s[1])
                if k is None:
                    continue
                # Read through the same SCORE key add_source writes with
                r[k][index[t], index[s[0]]] = info[SCORE]
        return np.array(r)


    def get_pval_matrix(self) -> np.ndarray:
        """
        Returns pval matrix.
        pval matrix contains information about the pval of the links composing the causal model.
        One layer per lag from min_lag to max_lag; each layer is indexed [target, source]
        and holds the p-value of the link. Entries without a link are left at 0, and
        links whose lag is outside [min_lag, max_lag] are ignored.

        Returns:
            np.ndarray: pval matrix
        """
        names = self.features
        index = {name: i for i, name in enumerate(names)}
        lags = range(self.min_lag, self.max_lag + 1)
        layer_of = {lag: k for k, lag in enumerate(lags)}

        r = [np.zeros(shape=(len(names), len(names))) for _ in lags]
        for t, node in self.g.items():
            for s, info in node.sources.items():
                k = layer_of.get(s[1])
                if k is None:
                    continue
                # Read through the same PVAL key add_source writes with
                r[k][index[t], index[s[0]]] = info[PVAL]
        return np.array(r)

autodep_nodes: list property

Autodependent nodes list

Returns:

Name Type Description
list list

Autodependent nodes list

features: list property

Features list

Returns:

Name Type Description
list list

Features list

Intervention links list

Returns:

Name Type Description
list list

Intervention link list

__init__(var_names, min_lag, max_lag, neglect_autodep=False, scm=None)

DAG constructor

Parameters:

Name Type Description Default
var_names list

Names of the variables; one graph node is created for each name.

required
min_lag int

Minimum time lag of the model.

required
max_lag int

Maximum time lag of the model.

required
neglect_autodep bool

Flag forwarded to every node of the graph. Defaults to False.

False
scm dict

Mapping from each target variable to a list of (source, lag) pairs used to pre-populate the graph. Defaults to None.

None
Source code in fpcmci/graph/DAG.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
def __init__(self, var_names, min_lag, max_lag, neglect_autodep = False, scm = None):
    """
    Builds a graph holding one node per variable, optionally pre-filled from an SCM

    Args:
        var_names (list): names of the variables; a Node is created for each of them
        min_lag (int): minimum time lag, stored as given
        max_lag (int): maximum time lag, stored as given
        neglect_autodep (bool, optional): flag forwarded to every Node. Defaults to False.
        scm (dict, optional): maps each target to an iterable of (source, lag) pairs.
            Every pair is registered as a link with score 0.3 and p-value 0. Defaults to None.
    """
    nodes = {}
    for name in var_names:
        nodes[name] = Node(name, neglect_autodep)
    self.g = nodes
    self.neglect_autodep = neglect_autodep
    self.sys_context = {}
    self.min_lag = min_lag
    self.max_lag = max_lag

    if scm is None:
        return

    # Pre-populate the links described by the SCM
    for target in scm:
        for link in scm[target]:
            self.add_source(target, link[0], 0.3, 0, link[1])

__scale(score, min_width, max_width, min_score=0, max_score=1)

Scales the score of the cause-effect relationship strength to a linewidth

Parameters:

Name Type Description Default
score float

score to scale

required
min_width float

minimum linewidth

required
max_width float

maximum linewidth

required
min_score int

minimum score range. Defaults to 0.

0
max_score int

maximum score range. Defaults to 1.

1

Returns:

Type Description
float

scaled score

Source code in fpcmci/graph/DAG.py
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
def __scale(self, score, min_width, max_width, min_score = 0, max_score = 1):
    """
    Maps a score linearly from [min_score, max_score] to [min_width, max_width]

    Args:
        score (float): score to scale
        min_width (float): minimum linewidth
        max_width (float): maximum linewidth
        min_score (int, optional): minimum score range. Defaults to 0.
        max_score (int, optional): maximum score range. Defaults to 1.

    Returns:
        (float): scaled score
    """
    fraction = (score - min_score) / (max_score - min_score)
    return fraction * (max_width - min_width) + min_width

add_context()

Adds context variables

Source code in fpcmci/graph/DAG.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
def add_context(self):
    """
    Adds the context variables listed in `sys_context` to the graph

    For every (system variable, context variable) pair whose system variable is
    in the graph, a node is created for the context variable and linked to the
    system variable with score 1, p-value 0 and lag 1.
    """
    for sys_var, context_var in self.sys_context.items():
        if sys_var not in self.features:
            continue

        # New node for the context variable
        self.g[context_var] = Node(context_var, self.neglect_autodep)

        # Mark the system variable as intervened and attach its context
        sys_node = self.g[sys_var]
        sys_node.intervention_node = True
        sys_node.associated_context = context_var
        self.add_source(sys_var, context_var, 1, 0, 1)

add_source(t, s, score, pval, lag)

Adds source node to a target node

Parameters:

Name Type Description Default
t str

target node name

required
s str

source node name

required
score float

dependency score

required
pval float

dependency p-value

required
lag int

dependency lag

required
Source code in fpcmci/graph/DAG.py
85
86
87
88
89
90
91
92
93
94
95
96
97
def add_source(self, t, s, score, pval, lag):
    """
    Adds source node to a target node

    The link is stored under the key (s, abs(lag)). Adding a link that already
    exists only refreshes its score and p-value.

    Args:
        t (str): target node name
        s (str): source node name
        score (float): dependency score
        pval (float): dependency p-value
        lag (int): dependency lag (its sign is ignored)
    """
    link = (s, abs(lag))
    is_new_link = link not in self.g[t].sources
    self.g[t].sources[link] = {SCORE: score, PVAL: pval}

    # Keep one child entry per link: del_source removes exactly one entry per link,
    # so re-adding an existing link must not leave a stale duplicate behind
    if is_new_link:
        self.g[s].children.append(t)

dag(node_layout='dot', min_width=1, max_width=5, min_score=0, max_score=1, node_size=8, node_color='orange', edge_color='grey', bundle_parallel_edges=True, font_size=12, label_type=LabelType.Lag, save_name=None, img_extention=ImageExt.PNG)

build a dag

Parameters:

Name Type Description Default
node_layout str

Node layout. Defaults to 'dot'.

'dot'
min_width int

minimum linewidth. Defaults to 1.

1
max_width int

maximum linewidth. Defaults to 5.

5
min_score int

minimum score range. Defaults to 0.

0
max_score int

maximum score range. Defaults to 1.

1
node_size int

node size. Defaults to 8.

8
node_color str

node color. Defaults to 'orange'.

'orange'
edge_color str

edge color. Defaults to 'grey'.

'grey'
bundle_parallel_edges str

bundle parallel edge bit. Defaults to True.

True
font_size int

font size. Defaults to 12.

12
label_type LabelType

enum to set whether to show the lag time (LabelType.Lag) or the strength (LabelType.Score) of the dependencies on each link/node or not showing the labels (LabelType.NoLabels). Default LabelType.Lag.

LabelType.Lag
save_name str

Filename path. If None, plot is shown and not saved. Defaults to None.

None
Source code in fpcmci/graph/DAG.py
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
def dag(self,
        node_layout = 'dot',
        min_width = 1, max_width = 5,
        min_score = 0, max_score = 1,
        node_size = 8, node_color = 'orange',
        edge_color = 'grey',
        bundle_parallel_edges = True,
        font_size = 12,
        label_type = LabelType.Lag,
        save_name = None,
        img_extention = ImageExt.PNG):
    """
    Draws the causal graph, merging all the lags of a link into a single edge

    Autodependencies are drawn as node borders, the other links as edges. Border
    and edge widths are obtained by scaling the link scores. Nothing is drawn
    when the graph has no link between distinct nodes.

    Args:
        node_layout (str, optional): Node layout. Defaults to 'dot'.
        min_width (int, optional): minimum linewidth. Defaults to 1.
        max_width (int, optional): maximum linewidth. Defaults to 5.
        min_score (int, optional): minimum score range. Defaults to 0.
        max_score (int, optional): maximum score range. Defaults to 1.
        node_size (int, optional): node size. Defaults to 8.
        node_color (str, optional): node color. Defaults to 'orange'.
        edge_color (str, optional): edge color. Defaults to 'grey'.
        bundle_parallel_edges (str, optional): bundle parallel edge bit. Defaults to True.
        font_size (int, optional): font size. Defaults to 12.
        label_type (LabelType, optional): enum to set whether to show the lag time (LabelType.Lag) or the strength (LabelType.Score) of the dependencies on each link/node or not showing the labels (LabelType.NoLabels). Default LabelType.Lag.
        save_name (str, optional): Filename path. If None, plot is shown and not saved. Defaults to None.
        img_extention (ImageExt, optional): enum whose value is appended to save_name. Defaults to ImageExt.PNG.
    """
    pretty = copy.deepcopy(self)
    pretty.g = pretty.make_pretty()
    nodes = pretty.g

    with_lag = label_type == LabelType.Lag
    with_labels = with_lag or label_type == LabelType.Score

    def to_width(score):
        return self.__scale(score, min_width, max_width, min_score, max_score)

    # NODES DEFINITION
    G = nx.DiGraph()
    G.add_nodes_from(nodes.keys())

    # BORDER LINE: width of the strongest autodependency, 0 when there is none
    border = {}
    for name, node in nodes.items():
        border[name] = 0
        if node.is_autodependent:
            strongest = node.get_max_autodependent
            border[name] = max(to_width(node.sources[strongest][SCORE]), 0)

    # BORDER LABEL: lag or score of the strongest autodependency
    node_label = None
    if with_labels:
        node_label = {}
        for name, node in nodes.items():
            parts = []
            if node.is_autodependent:
                strongest = node.get_max_autodependent
                if with_lag:
                    parts.append(strongest[1])
                else:
                    parts.append(round(node.sources[strongest][SCORE], 3))
            node_label[name] = ",".join(str(p) for p in parts)

    # EDGE DEFINITION: links between distinct nodes only
    cross_links = [(key, target)
                   for target, node in nodes.items()
                   for key in node.sources
                   if target != key[0]]
    edges = [(key[0], target) for key, target in cross_links]
    G.add_edges_from(edges)

    # EDGE LINE: widest link among the lags sharing the same edge
    edge_width = {}
    for key, target in cross_links:
        edge = (key[0], target)
        edge_width[edge] = max(to_width(nodes[target].sources[key][SCORE]), edge_width.get(edge, 0))

    # EDGE LABEL: comma-separated lags or scores of the links sharing the same edge
    edge_label = None
    if with_labels:
        collected = {}
        for key, target in cross_links:
            if with_lag:
                value = key[1]
            else:
                value = round(nodes[target].sources[key][SCORE], 3)
            collected.setdefault((key[0], target), []).append(value)
        edge_label = {edge: ",".join(str(v) for v in values) for edge, values in collected.items()}

    plt.subplots(figsize=(8,6))

    if edges:
        drawn = Graph(G,
                      node_layout = node_layout,
                      node_size = node_size,
                      node_color = node_color,
                      node_labels = node_label,
                      node_edge_width = border,
                      node_label_fontdict = dict(size=font_size),
                      node_edge_color = edge_color,
                      node_label_offset = 0.1,
                      node_alpha = 1,

                      arrows = True,
                      edge_layout = 'curved',
                      edge_label = label_type != LabelType.NoLabels,
                      edge_labels = edge_label,
                      edge_label_fontdict = dict(size=font_size),
                      edge_color = edge_color,
                      edge_width = edge_width,
                      edge_alpha = 1,
                      edge_zorder = 1,
                      edge_label_position = 0.35,
                      edge_layout_kwargs = dict(bundle_parallel_edges = bundle_parallel_edges, k = 0.05))

        # Variable names are drawn on top of the nodes
        nx.draw_networkx_labels(G,
                                pos = drawn.node_positions,
                                labels = {n: n for n in G},
                                font_size = font_size)

    if save_name is None:
        plt.show()
    else:
        plt.savefig(save_name + img_extention.value, dpi = 300)

del_source(t, s, lag)

Removes source node from a target node

Parameters:

Name Type Description Default
t str

target node name

required
s str

source node name

required
lag int

dependency lag

required
Source code in fpcmci/graph/DAG.py
100
101
102
103
104
105
106
107
108
109
110
def del_source(self, t, s, lag):
    """
    Delete the link going from source `s` (at the given lag) to target `t`.

    The `(s, lag)` entry is dropped from the sources of `t`, then one
    occurrence of `t` is removed from the children list of `s`.

    Args:
        t (str): target node name
        s (str): source node name
        lag (int): dependency lag

    Raises:
        KeyError: if `t` or `s` is not a node, or `(s, lag)` is not a source of `t`
        ValueError: if `t` is not among the children of `s`
    """
    link = (s, lag)
    self.g[t].sources.pop(link)
    self.g[s].children.remove(t)

fully_connected_dag()

Build a fully connected DAG

Source code in fpcmci/graph/DAG.py
76
77
78
79
80
81
82
def fully_connected_dag(self):
    """
    Build a fully connected DAG

    Every node (itself included) is added as a source of every node, once
    for each lag from 1 to max_lag. Links are registered through add_source
    with 1 and 0 as third and fourth arguments (presumably score and
    p-value -- confirm against add_source).
    """
    for target in self.g:
        for source in self.g:
            for lag in range(1, self.max_lag + 1):
                self.add_source(target, source, 1, 0, lag)

get_SCM()

Returns SCM

Returns:

Name Type Description
dict dict

SCM

Source code in fpcmci/graph/DAG.py
182
183
184
185
186
187
188
189
190
191
192
193
def get_SCM(self) -> dict:
    """
    Returns SCM

    The result maps every feature name to the list of its sources, each
    expressed as a (source name, -|lag|) tuple. Features without sources
    map to an empty list.

    Returns:
        dict: SCM
    """
    scm = dict()
    for feature in self.features:
        scm[feature] = []
    for target, node in self.g.items():
        for source in node.sources:
            name, lag = source[0], source[1]
            # lags are reported as non-positive offsets
            scm[target].append((name, -abs(lag)))
    return scm

Returns the link assumption dictionary

Parameters:

Name Type Description Default
autodep_ok bool

If True, the autodependency link assumption is -->; otherwise it is -?>. Defaults to False.

False

Returns:

Name Type Description
dict dict

link assumption dictionary

Source code in fpcmci/graph/DAG.py
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
def get_link_assumptions(self, autodep_ok = False) -> dict:
    """
    Returns the link assumption dictionary

    The result is keyed by target feature index; each value maps a
    (source feature index, -|lag|) tuple to a link marker:

    - '-->' for an autodependency link when autodep_ok is True
    - '-?>' for a source that is not a context variable
    - '-->' for the context variable associated to the target itself

    A context variable acting as source of any other target gets no entry.

    Args:
        autodep_ok (bool, optional): If True, autodependency link assumption = -->. Otherwise -?>. Defaults to False.

    Returns:
        dict: link assumption dictionary
    """
    index_of = self.features.index
    link_assump = {index_of(f): {} for f in self.features}
    for target, node in self.g.items():
        for source in node.sources:
            name = source[0]
            if autodep_ok and name == target:
                # NOTE: autodependency links are taken for granted so they are not controlled twice
                marker = '-->'
            elif name not in self.sys_context.values():
                marker = '-?>'
            elif target in self.sys_context and name == self.sys_context[target]:
                marker = '-->'
            else:
                continue
            link_assump[index_of(target)][(index_of(name), -abs(source[1]))] = marker

    return link_assump

get_parents()

Returns Parents dict

Returns:

Name Type Description
dict dict

Parents dict

Source code in fpcmci/graph/DAG.py
196
197
198
199
200
201
202
203
204
205
206
207
def get_parents(self) -> dict:
    """
    Returns Parents dict

    Same content as the SCM, but variables are identified by their position
    in self.features: each target index maps to a list of
    (source index, -|lag|) tuples.

    Returns:
        dict: Parents dict
    """
    index_of = self.features.index
    parents = {index_of(feature): [] for feature in self.features}
    for target, node in self.g.items():
        for source in node.sources:
            parents[index_of(target)].append((index_of(source[0]), -abs(source[1])))
    return parents

get_pval_matrix()

Returns the pval matrix, which contains the p-values of the links composing the causal model.

Returns:

Type Description
np.array

np.array: pval matrix

Source code in fpcmci/graph/DAG.py
508
509
510
511
512
513
514
515
516
517
518
519
520
521
def get_pval_matrix(self) -> np.array:
    """
    Returns pval matrix.
    The pval matrix stores the p-value of each link composing the causal model.

    One (n_features x n_features) matrix is built per lag in
    [min_lag, max_lag]; entry [target index, source index] holds the
    'pval' of that link and stays 0 where no link exists. Sources whose lag
    falls outside the range are ignored.

    Returns:
        np.array: pval matrix
    """
    n_vars = len(self.features)
    lags = range(self.min_lag, self.max_lag + 1)
    matrices = [np.zeros(shape=(n_vars, n_vars)) for _ in lags]
    for target, node in self.g.items():
        for source, info in node.sources.items():
            for slot, lag in enumerate(lags):
                if source[1] == lag:
                    matrices[slot][self.features.index(target), self.features.index(source[0])] = info['pval']
    return np.array(matrices)

get_skeleton()

Returns the skeleton matrix, a matrix of 0s and 1s: an entry is 1 if there is a link from source to target and 0 if there is not.

Returns:

Type Description
np.array

np.array: skeleton matrix

Source code in fpcmci/graph/DAG.py
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
def get_skeleton(self) -> np.array:
    """
    Returns skeleton matrix.
    The skeleton matrix is made of 0s and 1s:
    1 <- there is a link from source to target
    0 <- there is no link from source to target

    One (n_features x n_features) int32 matrix is built per lag in
    [min_lag, max_lag], indexed as [target index, source index].

    Returns:
        np.array: skeleton matrix
    """
    n_vars = len(self.features)
    lags = range(self.min_lag, self.max_lag + 1)
    matrices = [np.zeros(shape=(n_vars, n_vars), dtype = np.int32) for _ in lags]
    for target, node in self.g.items():
        for source in node.sources:
            for slot, lag in enumerate(lags):
                if source[1] == lag:
                    matrices[slot][self.features.index(target), self.features.index(source[0])] = 1
    return np.array(matrices)

get_val_matrix()

Returns the val matrix, which contains the strength of the links composing the causal model.

Returns:

Type Description
np.array

np.array: val matrix

Source code in fpcmci/graph/DAG.py
492
493
494
495
496
497
498
499
500
501
502
503
504
505
def get_val_matrix(self) -> np.array:
    """
    Returns val matrix.
    The val matrix stores the strength (score) of each link composing the causal model.

    One (n_features x n_features) matrix is built per lag in
    [min_lag, max_lag]; entry [target index, source index] holds the
    'score' of that link and stays 0 where no link exists. Sources whose
    lag falls outside the range are ignored.

    Returns:
        np.array: val matrix
    """
    n_vars = len(self.features)
    lags = range(self.min_lag, self.max_lag + 1)
    matrices = [np.zeros(shape=(n_vars, n_vars)) for _ in lags]
    for target, node in self.g.items():
        for source, info in node.sources.items():
            for slot, lag in enumerate(lags):
                if source[1] == lag:
                    matrices[slot][self.features.index(target), self.features.index(source[0])] = info['score']
    return np.array(matrices)

make_pretty()

Makes variables' names pretty, i.e. $ varname $

Returns:

Name Type Description
dict dict

pretty DAG

Source code in fpcmci/graph/DAG.py
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
def make_pretty(self) -> dict:
    """
    Makes variables' names pretty, i.e. $ varname $

    Each node is deep-copied and re-keyed under its name wrapped in '$';
    the copy's name, children and source keys are wrapped the same way.
    The graph stored in self.g is left untouched.

    Returns:
        dict: pretty DAG
    """
    pretty = {}
    for name, node in self.g.items():
        pretty_name = '$' + name + '$'
        clone = copy.deepcopy(node)
        clone.name = pretty_name
        clone.children = ['$' + child + '$' for child in node.children]
        for source in node.sources:
            # swap the plain (name, lag) key for its pretty counterpart
            del clone.sources[source]
            link_info = node.sources[source]
            clone.sources[('$' + source[0] + '$', source[1])] = {
                SCORE: link_info[SCORE],
                PVAL: link_info[PVAL],
            }
        pretty[pretty_name] = clone
    return pretty

remove_context()

Remove context variables

Source code in fpcmci/graph/DAG.py
141
142
143
144
145
146
147
148
149
150
151
152
153
154
def remove_context(self):
    """
    Remove context variables

    For every (system variable, context variable) pair in self.sys_context
    whose system variable is in the graph: the system node's
    associated_context is reset to None, the lag-1 link from the context
    variable is deleted, and the context node is removed from the graph.
    The system node's intervention_node flag is left untouched.
    """
    for sys_var, context_var in self.sys_context.items():
        if sys_var not in self.g:
            continue

        # Detach the context variable from the system variable
        self.g[sys_var].associated_context = None
        self.del_source(sys_var, context_var, 1)

        # Drop the context node itself
        del self.g[context_var]

remove_unneeded_features()

Removes isolated nodes

Source code in fpcmci/graph/DAG.py
113
114
115
116
117
118
119
120
121
122
def remove_unneeded_features(self):
    """
    Removes isolated nodes

    Every node whose is_isolated property is true is dropped from the
    graph. When such a node is an intervention node, its associated context
    node is dropped together with it. self.g is replaced by a pruned deep
    copy of the original graph.
    """
    pruned = copy.deepcopy(self.g)
    for name, node in self.g.items():
        if not node.is_isolated:
            continue
        if node.intervention_node:
            # FIXME (from the original author): context removal still to be tested.
            # pop() instead of del: the context node may be missing already
            # (removed with another node, or never set), and being isolated
            # itself it may be visited again later in this loop.
            pruned.pop(node.associated_context, None)
        pruned.pop(name, None)
    self.g = pruned

ts_dag(tau, min_width=1, max_width=5, min_score=0, max_score=1, node_size=8, node_proximity=2, node_color='orange', edge_color='grey', font_size=12, save_name=None, img_extention=ImageExt.PNG)

Builds a time-series DAG

Parameters:

Name Type Description Default
tau int

max time lag

required
min_width int

minimum linewidth. Defaults to 1.

1
max_width int

maximum linewidth. Defaults to 5.

5
min_score int

minimum score range. Defaults to 0.

0
max_score int

maximum score range. Defaults to 1.

1
node_size int

node size. Defaults to 8.

8
node_proximity int

node proximity. Defaults to 2.

2
node_color str

node color. Defaults to 'orange'.

'orange'
edge_color str

edge color. Defaults to 'grey'.

'grey'
font_size int

font size. Defaults to 12.

12
save_name str

Filename path. If None, plot is shown and not saved. Defaults to None.

None
Source code in fpcmci/graph/DAG.py
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
def ts_dag(self,
           tau,
           min_width = 1, max_width = 5,
           min_score = 0, max_score = 1,
           node_size = 8,
           node_proximity = 2,
           node_color = 'orange',
           edge_color = 'grey',
           font_size = 12,
           save_name = None,
           img_extention = ImageExt.PNG):
    """
    Build and draw a time-series DAG

    The graph is unrolled over tau + 1 time steps: nodes are laid out on a
    grid with one column per time step and one row per variable, and every
    link is repeated back in time for as long as its source column stays
    inside the grid. Edge widths are computed from the link scores through
    self.__scale. The figure is either shown or saved at 300 dpi.

    Args:
        tau (int): max time lag
        min_width (int, optional): minimum linewidth. Defaults to 1.
        max_width (int, optional): maximum linewidth. Defaults to 5.
        min_score (int, optional): minimum score range. Defaults to 0.
        max_score (int, optional): maximum score range. Defaults to 1.
        node_size (int, optional): node size. Defaults to 8.
        node_proximity (int, optional): node proximity (the vertical grid coordinate is divided by it). Defaults to 2.
        node_color (str, optional): node color. Defaults to 'orange'.
        edge_color (str, optional): edge color. Defaults to 'grey'.
        font_size (int, optional): font size. Defaults to 12.
        save_name (str, optional): Filename path. If None, plot is shown and not saved. Defaults to None.
        img_extention (ImageExt, optional): its `.value` is appended to save_name when saving. Defaults to ImageExt.PNG.
    """

    # work on a copy whose variable names are wrapped in '$'
    r = copy.deepcopy(self)
    r.g = r.make_pretty()

    # add nodes
    # grid nodes are (column, row) tuples: tau + 1 columns, one row per variable
    G = nx.grid_2d_graph(tau + 1, len(r.g.keys()))
    pos = {n : (n[0], n[1]/node_proximity) for n in G.nodes()}
    # largest position tuple (tuples compare lexicographically); used for the Graph scale below
    scale = max(pos.values())
    # only the grid nodes are kept, the grid edges are discarded
    G.remove_edges_from(G.edges())

    # Nodes color definition
    # node_c = ['tab:blue', 'tab:orange','tab:red', 'tab:purple']
    # node_c = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple']
    # node_color = dict()
    # tmpG = nx.grid_2d_graph(self.max_lag + 1, len(r.g.keys()))
    # for n in tmpG.nodes():
    #     node_color[n] = node_c[abs(n[1] - (len(r.g.keys()) - 1))]

    # edges definition
    edges = list()
    edge_width = dict()
    for t in r.g:
        for s in r.g[t].sources:
            # row indices are reversed: the first variable gets the highest row
            s_index = len(r.g.keys())-1 - list(r.g.keys()).index(s[0])
            t_index = len(r.g.keys())-1 - list(r.g.keys()).index(t)

            # start from the last column and replicate the link backwards in time
            s_lag = tau - s[1]
            t_lag = tau
            # NOTE(review): if s[1] == 0 (and tau >= 0) neither s_lag nor t_lag changes
            # inside the loop, so it would never terminate -- confirm lag-0 sources cannot reach here
            while s_lag >= 0:
                s_node = (s_lag, s_index)
                t_node = (t_lag, t_index)
                edges.append((s_node, t_node))
                edge_width[(s_node, t_node)] = self.__scale(r.g[t].sources[s][SCORE], min_width, max_width, min_score, max_score)
                s_lag -= s[1]
                t_lag -= s[1]

    G.add_edges_from(edges)

    # label definition
    # only the nodes of the first column (n[0] == 0) carry the variable name
    labeldict = {}
    for n in G.nodes():
        if n[0] == 0:
            labeldict[n] = list(r.g.keys())[len(r.g.keys()) - 1 - n[1]]

    fig, ax = plt.subplots(figsize=(8,6))

    # time line text drawing
    # column p is captioned "t" when p == tau, otherwise "t-k" with k = |p - tau|
    pos_tau = set([pos[p][0] for p in pos])
    max_y = max([pos[p][1] for p in pos])
    for p in pos_tau:
        if abs(int(p) - tau) == 0:
            ax.text(p, max_y + .3, r"$t$", horizontalalignment='center', fontsize=font_size)
        else:
            ax.text(p, max_y + .3, r"$t-" + str(abs(int(p) - tau)) + "$", horizontalalignment='center', fontsize=font_size)

    Graph(G,
        node_layout = {p : np.array(pos[p]) for p in pos},
        node_size = node_size,
        node_color = node_color,
        node_labels = labeldict,
        node_label_offset = 0,
        node_edge_width = 0,
        node_label_fontdict = dict(size=font_size),
        node_alpha = 1,

        arrows = True,
        edge_layout = 'curved',
        edge_label = False,
        edge_color = edge_color, 
        edge_width = edge_width,
        edge_alpha = 1,
        edge_zorder = 1,
        scale = (scale[0] + 2, scale[1] + 2))

    # save to save_name + extension when a name is given, otherwise show the figure
    if save_name is not None:
        plt.savefig(save_name + img_extention.value, dpi = 300)
    else:
        plt.show()
Source code in fpcmci/graph/Node.py
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
class Node():

    def __init__(self, name, neglect_autodep):
        """
        Node class constructor

        Args:
            name (str): node name
            neglect_autodep (bool): flag to decide whether to skip the node if it is only auto-dependent
        """
        self.name = name
        # maps (source name, lag) -> link information
        self.sources = dict()
        # names of the nodes this node is a source of
        self.children = list()
        self.neglect_autodep = neglect_autodep
        self.intervention_node = False
        self.associated_context = None


    @property
    def is_autodependent(self) -> bool:
        """
        Returns True if the node is autodependent

        Returns:
            bool: Returns True if the node is autodependent. Otherwise False
        """
        return self.name in self.sourcelist


    @property
    def is_isolated(self) -> bool:
        """
        Returns True if the node is isolated

        A node is isolated when it has no child other than itself and its
        parents are limited to: nothing, or only its context variable. When
        neglect_autodep is set, "only itself" and "only itself plus its
        context variable" are accepted instead of "only its context variable".

        Returns:
            bool: Returns True if the node is isolated. Otherwise False
        """
        if self.neglect_autodep:
            return (self.is_exogenous or self.is_only_autodep or self.is_only_autodep_context) and not self.has_child

        return (self.is_exogenous or self.has_only_context) and not self.has_child


    @property
    def is_only_autodep(self) -> bool:
        """
        Returns True if the node is ONLY auto-dependent

        Returns:
            bool: Returns True if the node is ONLY auto-dependent. Otherwise False
        """
        return len(self.sources) == 1 and self.name in self.sourcelist


    @property
    def has_only_context(self) -> bool:
        """
        Returns True if the node has ONLY the context variable as parent

        Returns:
            bool: Returns True if the node has ONLY the context variable as parent. Otherwise False
        """
        return len(self.sources) == 1 and self.associated_context in self.sourcelist


    @property
    def is_only_autodep_context(self) -> bool:
        """
        Returns True if the node has ONLY the context variable and itself as parent

        Returns:
            bool: Returns True if the node has ONLY the context variable and itself as parent. Otherwise False
        """
        return len(self.sources) == 2 and self.name in self.sourcelist and self.associated_context in self.sourcelist


    @property
    def is_exogenous(self) -> bool:
        """
        Returns True if the node has no parents

        Returns:
            bool: Returns True if the node has no parents. Otherwise False
        """
        return len(self.sources) == 0


    @property
    def has_child(self) -> bool:
        """
        Returns True if the node has at least one child other than itself

        Returns:
            bool: Returns True if the node has at least one child other than itself. Otherwise False
        """
        # Every occurrence of the node's own name is ignored: the name may be
        # listed more than once (e.g. autodependency at several lags).
        return any(child != self.name for child in self.children)


    @property
    def sourcelist(self) -> list:
        """
        Returns list of source names

        Returns:
            list: Returns list of source names
        """
        return [s[0] for s in self.sources]


    @property
    def autodependency_links(self) -> list:
        """
        Returns list of autodependency links

        Returns:
            list: Returns list of autodependency links, i.e. the source keys whose name is the node's own name

        """
        return [s for s in self.sources if s[0] == self.name]


    @property
    def get_max_autodependent(self) -> tuple:
        """
        Returns the autodependency link with the highest score

        Returns:
            tuple: source key of the autodependency link with the highest
            score, or None when the node has no autodependency link with a
            score greater than 0
        """
        max_score = 0
        max_s = None
        for s in self.sources:
            if s[0] != self.name:
                continue
            score = self.sources[s][SCORE]
            if score > max_score:
                # track the running maximum, otherwise any later link with
                # a positive score would replace a stronger one
                max_score = score
                max_s = s
        return max_s

Returns list of autodependency links

Returns:

Name Type Description
list list

Returns list of autodependency links

get_max_autodependent: float property

Returns max score of autodependent link

Returns:

Name Type Description
float float

Returns max score of autodependent link

has_child: bool property

Returns True if the node has at least one child

Returns:

Name Type Description
bool bool

Returns True if the node has at least one child. Otherwise False

has_only_context: bool property

Returns True if the node has ONLY the context variable as parent

Returns:

Name Type Description
bool bool

Returns True if the node has ONLY the context variable as parent. Otherwise False

is_autodependent: bool property

Returns True if the node is autodependent

Returns:

Name Type Description
bool bool

Returns True if the node is autodependent. Otherwise False

is_exogenous: bool property

Returns True if the node has no parents

Returns:

Name Type Description
bool bool

Returns True if the node has no parents. Otherwise False

is_isolated: bool property

Returns True if the node is isolated

Returns:

Name Type Description
bool bool

Returns True if the node is isolated. Otherwise False

is_only_autodep: bool property

Returns True if the node is ONLY auto-dependent

Returns:

Name Type Description
bool bool

Returns True if the node is ONLY auto-dependent. Otherwise False

is_only_autodep_context: bool property

Returns True if the node has ONLY the context variable and itself as parent

Returns:

Name Type Description
bool bool

Returns True if the node has ONLY the context variable and itself as parent. Otherwise False

sourcelist: list property

Returns list of source names

Returns:

Name Type Description
list list

Returns list of source names

__init__(name, neglect_autodep)

Node class constructor

Parameters:

Name Type Description Default
name str

node name

required
neglect_autodep bool

flag to decide whether to skip the node if it is only auto-dependent

required
Source code in fpcmci/graph/Node.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
def __init__(self, name, neglect_autodep):
    """
    Node class constructor

    A new node starts with no sources and no children, is not an
    intervention node and has no associated context variable.

    Args:
        name (str): node name
        neglect_autodep (bool): flag to decide whether to skip the node if it is only auto-dependent
    """
    self.name = name
    self.neglect_autodep = neglect_autodep
    self.sources = {}
    self.children = []
    self.associated_context = None
    self.intervention_node = False