Identify connected subnetworks, constrained by edge attributes

126 Views Asked by At

I am working with network data and want to identify connected subnetworks. I loaded my data using
graph <- graph_from_data_frame(filtered_df, directed = FALSE) and ploted my network

plot(graph)

E(graph)$conflict_period
[1] "72_1"   "72_1"   "72_1"   "72_1"   "72_1"   "72_1"   "72_1"   "72_1"   "72_1"   "72_1"   "72_1"   "372_1"  "372_1"  "372_1" 
[15] "372_1"  "372_1"  "372_1"  "372_1"  "372_1"  "372_1"  "372_1"  "522_0"  "522_0"  "522_0"  "522_0"  "522_0"  "522_0"  "522_0" 
[29] "522_0"  "522_0"  "522_0"  "715_0"  "715_0"  "715_0"  "715_0"  "5390_0"

So far, the information on the subgroups is stored in the edges. A Node belonges to each subgroup it receives an edge from, so a Node can also belong to multiple subgroups.For instance Government of Mali belongs together with Civilians to subgroups "72_1" and togehther with FIAA to subgroup "372_1". I want to know if subgroups "72_1" and "372_1" are conected, which they are if at least two Nodes belonging to subgroup "72_1" are conected via edges to at least two Nodes belonging to subgroup"372_1". I tired ways outside Network analysis to identify this relationship, but failed. Now I am here asking for help.

enter image description here

the desired output would be a table listing the connected subgroup based on the aforementioned criterium. In this case it should be:

conflict_period connected
72_1 372_1,522_0
372_1 72_1,522_0
522_0 372_1,72_1
715_0 NA
5390_0 NA

Here is the used data:

structure(list(side_a = c("Government of Mali", "Government of Mali", 
"Government of Mali", "Government of Mali", "Government of Mali", 
"Government of Mali", "Government of Mali", "Government of Mali", 
"Government of Mali", "Government of Mali", "Government of Mali", 
"Government of Mali", "Government of Mali", "Government of Mali", 
"Government of Mali", "Government of Mali", "Government of Mali", 
"Government of Mali", "Government of Mali", "Government of Mali", 
"Government of Mali", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA", 
"FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "MPGK", "MPGK", "MPGK", 
"MPGK", "ARLA, FIAA, FPLA"), side_b = c("Civilians", "Civilians", 
"Civilians", "Civilians", "Civilians", "Civilians", "Civilians", 
"Civilians", "Civilians", "Civilians", "Civilians", "FIAA", "FIAA", 
"FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA", 
"Civilians", "Civilians", "Civilians", "Civilians", "Civilians", 
"Civilians", "Civilians", "Civilians", "Civilians", "Civilians", 
"Civilians", "Civilians", "Civilians", "Civilians", "MPA"), country = c("Mali", 
"Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", 
"Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", 
"Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", 
"Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", 
"Mali", "Mali", "Mali"), period_start = structure(c(765158400, 
765158400, 765158400, 765158400, 765158400, 765158400, 765158400, 
765158400, 765158400, 765158400, 765158400, 770256000, 770256000, 
770256000, 770256000, 770256000, 770256000, 770256000, 770256000, 
770256000, 770256000, 771552000, 771552000, 771552000, 771552000, 
771552000, 771552000, 771552000, 771552000, 771552000, 771552000, 
769910400, 769910400, 769910400, 769910400, 771206400), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), period_end = structure(c(817862400, 817862400, 817862400, 
817862400, 817862400, 817862400, 817862400, 817862400, 817862400, 
817862400, 817862400, 819158400, 819158400, 819158400, 819158400, 
819158400, 819158400, 819158400, 819158400, 819158400, 819158400, 
816739200, 816739200, 816739200, 816739200, 816739200, 816739200, 
816739200, 816739200, 816739200, 816739200, 814492800, 814492800, 
814492800, 814492800, 802828800), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), conflict_period = c("72_1", "72_1", "72_1", "72_1", 
"72_1", "72_1", "72_1", "72_1", "72_1", "72_1", "72_1", "372_1", 
"372_1", "372_1", "372_1", "372_1", "372_1", "372_1", "372_1", 
"372_1", "372_1", "522_0", "522_0", "522_0", "522_0", "522_0", 
"522_0", "522_0", "522_0", "522_0", "522_0", "715_0", "715_0", 
"715_0", "715_0", "5390_0")), row.names = c(NA, -36L), class = c("tbl_df", 
"tbl", "data.frame"))
4

There are 4 best solutions below

9
ThomasIsCoding On BEST ANSWER

According to your latest update, here is one option to achieve your desired output

library(igraph)
library(dplyr)

# simplify the dataframe and generate a graph
g <- df %>%
    select(matches("^side|conflict")) %>%
    distinct() %>%
    graph_from_data_frame(directed = FALSE)

# repeat pruning the graph and retaining the subgraph(s) such that each node has the degree >= 2
gh <- g
repeat {
    d <- degree(gh)
    if (min(d) >= 2) break
    gh <- induced_subgraph(gh, V(gh)[d >= 2])
}

# retrieve the vertices of each desired cluster
clt <- lapply(decompose(gh), \(x) E(x)$conflict_period)

# go through each conflict_period and if they are in one of the found clusters
out <- sapply(
    E(g)$conflict_period,
    \(p) {
        lapply(clt, \(q) {
            if (p %in% q) {
                setdiff(q, p)
            } else {
                NA
            }
        })
    }
)

# produce the desired output
res <- within(
    data.frame(conflict_period = names(out)),
    connected <- out
)

such that

> res
  conflict_period    connected
1            72_1 372_1, 522_0
2           372_1  72_1, 522_0
3           522_0  372_1, 72_1
4           715_0           NA
5          5390_0           NA
3
Grzegorz Sapijaszko On

Use igraph::groups() to find the connected clusters/subnetworks:

filtered_df <- structure(list(side_a = c(
  "Government of Mali", "Government of Mali",
  "Government of Mali", "Government of Mali", "Government of Mali",
  "Government of Mali", "Government of Mali", "Government of Mali",
  "Government of Mali", "Government of Mali", "Government of Mali",
  "Government of Mali", "Government of Mali", "Government of Mali",
  "Government of Mali", "Government of Mali", "Government of Mali",
  "Government of Mali", "Government of Mali", "Government of Mali",
  "Government of Mali", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA",
  "FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "MPGK", "MPGK", "MPGK",
  "MPGK", "ARLA, FIAA, FPLA"
), side_b = c(
  "Civilians", "Civilians",
  "Civilians", "Civilians", "Civilians", "Civilians", "Civilians",
  "Civilians", "Civilians", "Civilians", "Civilians", "FIAA", "FIAA",
  "FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA",
  "Civilians", "Civilians", "Civilians", "Civilians", "Civilians",
  "Civilians", "Civilians", "Civilians", "Civilians", "Civilians",
  "Civilians", "Civilians", "Civilians", "Civilians", "MPA"
), country = c(
  "Mali",
  "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali",
  "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali",
  "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali",
  "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali",
  "Mali", "Mali", "Mali"
), period_start = structure(c(
  765158400,
  765158400, 765158400, 765158400, 765158400, 765158400, 765158400,
  765158400, 765158400, 765158400, 765158400, 770256000, 770256000,
  770256000, 770256000, 770256000, 770256000, 770256000, 770256000,
  770256000, 770256000, 771552000, 771552000, 771552000, 771552000,
  771552000, 771552000, 771552000, 771552000, 771552000, 771552000,
  769910400, 769910400, 769910400, 769910400, 771206400
), tzone = "UTC", class = c(
  "POSIXct",
  "POSIXt"
)), period_end = structure(c(
  817862400, 817862400, 817862400,
  817862400, 817862400, 817862400, 817862400, 817862400, 817862400,
  817862400, 817862400, 819158400, 819158400, 819158400, 819158400,
  819158400, 819158400, 819158400, 819158400, 819158400, 819158400,
  816739200, 816739200, 816739200, 816739200, 816739200, 816739200,
  816739200, 816739200, 816739200, 816739200, 814492800, 814492800,
  814492800, 814492800, 802828800
), tzone = "UTC", class = c(
  "POSIXct",
  "POSIXt"
)), conflict_period = c(
  "72_1", "72_1", "72_1", "72_1",
  "72_1", "72_1", "72_1", "72_1", "72_1", "72_1", "72_1", "372_1",
  "372_1", "372_1", "372_1", "372_1", "372_1", "372_1", "372_1",
  "372_1", "372_1", "522_0", "522_0", "522_0", "522_0", "522_0",
  "522_0", "522_0", "522_0", "522_0", "522_0", "715_0", "715_0",
  "715_0", "715_0", "5390_0"
)), row.names = c(NA, -36L), class = c(
  "tbl_df",
  "tbl", "data.frame"
))

graph <- igraph::graph_from_data_frame(filtered_df, directed = FALSE)

plot(graph)


graph |>
  igraph::cluster_optimal() |>
  igraph::groups()
#> $`1`
#> [1] "Government of Mali" "FIAA"               "MPGK"              
#> [4] "Civilians"         
#> 
#> $`2`
#> [1] "ARLA, FIAA, FPLA" "MPA"

Created on 2024-02-29 with reprex v2.1.0

1
ThomasIsCoding On

Probably you should use decompose to separate the subgraphs and then retrieve the "subgroup" information in edges, e.g.,

df %>%
    graph_from_data_frame(directed = FALSE) %>%
    decompose() %>%
    map(\(x) unique(E(x)$conflict_period))

which outputs

[[1]]
[1] "72_1"  "372_1" "522_0" "715_0"

[[2]]
[1] "5390_0"
0
clp On

Employing cliques

                                                    # Sanitize input.
se <- E(graph)[which(count_multiple(graph) == 1)]   # Single edge.
# [1] "5390_0"
se$conflict_period                                  # Show. deletedconflicts
g2 <- delete_edges(graph, se)                       # Keep multiple edges.
g3 <- delete.vertices(g2, which(degree(g2)==0))     # Remove isolates.
g4 <- simplify(g3, edge.attr.comb = "first")        # Remove multiple edges.
E(g4)$label <- E(g4)$conflict_period                # Show edge label in plot().
plot(g4)

enter image description here

Observe that any 3-cycle a, b, c induces:

conflict_period  | connected
a                | b, c
b                | a, c
c                | a,b

In this small example we can read the solution directly from the graph.

                                            # show cliques.
cq  <- max_cliques(g4, min = 2, max=3)      # Find all 2,3-cycles.
tri <- list()                               # create a list of all cycles.
for (v in cq) tri[[length(tri)+1]] <- E(subgraph(g4, v))$conflict_period
tri
## [[1]]
## [1] "715_0"
## 
## [[2]]
## [1] "372_1" "72_1"  "522_0"