## ----lesmis, echo=FALSE, message=FALSE, out.width=".6\\linewidth", fig.align="center"---- library(igraph) load("lesmis.rda") par(mar = rep(0,4)) plot(lesmis, vertex.size = 3, vertex.frame.color = "orange", vertex.label.color = "black", vertex.label.font = 2) ## ----summaryLesmis------------------------------------------------------- lesmis ## ----sessioninfo, echo=FALSE--------------------------------------------- sessionInfo() ## ----globalChar---------------------------------------------------------- graph.density(lesmis); triangles(lesmis); length(triangles(lesmis))/3 transitivity(lesmis); diameter(lesmis); radius(lesmis); girth(lesmis) ## ----globalCharErdos, echo=FALSE, cache=TRUE, message=FALSE-------------- set.seed(20041721) B <- 500 all_seeds <- sample(1:20041726, B, replace = FALSE) library(doMC) registerDoMC(cores = 3) global_char <- foreach (ind=1:B) %dopar% { set.seed(all_seeds[ind]) rg <- sample_gnm(vcount(lesmis), ecount(lesmis)) if (is.connected(rg)) { res <- c(graph.density(rg), length(triangles(rg))/3, transitivity(rg), diameter(rg), radius(rg), girth(rg)$girth, cohesion(rg)) } else res <- rep(NA, 7) return(res) } global_char <- matrix(unlist(global_char), byrow = TRUE, nrow= B) colnames(global_char) <- c("density", "triangles", "transitivity", "diameter", "radius", "girth", "cohesion") global_char <- na.omit(global_char) ## ----globalCharErdosPrint, echo=FALSE------------------------------------ summary(global_char) ## ----localChar----------------------------------------------------------- summary(degree(lesmis)) summary(betweenness(lesmis)) summary(eccentricity(lesmis)) summary(closeness(lesmis)) ## ----localCharErdos, echo=FALSE, cache=TRUE, message=FALSE--------------- set.seed(21041120) B <- 500 all_seeds <- sample(1:21041120, B, replace = FALSE) library(doMC) registerDoMC(cores = 3) local_char <- foreach (ind=1:B) %dopar% { set.seed(all_seeds[ind]) rg <- sample_gnm(vcount(lesmis), ecount(lesmis)) if (is.connected(rg)) { res <- c(mean(degree(rg)), mean(betweenness(rg)), mean(eccentricity(rg)), mean(closeness(rg))) } else res <- rep(NA, 4) return(res) } local_char <- matrix(unlist(local_char), byrow = TRUE, nrow= B) colnames(local_char) <- c("degree", "betweenness", "eccentricity", "closeness") local_char <- na.omit(local_char) ## ----localCharErdosPrint, echo=FALSE------------------------------------- summary(local_char) ## ----DegreeDist, echo=FALSE, fig.height=5-------------------------------- library(scales) par(mfrow = c(1,2)) dd <- degree.distribution(lesmis, cumulative = FALSE) est_alpha <- fit_power_law(degree(lesmis), implementation = "R.mle") plot(log(seq_along(dd)), log(dd), xlab = expression(log(k)), ylab = expression(log(P("degree" = k))), pch = "+") abline(0, -est_alpha@coef, col = "darkred", lwd = 2) vsize <- 10 * sqrt(degree(lesmis) / max(degree(lesmis))) dclass <- as.numeric(cut(sqrt(degree(lesmis)), 10, FALSE)) valpha <- dclass / 10 par(mar = rep(0,4)) plot(lesmis, vertex.size = vsize, vertex.label = NA, vertex.color = alpha("darkred", valpha)) ## ----globalCharSF, echo=FALSE, cache=TRUE, message=FALSE----------------- set.seed(21041043) B <- 500 all_seeds <- sample(1:21041043, B, replace = FALSE) library(doMC) registerDoMC(cores = 3) global_char <- foreach (ind=1:B) %dopar% { set.seed(all_seeds[ind]) rg <- sample_pa(vcount(lesmis), m = 3, power = est_alpha@coef, directed = FALSE) if (is.connected(rg)) { res <- c(graph.density(rg), length(triangles(rg))/3, transitivity(rg), diameter(rg), radius(rg), girth(rg)$girth, cohesion(rg), mean(degree(rg)), mean(betweenness(rg)), mean(eccentricity(rg)), mean(closeness(rg))) } else res <- rep(NA, 9) return(res) } global_char <- matrix(unlist(global_char), byrow = TRUE, nrow= B) colnames(global_char) <- c("density", "triangles", "transitivity", "diameter", "radius", "girth", "cohesion", "degree", "betweenness", "eccentricity", "closeness") global_char <- na.omit(global_char) ## ----globalCharSFPrint, echo=FALSE--------------------------------------- summary(global_char) ## ----rewiredGraphs, echo=FALSE, cache=TRUE, message=FALSE---------------- set.seed(22041651) B <- 500 iter <- 100 all_seeds <- sample(1:22041651, B, replace = FALSE) library(doMC) registerDoMC(cores = 3) global_char <- foreach (ind=1:B) %dopar% { set.seed(all_seeds[ind]) rg <- rewire(lesmis, keeping_degseq(n = iter)) if (is.connected(rg) & is.simple(rg)) { res <- list("betweenness" = betweenness(rg), "triangles" = length(triangles(rg)) / 3, "transitivity" = transitivity(rg)) } else res <- list("betweenness" = rep(NA, vcount(lesmis)), "triangles" = NA, "transitivity" = NA) return(res) } all_betweenness <- lapply(global_char, getElement, name = "betweenness") all_betweenness <- matrix(unlist(all_betweenness), byrow = TRUE, nrow = B) all_betweenness <- na.omit(all_betweenness) all_triangles <- unlist(lapply(global_char, getElement, name = "triangles")) all_triangles <- na.omit(all_triangles) all_transitivity <- unlist(lapply(global_char, getElement, name = "transitivity")) all_transitivity <- na.omit(all_transitivity) ## ----outputRewire, echo=FALSE, fig.height=5------------------------------ # output: compare real value with random distribution par(mfrow = c(1,2)) hist(all_triangles, col = "pink", xlim = range(c(all_triangles, length(triangles(lesmis))/3)), main = "", xlab = "Number of triangles") abline(v = length(triangles(lesmis))/3, lwd = 2, col = "darkred") hist(all_transitivity, col = "lightblue", xlim = range(c(all_transitivity, transitivity(lesmis))), main = "", xlab = "transitivity") abline(v = transitivity(lesmis), lwd = 2, col = "darkblue") ## ----rewiredGraphsLocal, echo=FALSE, cache=TRUE, message=FALSE----------- # obtain estimated p-values bet_lesmis <- betweenness(lesmis) valid_exp <- nrow(all_betweenness) c_betweenness <- rbind(bet_lesmis, all_betweenness) p_high <- apply(c_betweenness, 2, function(acol) sum(acol[1] > acol[-1]) / valid_exp) p_low <- apply(c_betweenness, 2, function(acol) sum(acol[1] < acol[-1]) / valid_exp) par(mfrow = c(1,2)) # plot results vsize <- log(bet_lesmis + 1) dclass <- as.numeric(cut(log(bet_lesmis + 1), 10, FALSE)) valpha <- dclass / 10 par(mar = rep(0,4)) plot(lesmis, vertex.size = vsize, vertex.label = NA, vertex.color = alpha("darkred", valpha)) vcol <- rep("lightgray", vcount(lesmis)) vcol[which(p_high > 0.95)] <- "darkred" vcol[which(p_low > 0.95)] <- "darkblue" vlab <- rep("", vcount(lesmis)) vlab[which(p_high > 0.95)] <- V(lesmis)$label[which(p_high > 0.95)] vlab[which(p_low > 0.95)] <- V(lesmis)$label[which(p_low > 0.95)] par(mar = rep(0,4)) plot(lesmis, vertex.size = vsize, vertex.color = vcol, vertex.label = vlab, vertex.label.color = vcol, vertex.label.cex = 0.7, vertex.label.font = 2) ## ----seedClust, echo=FALSE----------------------------------------------- set.seed(24041755) ## ----clustering, cache=TRUE---------------------------------------------- res_time <- cbind( system.time(res_hierarchical <- cluster_fast_greedy(lesmis)), system.time(res_multilevel <- cluster_louvain(lesmis)), system.time(res_annealing <- cluster_spinglass(lesmis)), system.time(res_exact <- cluster_optimal(lesmis)) )[3, ] ## ----clusteringTime, echo=FALSE------------------------------------------ names(res_time) <- c("hierarchical", "multilevel", "annealing", "exact") res_time ## ----microbenchClustering, cache=TRUE------------------------------------ library(microbenchmark) res_micro <- microbenchmark(cluster_fast_greedy(lesmis), cluster_louvain(lesmis)) ## ----plotMicrobenchmark, echo=FALSE, fig.height=2------------------------ library(microbenchmark) library(ggplot2) autoplot(res_micro) ## ----plotModularity, echo=FALSE, fig.height=5---------------------------- par(mfrow = c(2, 2)) par(mar = rep(1, 4)) plot(lesmis, vertex.size = 5, vertex.label = NA, vertex.color = membership(res_hierarchical), vertex.frame.color = membership(res_hierarchical), main = paste("hierarchical -", round(modularity(res_hierarchical), 4), "-", length(unique(membership(res_hierarchical))))) plot(lesmis, vertex.size = 5, vertex.label = NA, vertex.color = membership(res_multilevel), vertex.frame.color = membership(res_multilevel), main = paste("multilevel -", round(modularity(res_multilevel), 4), "-", length(unique(membership(res_multilevel))))) plot(lesmis, vertex.size = 5, vertex.label = NA, vertex.color = membership(res_annealing), vertex.frame.color = membership(res_annealing), main = paste("simulated annealing -", round(modularity(res_annealing), 4), "-", length(unique(membership(res_annealing))))) plot(lesmis, vertex.size = 5, vertex.label = NA, vertex.color = membership(res_exact), vertex.frame.color = membership(res_exact), main = paste("exact -", round(modularity(res_exact), 4), "-", length(unique(membership(res_exact))))) ## ----rewiredGraphs2, echo=FALSE, cache=TRUE, message=FALSE, fig.height=4---- set.seed(30041808) B <- 500 iter <- 100 all_seeds <- sample(1:30041808, B, replace = FALSE) library(doMC) registerDoMC(cores = 3) all_modularities <- foreach (ind=1:B) %dopar% { set.seed(all_seeds[ind]) rg <- rewire(lesmis, keeping_degseq(n = iter)) if (is.connected(rg) & is.simple(rg)) { res <- modularity(cluster_louvain(rg)) } else res <- NA return(res) } all_modularities <- na.omit(unlist(all_modularities)) hist(all_modularities, col = "pink", xlim = range(c(all_modularities, modularity(res_multilevel))), main = "", xlab = "Modularity") abline(v = modularity(res_multilevel), col = "darkred", lwd = 2) ## ----disconnectedGraph, echo=FALSE, fig.height=3------------------------- set.seed(29042016) A <- matrix(c(0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0), ncol = 5, nrow = 5) a_graph <- graph_from_adjacency_matrix(A, mode = "undirected") par(mar = rep(0,4)) plot(a_graph, vertex.size = 20, vertex.label.color = "black", vertex.color = c(rep("orange", 3), rep("green", 2))) ## ----seedClust2, echo=FALSE---------------------------------------------- set.seed(29041446) ## ----spectralClustering, cache=TRUE-------------------------------------- res_time_spec <- system.time({ spec_embed <- embed_laplacian_matrix(lesmis, no = 6, which = "sa", scaled = FALSE) res_spectral <- kmeans(spec_embed$X[ ,-1], centers = 6, nstart = 1) })[3] res_time_spec ## ----plotModularity2, echo=FALSE, fig.height=3--------------------------- par(mfrow = c(1, 2)) par(mar = rep(1, 4)) plot(lesmis, vertex.size = 5, vertex.label = NA, vertex.color = res_spectral$cluster, vertex.frame.color = res_spectral$cluster, main = paste("spectral clustering -", round(modularity(lesmis, res_spectral$cluster), 4), "-", length(unique(res_spectral$cluster)))) plot(lesmis, vertex.size = 5, vertex.label = NA, vertex.color = membership(res_exact), vertex.frame.color = membership(res_exact), main = paste("exact -", round(modularity(res_exact), 4), "-", length(unique(membership(res_exact))))) ## ----seedClust3, echo=FALSE---------------------------------------------- set.seed(30041805) ## ----SBMclustering, cache=TRUE, results='hide', fig.show='hide'---------- library(blockmodels) res_time_sbm <- system.time({ res_sbm <- BM_bernoulli("SBM_sym", as_adjacency_matrix(lesmis, sparse = FALSE)) res_sbm$estimate() })[3] ## ----resSBMclustering---------------------------------------------------- res_time_sbm opt_K <- which.max(res_sbm$ICL) opt_K sbm_clust <- apply(res_sbm$memberships[[opt_K]]$Z, 1, which.max) ## ----plotModularity3, echo=FALSE, fig.height=3--------------------------- par(mfrow = c(1, 2)) par(mar = rep(1, 4)) plot(lesmis, vertex.size = 5, vertex.label = NA, vertex.color = sbm_clust, vertex.frame.color = sbm_clust, main = paste("SBM clustering -", round(modularity(lesmis, sbm_clust), 4), "-", opt_K)) plot(lesmis, vertex.size = 5, vertex.label = NA, vertex.color = membership(res_exact), vertex.frame.color = membership(res_exact), main = paste("exact -", round(modularity(res_exact), 4), "-", length(unique(membership(res_exact))))) ## ----compareClust, echo=FALSE, fig.height=5------------------------------ all_methods <- list("hierarchical" = membership(res_hierarchical), "multilevel" = membership(res_multilevel), "annealing" = membership(res_annealing), "exact" = membership(res_exact), "spectral" = res_spectral$cluster, "sbm" = sbm_clust) nb <- length(all_methods) compare_rand <- 1 - outer(1:nb, 1:nb, Vectorize(function(i,j) compare(all_methods[[i]], all_methods[[j]], method = "rand"))) rownames(compare_rand) <- colnames(compare_rand) <- names(all_methods) clustering_rand <- hclust(as.dist(compare_rand)) compare_nmi <- 1 - outer(1:nb, 1:nb, Vectorize(function(i,j) compare(all_methods[[i]], all_methods[[j]], method = "nmi"))) rownames(compare_nmi) <- colnames(compare_nmi) <- names(all_methods) clustering_nmi <- hclust(as.dist(compare_nmi)) par(mfrow = c(1,2)) plot(clustering_rand, main = "Rand index") plot(clustering_nmi, main = "NMI")