Simulated Example:

Prop1 = 95, Power1 = 3, Sizes 100, 200, 300

Tova Fuller, Steve Horvath, Bin Zhang

Correspondence to

This is an R tutorial explaining a variant of the simulated example in Zhang and Horvath (2005). To cite this tutorial, please use the following reference.

Zhang B, Horvath S (2005) A General Framework for Weighted Gene Co-Expression Network Analysis. Statistical Applications in Genetics and Molecular Biology.

See also our webpage at

http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/

The original simulated network example may be found at the following URL:

http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/SimulatedNetworkHorvath.doc

Further details regarding module construction can be found at the tutorial referenced above. The purpose of this tutorial is to demonstrate the behavior of both unweighted and weighted networks with power of signal decreased relative to the power of noise. To accomplish this, we set power1 = 3. In this example, the power of the noise was not modified from its value of 5.

In this example, we find a lower power1 value yields more robust results than with power1 = 5. Specifically, we find a very robust biological signal to the choice of AF parameter in soft thresholding. In addition, we find both connectivity measures as well as our clustering coefficient perform extremely well for all values of beta. By contrast, these measures perform worse for higher values of beta in our unweighted network constructed with power1 = 5.

if(exists("modulecreation") )rm(modulecreation); modulecreation=function(size1,prop1=.95,power1=3) {

# Note abolve prop1=1

# noise "background"

seq1=seq(1,size1)/size1

vnoise=( .85+(.95-.85)*seq1)^power1

SIMILARITY1=outer(vnoise,vnoise,FUN="*")

# this specifies the red noise rectangle of highly connected noise genes

SIMILARITY1[c((prop1*size1):size1), c((prop1*size1):size1)] =.95

# this specificies the signal part of the similarity matrix

vsignal=(1-.3*seq1)^power1

# signal part

SIMILARITY1[c(1:(size1*prop1)) ,c(1:(size1*prop1)) ]= outer(vsignal,vsignal,FUN="pmin")[c(1:(size1*prop1)) , c(1:(size1*prop1)) ]

diag(SIMILARITY1)=1

list(ModuleCOR= SIMILARITY1, GS= vsignal, k=apply(SIMILARITY1,2,sum),

CC=ClusterCoef.fun, lastColumn = SIMILARITY1[,size1])

}

library(class)

library(cluster)

library(sma) # this is needed for plot.mat below

library(scatterplot3d)

source("C:/Documents and Settings/SHorvath/My Documents/ADAG/JunDong/YEAST/YEASTtutorialApril2005/NetworkFunctions.txt")

#Memory

# check the maximum memory that can be allocated

memory.size(TRUE)

# increase the available memory

memory.limit(size=3448)

set.seed(1)

# module 1

n1=100;

par(mfrow=c(3,2))

mod1= modulecreation(n1)

module1=mod1[[1]];GS1= mod1[[2]];k1= mod1[[3]];CC1=mod1[[4]]

seq1=mod1$lastColumn

# This allows you to look at the constructed module

par(mfrow=c(1,1),mar=c(0,0,0,0))

reverse1=rank(-c(1:n1))

image(1-t(module1[reverse1,]))

# This bar reports the Gene significance measure of the genes.

signifBar = cbind(1-GS1, 1-GS1,1-GS1,1-GS1)

par(mfrow=c(1,1), mar=c(0, 0, 0, 0))

heatmap(as.matrix(t(signifBar)),Rowv=NA,Colv=NA,scale="none", xlab="", ylab="", margins = c(0,0))

# module 2

n2=200;mod1= modulecreation(n2)

module2=mod1[[1]];GS2= mod1[[2]];k2= mod1[[3]];CC2=mod1[[4]]

seq2=mod1$lastColumn

# module 3

n3=300;mod1= modulecreation(n3)

module3=mod1[[1]];GS3= mod1[[2]];k3= mod1[[3]];CC3=mod1[[4]]

seq3=mod1$lastColumn

# grey genes

n4=500

nTotal=n1+n2+n3+n4

seqTotal= seq(1:nTotal)/(nTotal+1)

Similarity1= matrix(0 , nrow=nTotal, ncol=nTotal)

index1=c(1:n1);index2=c((n1+1):(n1+n2)); index3=c((n1+n2+1):(n1+n2+n3))

index4=c((n1+n2+n3+1):(n1+n2+n3+n4))

Similarity1[index1,index1]=module1; Similarity1[index2,index2]=module2

Similarity1[index3,index3]=module3

# now we add noise to the correlation matrix to make it more realistic

# the noise is added in a way to ensure that the entries lie between 0 and 1.

Similarity1=Similarity1+(1-Similarity1)*matrix( runif(nTotal*nTotal)^5,nrow=nTotal,ncol=nTotal)

# here we symmetrize the similarity matrix

Similarity1=( t(Similarity1)+ Similarity1)/2

diag(Similarity1)=1

# here we verify that the entries lie between 0 and 1

summary(c(as.dist(Similarity1 )))

# true underlying module label

truemodule=rep(c("brown", "blue","turquoise","grey"),c(n1,n2,n3,n4))

# This is a color-coded picture of the similarity matrix

reverse1=rank(-c(1:nTotal))

par(mfrow=c(1,1),mar=c(0,0,0,0))

image(1-t(Similarity1[reverse1,]))

# The following shows the simulated module membership.

barplot(rep(1,nTotal),col=as.character(truemodule),space=0,border= as.character(truemodule),offset=0)

# SCALE FREE TOPOLOGY CHECK FOR SOFT AND HARD Thresholding

# no.breaks1=specifies the number of points in a scale free plot.

# Unfortunately R^2 depends on this to some extent --> opportunity for research

no.breaks1=9

powers1=seq(1,9,by=1)

TableSoft=data.frame(matrix(NA,nrow=length(powers1),ncol=4))

names(TableSoft)=c("powers", "ScaleFreeRsquared", "Slope", "TruncatedRsquared")

par(mfrow=c(3,3), mar= c(2, 2, 2, 2) )

for (i in c(1:length(powers1)) ){

TableSoft[i,1]=powers1[i]

ADJsoft=abs(Similarity1)^powers1[i];diag(ADJsoft)=0;ksoft=apply(ADJsoft,2,sum)

TableSoft[i, 2:4]=ScaleFreePlot1(ksoft, truncated=T,AF1= paste("beta=",as.character(powers1[i])), no.breaks=no.breaks1)

}

thresholds1= seq(from=.1,to=.9,by=0.1)

TableHard=data.frame(matrix(NA,nrow=length(thresholds1),ncol=4))

names(TableHard)=c("thresholds", "ScaleFreeRsquared", "Slope", "TruncatedRsquared")

par(mfrow=c(3,3), mar= c(2, 2, 2, 2) )

for (i in c(1:length(thresholds1)) ){

TableHard[i,1]=thresholds1[i]

ADJhard=abs(Similarity1)>thresholds1[i];diag(ADJhard)=0;khard=apply(ADJhard,2,sum)

TableHard[i, 2:4]=ScaleFreePlot1(khard, truncated=T,AF1= paste("tau=",as.character(thresholds1[i])) ,no.breaks=no.breaks1)

}

par(mfrow=c(1,1),mar= c(5, 4, 4, 2) +0.1)

plot(1:length(powers1),

-sign(TableHard[,3])*TableHard[,2], type="n",ylab="Scale Free Topology R^2",xlab="Adjacency Parameter Number", ylim=range(min(c(-sign(TableSoft[,3])*TableSoft[,1], -sign(TableHard[,3])*TableHard[,1]),na.rm=T),1) )

text(1:length(thresholds1), -sign(TableHard[,3])*TableHard[,2], labels= thresholds1, col="black")

points(1:length(powers1), -sign(TableSoft[,3])*TableSoft[,2], type="n")

text(1:length(powers1), -sign(TableSoft[,3])*TableSoft[,2], labels= as.character(powers1), col="red")

abline(h=.8)

# Based on where the "kink" occurs, the scale free topology

# criterion would lead us to choose the following parameters

# for the soft and hard adjacency function

# R^2>0.8

beta1=5 # this is the the power adjacency function parameter in power(s,beta)

tau1=0.9 # this parameter is hard threshold parameter in the signum function.

# Creating Scale Free Topology Plots

par(mfrow=c(2,1))

ADJsoft=abs(Similarity1)^beta1;diag(ADJsoft)=0;ksoft=apply(ADJsoft,2,sum)

ScaleFreePlot1(ksoft, truncated=T,AF1= paste("beta=",as.character(beta1)), ,no.breaks=no.breaks1)

ADJhard=I(Similarity1>tau1)+0.0; diag(ADJhard)=0;khard=apply(ADJhard,2,sum)

ScaleFreePlot1(khard,truncated=T,AF1=paste("tau=",as.character(tau1)) ,no.breaks=no.breaks1 )

# Now we define the gene significance variable for the brown module

# We permute the GS measures in the other modules to make the example realistic

GS=c(GS1,sample(GS2)/2,sample(GS3)/2, sample(GS1,n4,replace=T)/2 )

# The following code assumes that we can identify the true underlying brown module.

# In reality this is not the case, which is why we revisit this issue below.

datconnectivitiesSoft=data.frame(matrix(NA,nrow=sum(truemodule=="brown"),ncol=length(powers1)))

names(datconnectivitiesSoft)=paste("kWithinPower",powers1,sep="")

for (i in c(1:length(powers1)) ) {

datconnectivitiesSoft[,i]=apply(abs(Similarity1[truemodule=="brown", truemodule=="brown"])^powers1[i],1,sum)}

SpearmanCorrelationsSoft=signif(cor(GS[ truemodule=="brown"], datconnectivitiesSoft, method="s",use="p"))

datconnectivitiesHard=data.frame(matrix(NA,nrow=sum(truemodule=="brown"),ncol=length(thresholds1)))

names(datconnectivitiesHard)=paste("kWithinPower",thresholds1,sep="")

for (i in c(1:length(thresholds1)) ) {

datconnectivitiesHard[,i]=apply(abs(Similarity1[truemodule=="brown", truemodule=="brown"])>thresholds1[i],1,sum)}

SpearmanCorrelationsHard=signif(cor(GS[ truemodule=="brown"], datconnectivitiesHard, method="s",use="p"))

par(mfrow=c(2,2))

plot(powers1, SpearmanCorrelationsSoft,type="n", main="Soft")

text(powers1, SpearmanCorrelationsSoft,labels=powers1,col="red")

abline(v=beta1,col="red")

plot(thresholds1, SpearmanCorrelationsHard,type="n",

main="Hard" ,ylim= range( c(SpearmanCorrelationsSoft, SpearmanCorrelationsHard),na.rm=T) )

text(thresholds1, SpearmanCorrelationsHard,labels=thresholds1,col="black")

abline(v=tau1,col="red")

plot(-sign(TableHard[,3])*TableHard[,2], SpearmanCorrelationsHard,type="n",ylim=range(c(SpearmanCorrelationsHard, SpearmanCorrelationsSoft),na.rm=T),

xlim=range(c(-sign(TableHard[,3])*TableHard[,2], -sign(TableSoft[,3])*TableSoft[,2]),na.rm=T))

text(-sign(TableHard[,3])*TableHard[,2] , SpearmanCorrelationsHard,label=as.character(thresholds1))

points(-sign(TableSoft[,3])*TableSoft[,2]

, SpearmanCorrelationsSoft, pch=as.character(powers1),col="red")

Soft thresholding, as expected, leads to a very robust signal to the choise of AF parameter.

## TOM PLOT

# The following code computes the topological overlap matrix based on

# adjacency matrix.

dissGTOM1soft=TOMdist1(ADJsoft)

collect_garbage()

dissGTOM1hard=TOMdist1(ADJhard)

collect_garbage()

# resulting clustering tree will be used to define gene modules.

hierGTOM1soft <- hclust(as.dist(dissGTOM1soft),method="average");

hierGTOM1hard <- hclust(as.dist(dissGTOM1hard),method="average");

par(mfrow=c(1,2))

plot(hierGTOM1soft,labels=F,sub="",xlab="")

plot(hierGTOM1hard,labels=F,sub="",xlab="")

# here we chose the height cut-off such that many "true" brown module genes are # included in the designated brown module.

h1soft= .94

h1hard= .91

colorh1soft=as.character(modulecolor2(hierGTOM1soft,h1= h1soft, minsize1=20))

#colorh1soft=ifelse(colorh1soft=="grey","brown",colorh1soft)

table(colorh1soft)

colorh1hard=as.character(modulecolor2(hierGTOM1hard,h1= h1hard,minsize1=20))

#colorh1hard=ifelse(colorh1hard=="grey","brown",colorh1hard)

table(colorh1hard)

par(mfrow=c(3,2))

plot(hierGTOM1soft,labels=F,sub="",xlab="")

plot(hierGTOM1hard,labels=F,sub="",xlab="")

hclustplot1(hierGTOM1soft,truemodule,title1="True module colors")

hclustplot1(hierGTOM1hard,truemodule,title1="True module colors")

hclustplot1(hierGTOM1soft, colorh1soft,title1="Identified module colors")

hclustplot1(hierGTOM1hard, colorh1hard,title1="Identified module colors")

table(colorh1soft,truemodule)

table(colorh1hard,truemodule)

> table(colorh1soft,truemodule)

truemodule

colorh1soft blue brown grey turquoise

blue 199 0 3 0

brown 0 92 0 0

grey 1 8 484 0

turquoise 0 0 13 300

> table(colorh1hard,truemodule)

truemodule

colorh1hard blue brown grey turquoise

blue 50 1 9 2

brown 1 23 6 2

grey 148 76 467 185

turquoise 1 0 15 88

yellow 0 0 3 23

Soft thresholding does a much better job at cluster retrieval.

par(mfrow=c(1,1))

# if you have patience and enough stack memory, try to run this code

#TOMplot1(dissGTOM1soft , hierGTOM1soft, colorh1soft)

#TOMplot1(dissGTOM1hard , hierGTOM1hard, colorh1hard)

# Instead, I will restrict the TOM plot to the most connected genes.

restrict1= ksoft>12.5

table(restrict1)

>table(restrict1)

restrict1

FALSE TRUE

500 600

dissGTOM1softrest=TOMdist1(ADJsoft[restrict1,restrict1])

collect_garbage()

dissGTOM1hardrest=TOMdist1(ADJhard[restrict1,restrict1])

hierGTOM1softrest <- hclust(as.dist(dissGTOM1softrest),method="average");

hierGTOM1hardrest <- hclust(as.dist(dissGTOM1hardrest),method="average");

# SOFT TOM PLOT

par(mfrow=c(1,1))

TOMplot1(dissGTOM1softrest , hierGTOM1softrest, truemodule[restrict1])


# HARD TOM PLOT

TOMplot1(dissGTOM1hardrest , hierGTOM1hardrest, colorh1hard[restrict1])


# We also propose to use classical multi-dimensional scaling plots

# for visualizing the network. Here we chose 2 scaling dimensions

cmd1=cmdscale(as.dist(dissGTOM1soft),2)

cmd2=cmdscale(as.dist(dissGTOM1hard),2)

par(mfrow=c(1,2))

plot(cmd1, col=as.character(colorh1soft), main="Soft MDS plot",xlab="Scaling Axis 1", ylab="Scaling Axis 2")

plot(cmd2, col=as.character(colorh1hard), main="Hard MDS plot",xlab="Scaling Axis 1", ylab="Scaling Axis 2")


# Just to be sure let’s re-define the adjacency matrix and connectivities used.

ADJhard=I(Similarity1>tau1)+0.0; diag(ADJhard)=0;khard=apply(ADJhard,2,sum)

ADJsoft=abs(Similarity1)^beta1;diag(ADJsoft)=0;ksoft=apply(ADJsoft,2,sum)

ADJhardbrown=I(abs(Similarity1[colorh1hard=="brown", colorh1hard=="brown"])

>tau1)+0.0; diag(ADJhardbrown)=0;khardbrown=apply(ADJhardbrown,2,sum)

ADJsoftbrown=abs(Similarity1[colorh1soft=="brown", colorh1soft=="brown"])^beta1;diag(ADJsoftbrown)=0;ksoftbrown=apply(ADJsoftbrown,2,sum)

# This computes the intramodular connectivities

AllDegreesSoft=DegreeInOut(ADJsoft,colorh1soft)

AllDegreesHard=DegreeInOut(ADJhard,colorh1hard)

# Here we compute the soft and the hard cluster coefficients

cluster.coefSoft= ClusterCoef.fun(ADJsoft)

cluster.coefHard= ClusterCoef.fun(ADJhard)

# Now we plot cluster coefficient versus connectivity

# for all genes. We use the true module coloring

par(mfrow=c(2,1))

plot(ksoft, cluster.coefSoft,col=truemodule,ylab="Cluster Coefficient",xlab="Connectivity",main="Soft")

#whichmodule="turquoise"

#plot(AllDegreesSoftkWithin[truemodule==whichmodule],

#cluster.coefSoft[truemodule==whichmodule],col=whichmodule,ylab="Cluster #Coefficient",xlab="Connectivity",main=whichmodule)

plot(khard, cluster.coefHard,col=truemodule,ylab="Cluster Coefficient",xlab="Connectivity",main="Hard")

Comment: for the high connectivity genes in each module CC is inversely related to K in unweighted networks (hard thresholding). In contrast, there is a slightly positive (or constant) relationship among hub genes in weighted networks.

#Gene significance analysis

# Now we study how the gene significance is related to connectivity in the brown module

datconnectivitiesSoft=data.frame(matrix(666,nrow=sum(colorh1soft=="brown"),ncol=length(powers1)))

names(datconnectivitiesSoft)=paste("kWithinPower",powers1,sep="")

for (i in c(1:length(powers1)) ) {

datconnectivitiesSoft[,i]=apply(abs(Similarity1[colorh1soft=="brown", colorh1soft=="brown"])^powers1[i],1,sum)}

SpearmanCorrelationsSoft=signif(cor(GS[ colorh1soft=="brown"], datconnectivitiesSoft, method="s",use="p"))

datconnectivitiesHard=data.frame(matrix(666,nrow=sum(colorh1hard=="brown"),ncol=length(thresholds1)))

names(datconnectivitiesHard)=paste("kWithinPower",thresholds1,sep="")

for (i in c(1:length(thresholds1)) ) {

datconnectivitiesHard[,i]=apply(abs(Similarity1[colorh1hard=="brown", colorh1hard=="brown"])>thresholds1[i],1,sum)}

SpearmanCorrelationsHard=signif(cor(GS[ colorh1hard=="brown"], datconnectivitiesHard, method="s",use="p"))

par(mfrow=c(2,2))

plot(powers1, SpearmanCorrelationsSoft,type="n" ,xlab="Power (beta)" , main="Soft")

text(powers1, SpearmanCorrelationsSoft,labels=powers1,col="red")

abline(v=beta1,col="red")

plot(thresholds1, SpearmanCorrelationsHard,type="n" ,ylim= range( c(SpearmanCorrelationsHard),na.rm=T) ,xlab="Thresholds (tau)" ,main="Hard")

text(thresholds1, SpearmanCorrelationsHard,labels=thresholds1,col="black")

abline(v=tau1,col="red")

plot(-sign(TableHard[,3])*TableHard[,2], SpearmanCorrelationsHard,type="n" ,

ylim=range(c(SpearmanCorrelationsHard, SpearmanCorrelationsSoft),na.rm=T), xlim=range(c(-sign(TableHard[,3])*TableHard[,2], -sign(TableSoft[,3])*TableSoft[,2]),na.rm=T),xlab="Signed Scale Free R^2",ylab="Correlation(k, Gene Significance)" )

text(-sign(TableHard[,3])*TableHard[,2], SpearmanCorrelationsHard,label=as.character(thresholds1))

points(-sign(TableSoft[,3])*TableSoft[,2], SpearmanCorrelationsSoft, type="n")

text(-sign(TableSoft[,3])*TableSoft[,2], SpearmanCorrelationsSoft,labels=as.character(powers1),col="red")

We have very robust results for soft thresholding – the results are even more robust than with power1 = 5. For all values of beta, our biological signal is approximately ~0.85. Note also our spearman correlation is highest for beta = 4. This differs with other simulation examples with power1 >5, where a lower value of beta elicits maximum signal.

# Now we want to see how the correlation between kWithin and gene significance

# changes for different hard threshold values tau within the BROWN module.

whichmodule="brown"

# the following data frame contains the intramodular connectivities

# corresponding to different hard thresholds

datconnectivitiesHard=data.frame(matrix(666,nrow=sum(colorh1hard==whichmodule),ncol=length(thresholds1)))

names(datconnectivitiesHard)=paste("kWithinTau",thresholds1,sep="")

for (i in c(1:length(thresholds1)) ) {

datconnectivitiesHard[,i]=apply(abs(Similarity1[colorh1hard==whichmodule, colorh1hard==whichmodule])>=thresholds1[i],1,sum)}

SpearmanCorrelationsHard=signif(cor(GS[ colorh1hard==whichmodule], datconnectivitiesHard, method="s",use="p"))

# Now we define the new connectivity measure omega based on the TOM matrix

# It simply considers TOM as adjacency matrix...

datOmegaINHard=data.frame(matrix(666,nrow=sum(colorh1hard==whichmodule),ncol=length(thresholds1)))

names(datOmegaINHard)=paste("omegaWithinHard",thresholds1,sep="")

for (i in c(1:length(thresholds1)) ) {

datconnectivitiesHard[,i]=apply(

1-TOMdist1(abs(Similarity1[colorh1hard==whichmodule, colorh1hard==whichmodule])>thresholds1[i]),1,sum)}

SpearmanCorrelationsOmegaHard=as.vector(signif(cor(GS[ colorh1hard==whichmodule], datconnectivitiesHard, method="s",use="p")))

# Now we define the intramodular cluster coefficient

datCCinHard=data.frame(matrix(666,nrow=sum(colorh1hard==whichmodule),ncol=length(thresholds1)))

names(datCCinHard)=paste("CCinHard",thresholds1,sep="")

for (i in c(1:length(thresholds1)) ) {

datCCinHard[,i]= ClusterCoef.fun(abs(Similarity1[colorh1hard==whichmodule, colorh1hard==whichmodule])>thresholds1[i])}

SpearmanCorrelationsCCinHard=as.vector(signif(cor(GS[ colorh1hard==whichmodule], datCCinHard, method="s",use="p")))

# Now we compare the performance of the connectivity measures (k.in,

# omega.in, cluster coefficience) across different hard thresholds when it comes to

# predicting prognostic genes in the brown module

dathelpHard=data.frame(signedRsquared=-sign(TableHard[,3])*TableHard[,2], corGSkINHard =as.vector(SpearmanCorrelationsHard), corGSwINHard= as.vector(SpearmanCorrelationsOmegaHard),corGSCCHard=as.vector(SpearmanCorrelationsCCinHard))

# Now we focus on soft thresholding

# the following data frame contains the intramodular connectivities

# corresponding to different soft powers

datconnectivitiesSoft=data.frame(matrix(666,nrow=sum(colorh1soft==whichmodule),ncol=length(powers1)))

names(datconnectivitiesSoft)=paste("kWithinTau",powers1,sep="")

for (i in c(1:length(powers1)) ) {

datconnectivitiesSoft[,i]=apply(abs(Similarity1[colorh1soft==whichmodule, colorh1soft==whichmodule])^powers1[i],1,sum)}

SpearmanCorrelationsSoft=signif(cor(GS[ colorh1soft==whichmodule], datconnectivitiesSoft, method="s",use="p"))

# Now we define the new connectivity measure omega based on the TOM #matrix

# It simply considers TOM as adjacency matrix...

datOmegaINSoft=data.frame(matrix(666,nrow=sum(colorh1soft==whichmodule),ncol=length(powers1)))

names(datOmegaINSoft)=paste("omegaWithinSoft",powers1,sep="")

for (i in c(1:length(powers1)) ) {

datconnectivitiesSoft[,i]=apply(

1-TOMdist1(abs(Similarity1[colorh1soft==whichmodule, colorh1soft==whichmodule])^powers1[i]),1,sum)}

SpearmanCorrelationsOmegaSoft=as.vector(signif(cor(GS[ colorh1soft==whichmodule], datconnectivitiesSoft, method="s",use="p")))

# Now we define the intramodular cluster coefficient

datCCinSoft=data.frame(matrix(666,nrow=sum(colorh1soft==whichmodule),ncol=length(powers1)))

names(datCCinSoft)=paste("CCinSoft",powers1,sep="")

for (i in c(1:length(powers1)) ) {

datCCinSoft[,i]= ClusterCoef.fun(abs(Similarity1[colorh1soft==whichmodule, colorh1soft==whichmodule])^powers1[i])}

SpearmanCorrelationsCCinSoft=as.vector(signif(cor(GS[ colorh1soft==whichmodule], datCCinSoft, method="s",use="p")))

# Now we compare the performance of the connectivity measures (k.in,

# omega.in, cluster coefficience) across different soft powers when it comes to predicting

# prognostic genes in the brown module

dathelpSoft=data.frame(signedRsquared=-sign(TableSoft[,3])*TableSoft[,2], corGSkINSoft =as.vector(SpearmanCorrelationsSoft), corGSwINSoft= as.vector(SpearmanCorrelationsOmegaSoft),corGSCCSoft=as.vector(SpearmanCorrelationsCCinSoft))

par(mfrow=c(1,1))

matplot(thresholds1,dathelpHard,type="l",lty=1,lwd=3,col=c("black","red","blue","green"),ylab="",xlab="tau",main="Hard")

legend(0.65,0.2, c("signed R^2","r(GS,k.in)","r(GS,omega.in)","r(GS,cc.in)"), col=c("black","red","blue","green"), lty=1,lwd=3,ncol = 1, cex=1)

abline(v=tau1,col="red")

matplot(powers1,dathelpSoft,type="l",lty=1,lwd=3,col=c("black","red","blue","green"),ylab="",xlab="beta",main="Soft")

legend(6.8,0.65, c("signed R^2","r(GS,k.in)","r(GS,omega.in)","r(GS,cc.in)"), col=c("black","red","blue","green"), lty=1,lwd=3,ncol = 1, cex=1)

abline(v=beta1,col="red")

Both connectivity measures and the clustering coefficient perform well for all values of beta. This differs from the result achieved with power1 = 5, where all three of these measures only perform well for low values of beta. Omega.in outperforms k.in for nearly all values of beta in our weighetd network.

THE END

1