SI Text R Based Script for the Analysis of Whole Genome (8X15k) Microarrays Comparing

SI Text: R-based script for the analysis of whole-genome (8x15K) microarrays comparing a DDT-resistant field population with two more susceptible West African colonies, Akron (Benin) and Ngoussou (Cameroon).

###########################################################################
# LIMMA NORMALISATION OF RAW DATA - DDT 8x15k MICROARRAYS                 #
# (three way comparison, looped design)                                   #
###########################################################################

#Create a number of files for this process:

##Target file – listing the following for each array

#FileName   Treatment   GErep   cy3   cy5   date   arraynumber   slide

##Spot Type file – containing info on each spot type and designating a
#colour to the spot types for subsequent plots, i.e.

#SpotType          ControlType   ProbeName         GeneName          colour
#detoxtarget       0             DETOX_*           *                 brown
#WGtarget          0             CUST_*            *                 black
#CV                0             CV_A_90_P*        *                 yellow
#Brightcorner      1             GE_BrightCorner   *                 orange
#Darkcorner        1             DarkCorner        *                 grey
#positivecontrol   1             *                 *                 green
#negativecontrol   -1            *                 NegativeControl   red

##Design file – containing a row for every sample on all arrays; must be in
#the same order the arrays will be combined in at a later step, i.e. array
#numbers and cy3 (Green) cy5 (Red) order.

#Array                                                   Dye   Sample   Group
#US84700254_252705710011_S01re-b_GE2_105_Jan09_2_2.txt   Cy5   1        ddt
#US84700254_252705710011_S01re-b_GE2_105_Jan09_2_2.txt   Cy3   4        akron
#US84700254_252705710007_S01re-b_GE2_105_Jan09_1_2.txt   Cy5   4        akron
#US84700254_252705710007_S01re-b_GE2_105_Jan09_1_2.txt   Cy3   8        ngou
#US84700254_252705710007_S01re-b_GE2_105_Jan09_2_4.txt   Cy5   8        ngou
#US84700254_252705710007_S01re-b_GE2_105_Jan09_2_4.txt   Cy3   2        ddt
#US84700254_252705710011_S01re-b_GE2_105_Jan09_1_3.txt   Cy5   2        ddt
#US84700254_252705710011_S01re-b_GE2_105_Jan09_1_3.txt   Cy3   6        akron

# Point the R session at the folder holding the scan files (the path below is
# a placeholder to be replaced), then echo the directory now in use.
# The same session is used for both LIMMA and MAANOVA.
setwd("c:/Documents and Settings/LOCATION OF SCAN FILES")
getwd()

#open limma
# Fetch the Bioconductor installer script, install limma with it, then attach
# the package. The installer URL had been lost from the source() call, leaving
# an unterminated string; it is restored here.
# NOTE(review): biocLite is the legacy Bioconductor installer; on current R
# versions the equivalent is BiocManager::install("limma") - confirm which
# applies to the R version in use.
source("http://bioconductor.org/biocLite.R")
biocLite("limma")
library(limma)

# Load the targets table from "DDTTargets.txt", using its "arraynumber"
# column as row names, and print it for a visual check.
targets <- readTargets(file = "DDTTargets.txt", row.names = "arraynumber")
targets

# Spot weight function passed to read.maimages().
#
# x: table-like object (data frame or matrix) holding the columns
#    "rIsWellAboveBG" and "gIsWellAboveBG", coded 1 = yes, 0 = no.
# Returns a numeric vector with one entry per row: 1 when at least one of the
# two channels is flagged as well above background, 0 when neither is.
# Missing flags propagate through the sum exactly as in plain arithmetic.
myfun <- function(x) {
  red_above_bg <- x[, "rIsWellAboveBG"] == 1
  green_above_bg <- x[, "gIsWellAboveBG"] == 1
  channels_above_bg <- red_above_bg + green_above_bg
  as.numeric(channels_above_bg >= 1)
}

# Read the Agilent scan files listed in the targets table, weighting each spot
# with myfun. (Per the original note, limma's default for Agilent arrays is
# foreground mean signal and background median signal.)
RG <- read.maimages(files = targets, source = "agilent", wt.fun = myfun)
show(RG)

# Check that all genes and arrays were read correctly: dimensions first, then
# the annotation columns that came in with the probes.
dim(RG)
names(RG$genes)

# Order the probes by ProbeName so that duplicate probes sit next to each
# other. The ordering statement had been fused into the comment line, which
# left `index` undefined when RG was subset.
index <- order(RG$genes$ProbeName)
RG <- RG[index, ]

# Load the spot type definitions from "DDTSpotTypes.txt" and print them.
spottypes <- readSpotTypes(file = "DDTSpotTypes.txt")
spottypes

# Match the spot types against the probes in RG and store the result as the
# Status column (a character vector giving the control status of each spot on
# the array).
RG$genes$Status <- controlStatus(types = spottypes, genes = RG)

#Create a pre-normalisation MA-plot
# (the words "MA-plot" had been split onto their own line, where R would have
# evaluated them as the expression `MA - plot`)
# NOTE(review): the "or" below suggests an interactive plotting call preceded
# it in the original script; none is present in this source - confirm.
#or to create png images to file
# Straight ASCII quotes replace the typographic quotes around the prefix,
# which R cannot parse.
plotMA3by2(RG, prefix = "pre-normalised_MA")

#plot pre-normalised signal intensities
plotDensities(RG)

#save plot image
# jpeg() is lower-case in R (names are case-sensitive) and the file name must
# be in straight quotes; the device is closed again once the plot is drawn.
jpeg("pre-normalised_densities")
plotDensities(RG)
dev.off()

# Background correction.
# With method = "none" no correction is done, i.e. the background intensities
# are treated as zero. The offset adds a constant to the intensities before
# log-transforming, so that the log-ratios are shrunk towards zero at the
# lower intensities. This may eliminate or reverse the usual 'fanning' of
# log-ratios at low intensities associated with local background subtraction.
RG.b <- backgroundCorrect(
  RG,
  method = "none",
  offset = 50
)

# Write MA-plots of the background-corrected data to image files.
plotMA3by2(
  RG.b,
  prefix = "backgroundnonoffset50_MA"
)

#plot background corrected signal intensities
plotDensities(RG.b)

#save plot image
# jpeg() is lower-case in R (names are case-sensitive) and the file name must
# be in straight quotes; the device is closed again once the plot is drawn.
jpeg("backgroundnoneoffset50_densities")
plotDensities(RG.b)
dev.off()

# Weight spot types to be more/less important during normalisation: the spot
# weights are multiplied by 1 for the first three spot types in the spot type
# table and by 0 for the remaining four.
# (assumes the table lists exactly seven spot types, with the three
# target/probe types first - TODO confirm against DDTSpotTypes.txt)
RG$weights <- modifyWeights(
  RG$weights, RG$genes$Status, spottypes$SpotType,
  c(rep(1, 3), rep(0, 4))
)

# RG.b was copied from RG before the weights above were modified, so the same
# multipliers are applied to RG.b as well. Without this step the loess fit
# below would use the unmodified weights and the weighting would have no
# effect on the normalisation.
RG.b$weights <- modifyWeights(
  RG.b$weights, RG.b$genes$Status, spottypes$SpotType,
  c(rep(1, 3), rep(0, 4))
)

# Within-array normalisation of the background-corrected data, method loess.
MA <- normalizeWithinArrays(RG.b, method = "loess")

# Write MA-plots of the loess-normalised data to image files, coloured with
# the colours from the spot type table; zero-weight spots are included and
# all plots share common axis limits.
plotMA3by2(
  MA,
  prefix = "DDT_background_none_offset50_loess",
  col = spottypes$colour,
  zero.weights = TRUE,
  common.lim = TRUE
)

# Boxplot - check the distribution of the red/green log-ratios (M) for each
# array. Only spots with weight 1 are included; col(MA$M) supplies the array
# (column) index that groups the values. The default x axis is suppressed so
# that a custom one can be drawn below.
boxplot(
  MA$M[MA$weights == 1] ~ col(MA$M)[MA$weights == 1],
  names = colnames(MA$M),
  las = 2,
  main = "nobackgroundoffset50loesswithinDDT",
  ylab = "M",
  xaxt = "none",
  ylim = c(-10, 10)
)

# Custom x axis: one tick per row of the targets table.
# `labels` is now spelled out in full (the original `label` relied on partial
# argument matching) and seq_len() replaces 1:length().
# NOTE(review): presumably the targets file has a "Name" column; it is not in
# the column list given in the header comments - verify.
axis(1, at = seq_len(nrow(targets)), labels = targets$Name, las = 2)

#plot densities – check the red and green dye distributions after normalisation
# (the call below had been fused into the comment line, so it never ran)
plotDensities(MA)

###########################################################################
# CONVERSION TO MAANOVA FORMAT AND RUNNING MAANOVA                        #
###########################################################################

# Convert the normalised MA object back into red/green intensities.
RG.corrected <- RG.MA(MA)

# Constant grid coordinates, added as columns to the combined table below.
MetaRow <- 1
MetaCol <- 1

# Bind all the arrays together horizontally.
# Array 1 contributes the annotation: selected columns of the genes table
# (picked by position - presumably these match the layout MAANOVA expects;
# verify against names(RG$genes)), MetaRow/MetaCol, the red and green
# intensities, and a flag column computed as one minus the spot weight
# (weight 1 gives flag 0, weight 0 gives flag 1).
ghanaDDT1.raw <- as.data.frame(
  cbind(
    RG.corrected[, 1]$genes[c(7:11, 4)],
    MetaRow,
    MetaCol,
    RG.corrected[, 1]$genes[1:2],
    RG.corrected[, 1]$R,
    RG.corrected[, 1]$G,
    1 - RG.corrected[, 1]$weights
  )
)

# Array 2 contributes only its red, green and flag columns.
ghanaDDT1.raw.new <- as.data.frame(
  cbind(
    RG.corrected[, 2]$R,
    RG.corrected[, 2]$G,
    1 - RG.corrected[, 2]$weights
  )
)