;
; ACD macro for exporting pixel table of integrals from HMBC spectrum
;
;
CheckDocument (Type = "FID"; Group = "")
FullFT (Dataset2 = "Complex"; Apodization2 = "Default"; InitialSize2 = 1; FinalSize2 = 1; Backward2 = "Off"; Forward2 = "Off"; FIDShift2 = "Default"; FirstPointScalingFactor2 = 1.00; Dataset1 = "Default"; Apodization1 = "Default"; InitialSize1 = 1; FinalSize1 = 1; Backward1 = "Off"; Forward1 = "Off"; FIDShift1 = "Default"; FirstPointScalingFactor1 = 1.00)
;
CheckDocument (Type = "Spectrum"; Group = "")
BaselineCorrection (NoiseFactor = 2)
AutoIntegration (Type = "Bucket"; BucketingModel = "NumberOfBuckets"; F2NumberOfBuckets = 128; F1NumberOfBuckets = 256; F2WidthOfBucket = 0.0400; F1WidthOfBucket = 0.9200; Reference = 10000.00)
ExportTable (Table = "Integrals"; Dir = "$(DOCDIR)"; FileName = "$(DOCNAME).int"; IfExist = "Overwrite")
;
;
;
###########################################
##
## Script for 2D-NMR-Data analysis for chemometrics
##
##
#
# this script is importing ACD integral files from 2D-NMR data proceeded with ACD
#
# for using this script all 2D NMR files should be treated the same way:
# same zero filling, same window function, same base line correction, ...
#
# all integral files have to have the same integration borders as well as
# the same number of bucklets (this script will not check for input errors!)
#
# to obtain good results you may use a marco in ACD (see top of the page)
#
# the integral files should be in one folder or in subfolders of a given
# path, please give all integral files the same and unambiguous file extention.
#
# this script will import the integral data into the 2D array ‘integralDAT’
# the first column contains the f2 chemical shift data, the second column the f1
# chemical shift data, all following columns contain the integral data of
# the corresponding bucklets
#
# with ‘plot(integralDAT[,3])’ you plot the integral data of the first
# integral file with an index number as axis of abscissa and intgegral values as ordinate
#
# for a 2D plot of integral data use ‘plot2DNMR’. With the command
#
# plot2DNMR(integralDAT[1,], integralDAT[2,], integralDAT[3,], type="contour")
#
# you get a 2D NMR contour plot having f2 chemical shift data (1H) on x axis and
# f1 chemical shift data (13C) on y axis of the first integral file.
#
# plot2DNMR(integralDAT[1,], integralDAT[2,], integralDAT[4,], type="filledcontour")
#
# plots a contour plot of the bucklet data from the second file.
#
#
# this script uses the ‘pca’ function from the ‘pcaMethods’ package written by
# Stacklies, W., Redestig, H., Scholz, M., Walther, D. and Selbig, J.
# pcaMethods -- a Bioconductor package providing PCA methods for
# incomplete data. Bioinformatics, 2007, 23, 1164-1167
# (DOI: 10.1093/bioinformatics/btm069)
#
# it is publicly available at
#
# additional to the standard plot function ‘plotPcs’ of this package
# this script provides you with some extra plot versions
# for the loading and score plots the first and second principle
# component are choosen as default. You can easily change it by
# changing the variables ‘pc1’ and ‘pc2’
#
# you can also adapt the trash level of the loadings by
# changing ‘maxpc1POS’ and ‘maxpc1NEG’
#
#
#
# please feel free to adapt this script to your own needs
#A DOWNLOAD WILL BE PROVIDED AT THE IPB HOMEPAGE ( in the databasestools section)
###########################################
###
### setup
### set working directory and file extention
###
# set working directory (path of the ACD integral files)
# use slash not back slash
FilePath <- "D:/Humulus/2D-NMR/Intdata/Set-01"
# set ending of the ACD integral files
FilePattern <- ".int"
#
###########################################
###
### function plot2DNMR
### var: xaxis, yaxis: chemical shifts
###
### function to plot an integral data file
### like a 2D NMR output
### (x axis F2 (1H), y axis F1 (13C))
###
###
plot2DNMR <- function(xaxis=NA, yaxis=NA, intvalue=NA, type="contour"){
intvalue <- matrix(data = intvalue, nrow = dim(table(yaxis)), ncol = dim(table(xaxis)))
mirrowIntValues <- intvalue
for(x in 1:dim(intvalue)[1]){mirrowIntValues[dim(intvalue)[1]+1-x, ] <- intvalue[x, ]}
for(x in 1:dim(intvalue)[2]){intvalue[, dim(intvalue)[2]+1-x] <- mirrowIntValues[, x]}
intvalue <- t(intvalue)
if(type=="contour"){
### contour plot
contour(intvalue, axes=F, main="HMBC", xlab="F2 Chemical Shift (ppm)", ylab="F1 Chemical Shift (ppm)")
axis(side = 1, at= seq(0,1,0.1), labels = round(digits=1, seq(from = max(xaxis), to = min(xaxis), length.out=11)))
axis(side = 2, at= seq(0,1,0.1), labels = round(seq(from = max(yaxis), to = min(yaxis), length.out=11)))
box()
} # end if
else{
### filled contour plot
filled.contour(log(intvalue), color = cm.colors,
plot.title = title(main = "HMBC",
xlab = "F2 Chemical Shift (ppm)",
ylab = "F1 Chemical Shift (ppm)" ),
key.title = title(main="log\nintensity"),
key.axes = axis(4, seq(-4, 6, length.out=11)),
plot.axes = {
axis(side = 2, at= seq(0,1,0.1), labels = round(seq(from = max(yaxis), to = min(yaxis), length.out=11)))
axis(side = 1, at= seq(0,1,0.1), labels = round(digits=1, seq(from = max(xaxis), to = min(xaxis), length.out=11)))
}
)
} # end else
}
### end function plot2DNMR
###
###########################################
###########################################
###
### function readNMRIntList2D
### var: Filename,Path
### result: data.frame (ppm:num, value:num)
###
### function to import an ACD integral list
### (Table of Integrals)
###
### assume, that the file to be imported has the right file format
### line 1: Headline "Table of Integrals" to be skipped
### line 2: Row Headlines: "No.", "F2 (ppm)", "F1 (ppm)", "Value" and "Abs. Value",
### to be skipped
###
### no error correction
readNMRIntList2D <- function(Path=NA, FileName=NA){
if(is.na(FileName))
{cat("readNMRIntList(FileName)\nno filename given\n")
NA}
else{
intDATA <- scan(file=paste(Path, FileName, sep="/"), dec=".", skip=2, what="character")
# remove "[" and "]"
intDATA <- sub("[", intDATA, replacement="", fixed=T)
intDATA <- sub("]", intDATA, replacement="")
length(intDATA)
intDATA <- array(data=intDATA, dim=c(9,length(intDATA)/9))
# delete row 1 (Integral Number), and 3, 6 ("..")
# use only rows 2 (F2 start), 4 (F2 end), 5 (F1 start), 7 (F1 end),
# 8 (Integral Value) and 9 (Absolute Integral Value)
intDATA <- intDATA[c(2,4,5,7:9),]
intDATA <- array(data=as.real(intDATA), dim = dim(intDATA))
rownames(intDATA) <- c("F2a","F2b","F1a","F1b","Int","absInt")
# don’t use start and end integral borders, use the mean
intDATA[1,] <- (intDATA[1,] + intDATA[2,])/2 ## mean F2
intDATA[3,] <- (intDATA[3,] + intDATA[4,])/2 ## mean F1
intDATA <- intDATA[c(1,3,5,6), ]
rownames(intDATA) <- c("F2","F1","Int","absInt")
cat(paste(sep="", "F2: ", length(table(intDATA[1,])), " (", min(intDATA[1,]), " - ", max(intDATA[1,]), " ppm) x F1: ",
length(table(intDATA[2,])), " (", min(intDATA[2,]), " - ", max(intDATA[2,]), " ppm) ", "\n"))
intDATA
}#end else
}
### end function readNMRIntList
###
###########################################
###########################################
###
### read all NMR 2D DATA found in
### FilePath with the given pattern
###
FileList <- list.files(path = FilePath, pattern = FilePattern, recursive=TRUE)
cat("*", length(FileList), "files found\n");
cat("* file 1: ", FileList[1],"\n")
# use only F1, F2 and absInt
integralDAT <- readNMRIntList2D(FilePath, FileList[1])[c(1:2, 4), ]
rownames(integralDAT)[3] <- FileList[1]
for(x in 2:length(FileList)){ ### loop to load all found integral files
## !!! no error check, assume, that all integral files have the dimension and bucket size
## please check the output, all files should have same
## number of items and same F1 and F2 dimensions
cat("* file ", x, ": ", FileList[x],"\n")
integralDAT <- rbind(integralDAT, readNMRIntList2D(FilePath, FileList[x])[4, ])
cat("\n")
rownames(integralDAT)[x+2] <- FileList[x]
# plot2DNMR(integralDAT[1,], integralDAT[2,], integralDAT[x,], type="contour")
} ## end of loop
# name rows and cols of the integral data matrix
rownames(integralDAT) <- sub(FilePattern, rownames(integralDAT), replacement="", fixed=T)
colnames(integralDAT) <- paste(sep="", "H", format(round(integralDAT[1,],2),trim=T,nsmall=2), "/C",format(round(integralDAT[2,],1),trim=T,nsmall=1))
###########################################
## calculate a mean hop file
##
meanIntegralDAT <- integralDAT[1:3, ]
meanIntegralDAT[3,] <- apply(integralDAT[-c(1,2),], 2, mean)
rownames(meanIntegralDAT)[3] <- "mean"
###########################################
## 2D-NMR-PLOT of all files
##
for(x in 3:length(FileList)){
plot2DNMR(integralDAT[1,], integralDAT[2,], integralDAT[x,], type="contour")
}
plot2DNMR(meanIntegralDAT[1,], meanIntegralDAT[2,], meanIntegralDAT[3,], type="contour")
###########################################
## hierarchical clustering
##
hc.result <- hclust(dist(integralDAT[-c(1,2),]), "ave")
plot(hc.result)
###########################################
## PCA
##
##
library(pcaMethods)
pca.results <- pca(integralDAT[-c(1,2),], nPcs=3)
plotPcs(pca.results, type="scores")
## score plots
# plot dots
plot(pca.results@scores[,1:2], lwd=3, pch=19, col="lightgray")
# print names
plot(pca.results@scores[,1:2], type="n"); abline(v=0, h=0, col="lightgray")
text(x=pca.results@scores[,1], y=pca.results@scores[,2],
rownames(integralDAT)[-c(1,2)], cex=1.5)
## loading plot
plotPcs(pca.results, type="loadings")
# loading 1 against 2
plot(pca.results@loadings[,1:2])
#######
##
## plot PCA with legend
windows(width = 9, height = 6, pointsize = 12)
pc1<-1; pc2<-2 # select principal component of choice
plot(x=pca.results@scores[,pc1],y=pca.results@scores[,pc2], xlab=paste(sep="","PC ", pc1," (",round(pca.results@R2[pc1]*100, digits = 1),"%)"),ylab=paste(sep="","PC ", pc2," (",round(pca.results@R2[pc2]*100, digits = 1),"%)"),main="PCA: 2D NMR", cex=3,pch=19, col=1:pca.results@nObs)
legend("bottomright", legend = names(table(rownames(pca.results@scores))), pch=19, pt.cex=1.5, col=1:pca.results@nObs)
text(x=pca.results@scores[,pc1],y=pca.results@scores[,pc2], rownames(integralDAT)[-c(1,2)])
#------
######
### plot loadings
### select principal component
pc1 <- 1
### choose trash level
maxpc1POS <- 0.08 # trash level for positive pc loadings
maxpc1NEG <- -0.09 # trash level for negative pc loadings
### OR
#### select the *10* most importent negative and positive loadings
maxpc1POS <- sort(pca.results@loadings[,pc1], decreasing=T)[11]
maxpc1NEG <- sort(pca.results@loadings[,pc1], decreasing=F)[11]
#### Plot Version 1
#### plot all loadings, x axis is just an index
#### colorize importend loadings green (pos) and red (neg)
plot(x=pca.results@loadings[,pc1],type="h",ylab=paste(sep="","PC ",pc1," Loadings (",round(pca.results@R2[pc1]*100, digits = 1),"%)"),xlab="Peak Index",main="HMBC")
abline(h=0,col="gray")
extrempos<-which(pca.results@loadings[,pc1] > maxpc1POS)
length(extrempos)
abline(h= maxpc1POS,col="gray")
points(y=pca.results@loadings[extrempos,pc1],x=extrempos, cex=3,pch=21,lwd=3,col="lightgray")
points(y=pca.results@loadings[extrempos,pc1],x=extrempos, cex=3,pch=21,lwd=3,col="green",type="h")
extremneg<-which(pca.results@loadings[,pc1] < maxpc1NEG)
length(extremneg)
abline(h=maxpc1NEG,col="gray")
points(y=pca.results@loadings[extremneg,pc1],x=extremneg, cex=3,pch=21,lwd=3,col="lightgray")
points(y=pca.results@loadings[extremneg,pc1],x=extremneg, cex=3,pch=21,lwd=3,col="red",type="h")
text(x=dim(integralDAT)[2]/50,y= max(pca.results@loadings[,pc1]),paste(length(extrempos),"Peaks"),col="green")
text(x=dim(integralDAT)[2]/50,y= min(pca.results@loadings[,pc1]),paste(length(extremneg),"Peaks"),col="red")
text(y=pca.results@loadings[extrempos,pc1],x=extrempos, colnames(integralDAT)[extrempos],col="green")
text(y=pca.results@loadings[extremneg,pc1],x=extremneg, colnames(integralDAT)[extremneg],col="red")
cbind(extrempos,pca.results@loadings[extrempos,pc1])
cbind(extremneg,pca.results@loadings[extremneg,pc1])
##
#### Plot Version 2
#### plot loadings as 2D NMR Spectrum
####
plot2DNMR(integralDAT[1,], integralDAT[2,], pca.results@loadings[,pc1], type="countour")
plot2DNMR(integralDAT[1,], integralDAT[2,], pca.results@loadings[,pc1], type="filled.countour")
#### Plot Version 3
#### plot 2D NMR with overlay
####
plot2DNMR(integralDAT[1,], integralDAT[2,], meanIntegralDAT[3,])
points(x=(max(integralDAT[1,]) - integralDAT[1, extrempos])/(max(integralDAT[1,])-min(integralDAT[1,])),y=(max(integralDAT[2,]) - integralDAT[2, extrempos])/(max(integralDAT[2,])-min(integralDAT[2,])),, col="green",lwd=2,cex=2)
points(x=(max(integralDAT[1,]) - integralDAT[1, extremneg])/(max(integralDAT[1,])-min(integralDAT[1,])),y=(max(integralDAT[2,]) - integralDAT[2, extremneg])/(max(integralDAT[2,])-min(integralDAT[2,])),, col="red",lwd=2,cex=2)
points(x=(max(integralDAT[1,]) - integralDAT[1, extrempos])/(max(integralDAT[1,])-min(integralDAT[1,])),y=(max(integralDAT[2,]) - integralDAT[2, extrempos])/(max(integralDAT[2,])-min(integralDAT[2,])),, col="green",lwd=2,cex=2)