Molecular Mechanics-Based Measures of Steric Effects: Customized Code to Compute Ligand Repulsive Energies
Robert J. Bubel[a], Warthen Douglass, and David P. White*[b]
Department of Chemistry, University of North Carolina at Wilmington, 601 South College Road, Wilmington, NC 28403-3297
Supplementary Material: Ercode.cpp
#include<ctype.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<math.h>
#define TRUE 1
#define FALSE 0
/**********************GLOBAL VARIABLES*******************************/
struct nodetag
{ // each atom or LP in the molecule has one of
int number; // these structures dedicated to them
float x, y, z; // coordinates
char type[10]; // forcefield type
float r0, d0; // constants from parameter file
int side; // parameter indicating side ligand side = 0
// fragment side = 1
struct nodetag *next;
};
typedef nodetag *nodeptr;
nodeptr bgf_list; // global pointer to beginning of data
char rlist[230][100]; // 2d char array of parameter file
int cons[100][25]; // replica of connectivity table in interger form
int m[100][100]; // this is the binary array, may want to make it
// bigger in case of molecules with more than
// 99 atoms
float unitx, unity, unitz; // unit vector of re bond
float re; // length of re bond
/***********************END GLOBAL VARIABLES*******************************/
void init_ref(void)
// opens the parameter file and stores the constants in the rlist
{
int i=0;
int j=0;
char buf[45]="Forcefields (R0 / D0)\n";
FILE *stream;
stream=fopen("param_r.set","rt"); //open parameter file
if (!stream)
{printf("File not opened! \n");
exit(1); //program will not work
}
do
{ fgets(rlist[i], 100, stream); // useless info(top of file)
i++;
}
while(strcmp(buf, rlist[i-1])); // until we get to first line
do
{ fgets(rlist[j], 100, stream); // save these data to rlist
//
j++;
}while(rlist[j-1][0] != 'E'); // ends at END
}/*end init_ref*/
/*****************************************************************/
void menu(void)
{
printf( "======\n"
"Available commands are:\n"
" <1> calculate for *.bgf(start or continue)\n"
" <0> quit \n"
"======\n");
}/*end menu*/
/**********************************************************************/
void getcomm(int &command)
{
printf("Enter command: ");
scanf("%d",&command); // get the command
}/*end getcomm
/******************************************************************/
void connections(char conect[], int cons[])
//receives one line of chars from connectivity table translates it to ints
{
int i, j , k, l;
l=0;
k=9;
char cx[6]; //store 1 char # translated into int
i = strlen(conect);
do{ j=0;
do{ cx[j]=conect[k];
k++; j++;
}while (j<5); // cover five places for each number
cons[l] = atoi(cx); // convert it to an interger
l++;
k++;
}while(k<i+1); // is string is at the end?
cons[l]=-1; // indicates the end of connnections
}/*end connections*/
/********************************************************************/
void find_constant(nodeptr p, char type[])
//uses rlist to find r0 and d0 based on forcefield type
{
char rx[7], dx[11]; // char r0 and d0 to become floats
int ptr=1; // flag for when the proper forcefield
// was found in the parameter file
int i;
int j=0, d=0;
while (rlist[d][0] != 'E' & ptr !=0) // until we reach a match or the end
{ ptr = strncmp(rlist[d],type,5);
d++;
}
if (ptr != 0) // if none is found
{ printf("Constant not available for ");
for(i=0;i<6;i++) printf("%c", type[i]);
printf("\n");
printf("please enter proper values \nr0: ");
scanf("%f", &p->r0); // prompt user for own values
printf("d0: ");
scanf("%f", &p->d0);
}else // type found
{
for (i=27; i<35; i++)
{ rx[j]=rlist[d-1][i]; // save constants as chars
j++;
}
j=0;
for (i=35; i<47; i++)
{ dx[j]=rlist[d-1][i];
j++;
}
p->r0 = atof(rx); // convert them to floats
p->d0 = atof(dx);
}
}/*end find_constant*/
/************************************************************************/
void fill_side_array(int side_array[], int current_line[], int i)
//recursive traverses through atoms on frag side indicates each atom with a 1
{
if(side_array[current_line[0] - 1] != -1)// we only replace -1's with 1's
; //stops from placing a 1 over a 0
else
{ side_array[current_line[0] - 1] = 1; // put a 1 at current position
i=1;
while(current_line[i] != -1) //go through all the numbers that
//are attached to current line
{ fill_side_array(side_array, cons[current_line[i] - 1], i);
i++;
}
}
}/*end fill_side_array*/
/***********************************************************************/
float dist_form(float x1,float y1,float z1,float x2,float y2, float z2)
// given the coordinates of two atoms, find the distance between
{
double half;
float dist;
half = (x1-x2)*(x1-x2)+(y1-y2)*(y1-y2)+(z1-z2)*(z1-z2);
dist = sqrt(half); // find distance
return dist;
}/*end dist_form*/
/**********************************************************************/
void get_unit(int lig, int frag)
// given two atom #'s lookup the info and find unit vector between
{
float distance;
nodeptr t1 = bgf_list, t2 = bgf_list;
while(t1->number != lig) // lookup both atoms based on atom #
t1 = t1->next;
while(t2->number != frag)
t2 = t2->next;
distance = dist_form(t1->x, t1->y, t1->z, t2->x, t2->y, t2->z);
unitx = ((t1->x - t2->x)/distance);
unity = ((t1->y - t2->y)/distance);
unitz = ((t1->z - t2->z)/distance);
re = distance; // equilibrium re bond length
}/*end get_unit*/
/**********************************************************************/
int connected(int ligptr, int frag)
// returns a 1 if the two atom #'s are connected
{
int j=0;
int i=0;
do
{ if (cons[ligptr][i] == frag) // check for connectivity in cons[]
j = 1;
i++;
}while(cons[ligptr][i] != -1);
return j;
}/*end connected()*/
/***********************************************************************/
int find_side(int numatoms)
// uses the connections table>which atoms make up ligand side? fragment side?
// returns a 1 on success, returns 0 if numbers given by user are bad
{
int side_array[100];
int lig, frag, i;
nodeptr t;
int number;
number = bgf_list->number; // number of atoms on list
printf("Which atom number is on"
" the ligand side of the re bond (donor atom): ");
scanf("%d", &lig); // prompt user for atom #'s
printf("Which atom number is on the fragment side of the re bond: ");
scanf("%d", &frag);
if (lig>numatoms || frag>numatoms)
{ printf("You have entered one of the numbers incorrectly! \n");
return 0; // too high a number
} // prevents program from crashing
else
if (!connected(lig-1, frag)) // checks to see if atoms are connected
{ printf("These atoms are not connected! \n");
return 0;
}
else
{ for(i=0; i<99; i++)
side_array[i] = -1; // initialize side_array to all -1's
i=0;
while(cons[lig-1][i] != -1) // atoms connected directly to it
{ if(cons[lig-1][i] == frag) // except the frag atom
;
else
side_array[cons[lig-1][i]-1] = 0; // identifies this side w/ a 0
i++;
}
i=0;
fill_side_array(side_array, cons[frag-1], i); // go through all atoms
i=0; // on fragment side
while(i<99)
{ if(side_array[i] == -1)
side_array[i] = 0; // anything left must be on ligand side
i++;
}
i=number-1;
t = bgf_list;
while(t != NULL)
{ t->side = side_array[i]; // puts proper side value into
i--; // corresponding node
t = t->next;
}
get_unit(lig, frag); // unit vector between the atoms for
} // when we change re bond length
return 1; // 1 indicates a success
}/*end find_side*/
/********************************************************************/
void format_m(int amount)
// stops program from doing calculations twice if[x,y]==1 -> [y,x] gets 0
{ int i=0;
int j=0;
while(i < amount)
{
while(j < amount)
{ if (m[i][j] == 1)
{ if (m[j][i] == 0) printf("fatal error"); //should never happen
if (m[j][i] == 1)
m[j][i] = 0; // switches opposite
// member on matrix to 0
}
j++;
}
i++;
j=0;
}
}/*end format_m*/
/*********************************************************************/
void find_connections(FILE *stream)
// function used to create m[][]-- binary array
// binary array is a square matrix [#atoms x #atoms]
{ int k; int amount=0;
int w, i;
char conect[100];
fgets(conect,100,stream);
do{
if (conect[0] == 'O') // does not look at the ORDER lines
/*do nothing*/;
else
{ connections(conect, cons[amount]); // translate line to ints
amount++;
}
fgets(conect,100,stream);
}while(conect[0] != 'E'); // ends when the END string is read
/*======*/
for(w=0; w< amount; w++) //a binary array m[#][] for each atom
{ k=0;
do{ // 0 -> bonded relationship(1-2),(1-3)
m[w][k]=1; // 1 -> non-bonded
k++;
}while(k < amount); //initializes binary array to all 1's
m[w][k]=-1; //defines last member of array
k = w;
i=0;
int d=0;
do
{ do
{ m[w][cons[k][i]-1]=0; //which atoms have a 1-1 1-2 or 1-3
i++;
}while(cons[k][i] != -1); // 1-1 or 1-2?
d++;
k=(cons[w][d])-1;
i=0;
}while(k != -2); // 1-3 relationship?
}
format_m(amount); // fix matrix prevent calculation 2x
}/*end find_connections*/
/************************************************************************/
int get_data(nodeptr &p, char input[])
// get individual values from 1 line of input
{ int i;
int j=0;
char number[5], x[11], y[11], z[11], type[10];
if(!strncmp("HETATM",input,6)) //test if information is valid
{ for(i=8; i<13; i++){number[j] = input[i]; j++;} j=0; // all of these
for(i=31; i<41; i++){x[j] = input[i]; j++;} j=0; // just put the
for(i=41; i<51; i++){y[j] = input[i]; j++;} j=0; // important data
for(i=51; i<61; i++){z[j] = input[i]; j++;} j=0; // in small
for(i=60; i<67; i++){type[j] = input[i]; j++;} // character strings
p = (nodeptr)malloc(sizeof(nodetag));
if(p==NULL) // malloc didn't give us the mem
{ printf("ran out of memory \n");
printf("please exit program, all previous data is still valid");
return 0; //allows user to exit program smoothly
}
p->number = atoi(number);
p->x = atof(x);
p->y = atof(y);
p->z = atof(z);
strcpy(p->type, type);
find_constant(p, p->type); // determine R0 and D0
return 1;
}
else return 0; // if the information is invalid
}/*end get_data*/
/***********************************************************************/
float vander(nodeptr t1, nodeptr t2, float factor, float r0, float d0)
// receives pointers to two atoms, determine the side, move them
// appropriate distance, calculate and return Evdw,R
{ float coord1[3]; // temp coordinates so we don't change
float coord2[3]; // original value on list
float dis, vander_out;
if(t1->side == 1)
{ coord1[0] = (factor*unitx) + t1->x; //if the atom is on the fragment
coord1[1] = (factor*unity) + t1->y; //side->move it by move it along
coord1[2] = (factor*unitz) + t1->z; //unit vector a distace of factor
}
else
{ coord1[0] = t1->x; // then it is on the ligand side so
coord1[1] = t1->y; // we use the same coordinates
coord1[2] = t1->z;
}
if(t2->side == 1)
{ coord2[0] = (factor*unitx) + t2->x;
coord2[1] = (factor*unity) + t2->y;
coord2[2] = (factor*unitz) + t2->z;
}
else
{ coord2[0] = t2->x;
coord2[1] = t2->y;
coord2[2] = t2->z;
}
dis=dist_form(coord1[0], coord1[1], coord1[2],
coord2[0], coord2[1], coord2[2]); // get distance
vander_out = (d0 * exp(12.5*(1 - (dis/r0)))); // EvdW,R formula
return vander_out;
}/*end vander*/
/*************************************************************************/
void vdw_formula(nodeptr p, int atom1, int atom2, float vdw[])
// receive two atom #'s, lookup info on them calculate 7 different Evdw,R
// for the pair, add it to the total
{ float d0, r0;
nodeptr t1=p, t2=p;
while(t1->number != atom1) // look up info on each atom
t1 = t1->next;
while(t2->number != atom2)
t2 = t2->next;
d0 = sqrt(t1->d0 * t2->d0); // D0-geometric mean of both constants
r0 = (t1->r0 + t2->r0)/2; // R0-arithmetic mean of " "
vdw[0] = vdw[0] + vander(t1, t2, 0, r0, d0); // EvdW,R with no change
// in re bond length
vdw[1] = vdw[1] + vander(t1, t2, .01, r0, d0); // EvdW,R with bond lenth
// increased by .01 angstroms
vdw[2] = vdw[2] + vander(t1, t2, -.01, r0, d0); // EvdW,R with bond lenth
// decreased by .01 angstroms
vdw[3] = vdw[3] + vander(t1, t2, .02, r0, d0); // increased .02
vdw[4] = vdw[4] + vander(t1, t2, -.02, r0, d0); // decreased .02
vdw[5] = vdw[5] + vander(t1, t2, .03, r0, d0);
vdw[6] = vdw[6] + vander(t1, t2, -.03, r0, d0);
}/*end vdw_formula*/
/******************************************************************/
void vanderwahls(nodeptr p, float vdw[])
// examines binary array searching for 1's, sends two atom #'s
// for which seven Evdw,R values are going to be calculated
{ int number, i, j;
number = p->number; // last atom number
i=0; j=0;
while(i<number)
{
while(j<number) // every member of binary array
{ if(m[i][j] == 1)
{ vdw_formula(p, i+1, j+1, vdw);// if a 1, perform calc on it
j++;
}
else
j++; // next element on current row
}
j=0;
i++; // next row
}
}/*end vanderwahls*/
/************************************************************/
void bond_shrinkage(void)
// this function searches for H__C bonds and shrinks bond length by 0.915
{ nodeptr t1, t2;
float distance, ux, uy, uz, n_distance;
int atom1, atom2;
t1 = bgf_list;
t2 = bgf_list;
while(t1 != NULL)
{ if(!strncmp(" H__C ",t1->type,7)) // look for H__C forcefield types
{ atom1 = t1->number; // if it finds one identify the H
atom2 = cons[atom1-1][1]; // and the C
while(t2->number != atom2)
t2 = t2->next; //lookup info about C atom
distance = dist_form(t1->x, t1->y, t1->z, t2->x, t2->y, t2->z);
ux = ((t1->x - t2->x)/distance);
uy = ((t1->y - t2->y)/distance); // find unit vector from C to H
uz = ((t1->z - t2->z)/distance);
n_distance = distance * .915; // decrease distance by 0.915
t1->x = n_distance*ux + t2->x;
t1->y = n_distance*uy + t2->y; // add new vector to C coordinates
t1->z = n_distance*uz + t2->z;
}
t1 = t1->next; // go on looking for more
t2 = bgf_list;
}
}/*end bond_shrinkage*/
/*************************************************************************/
void find_er(float vdw[], char filename[], FILE *out)
//calculates Ligand Repulsive Energy
{ float sxx, sxy, slope, er, avg;
avg = (vdw[0] + vdw[1] + vdw[2] + vdw[3] + vdw[4] + vdw[5] + vdw[6])/7;
sxx = .0028;
sxy = (.01*(vdw[1]-avg))+(-.01*(vdw[2]-avg))+(.02*(vdw[3]-avg))+
(-.02*(vdw[4]-avg))+(.03*(vdw[5]-avg))+(-.03*(vdw[6]-avg));
slope = sxy/sxx; // average slope formula
er = re*slope;
fprintf(out,"%s ", filename); // sends filename to outfile
fprintf(out,"%f \n", er); // sends Er to outfile
}/*end find_er*/
/*********************************************************************/
void free_mem(void)
// traverses through list deleting all nodes to get mem back
{ nodeptr t1,t2;
t1=bgf_list;
t2=bgf_list->next;
while(t2->next != NULL)
{ free (t1);
t1 = t2;
t2 = t2->next;
}
}/*end free_mem*/
/********************************************************************/
void adjust_filename(char filename[])
// receives user input file name, checks and adds *.bgf extension
{ int len;
len = strlen(filename);
if (toupper(filename[len-1]) == 'F' & //if user entered file extension
toupper(filename[len-2]) == 'G' &
toupper(filename[len-3]) == 'B' &
filename[len-4] == '.')
; // do nothing
else
{
char end[4] = ".bgf";
strncat(filename, end, 4); // attach .bgf to end if filename
}
}/*end adjust_filename*/
/********************************************************************/
void bfg(FILE *out, int on_off)
// essentially bgf file driver
{ int done=0;
while(!done) // this while loop allows user to calc
// several files without having to go
// back to menu
{
char filename[50];
char buf[30]="FORMAT CONECT (a6,12i6)\n";// when to stop reading
char input[100]; // holds data temporarily
int numatoms;
int i;
float vdw[7]; // to find Er we need seven different
// values for Evdw
for(i=0; i<7; i++) vdw[i] = 0; // Evdw,R is a summation of all
// individual Evdw's
// so we start each value at zero
nodeptr p;
bgf_list=NULL;
printf("Enter the filename: ");
scanf("%s",filename);
adjust_filename(filename); // this adds the .bgf to the end of
// file so user doesn't need enter it
FILE *stream;
stream=fopen(filename,"rt");
if (!stream)
{printf("File not opened! \n");
done = 1; // go back to the menu
}
else
{
do{
fgets(input, 101, stream);
if(get_data(p, input)) // check if information is useable
// if it is, puts information in p
{p->next = bgf_list;
bgf_list = p; // bgf_list is the real list we use
}
}while(strcmp(buf, input));
numatoms = bgf_list->number;
find_connections(stream); //function uses connectivity table to
// create binary array
if(find_side(numatoms)) // this function is used to tell which
// side of the re each atom is on
{
if (on_off == 1)
bond_shrinkage();//allows for C__H bond shrinkage
vanderwahls(bgf_list, vdw); // simultaneously solves
// for seven values of repulsive
// vanderwahls energy by
// adjusting xyz coordinates of
// individual atoms
find_er(vdw, filename, out);// calculates Er and sends it to ouput
// file along with file name
}
free_mem(); //clears up memory so we can do it again
fclose(stream); //close the current input file stream
}
}
}/*end bfg()*/
void driver()
// gets the ouput filename, options...
{ int done; //finish flag
int command;
int on_off = -1;
char shrink[1];
char outfile[20]; // file for data output
printf("Please enter a filename for the data: ");
scanf("%s",outfile);
FILE *out;
if ((out = fopen(outfile, "wt"))
== NULL)
{
fprintf(stderr, "Cannot open output file.\n");
exit(1);
}
done = FALSE;
do {
printf("Do you want SHRINK_CH_BONDS on? (y/n) \n");
scanf("%s",&shrink);
if(toupper(shrink[0]) == 'Y')
on_off = 1;
else
if(toupper(shrink[0]) == 'N')
on_off =0;
}while (on_off == -1);
menu();
while (!done)
{ getcomm (command);
if (command == 1)
{ bfg(out, on_off); }
else
if (command == 0)
done = TRUE;
menu();
}
fclose(out);
}/*end driver*/
/*********************************************************************/
void main()
{ printf("ERCODE. \n"
"When using this work, please reference \n"
"Robert J. Bubel \n"
"T. Warthen Douglass and \n"
"David P. White \n"
"Journal of Computational Chemistry \n"
"submitted for publication, 1999 \n");
init_ref();//this fills up char rlist[155][100] with information in
//parameter file
driver(); //this is the main part of the program
printf("Bye ! \n");
}
1
[a] Undergraduate student from the State University of New York at New Paltz
[b] Email address: