Molecular Mechanics-Based Measures of Steric Effects: Customized Code to Compute Ligand Repulsive Energies

Robert J. Bubel[a], Warthen Douglass, and David P. White*[b]

Department of Chemistry, University of North Carolina at Wilmington, 601 South College Road, Wilmington, NC 28403-3297

Supplementary Material: Ercode.cpp

#include<ctype.h>

#include<stdio.h>

#include<stdlib.h>

#include<string.h>

#include<math.h>

#define TRUE 1

#define FALSE 0

/**********************GLOBAL VARIABLES*******************************/

struct nodetag

{ // each atom or LP in the molecule has one of

int number; // these structures dedicated to them

float x, y, z; // coordinates

char type[10]; // forcefield type

float r0, d0; // constants from parameter file

int side; // parameter indicating side ligand side = 0

// fragment side = 1

struct nodetag *next;

};

typedef nodetag *nodeptr;

nodeptr bgf_list; // global pointer to beginning of data

char rlist[230][100]; // 2d char array of parameter file

int cons[100][25]; // replica of connectivity table in interger form

int m[100][100]; // this is the binary array, may want to make it

// bigger in case of molecules with more than

// 99 atoms

float unitx, unity, unitz; // unit vector of re bond

float re; // length of re bond

/***********************END GLOBAL VARIABLES*******************************/

void init_ref(void)

// opens the parameter file and stores the constants in the rlist

{

int i=0;

int j=0;

char buf[45]="Forcefields (R0 / D0)\n";

FILE *stream;

stream=fopen("param_r.set","rt"); //open parameter file

if (!stream)

{printf("File not opened! \n");

exit(1); //program will not work

}

do

{ fgets(rlist[i], 100, stream); // useless info(top of file)

i++;

}

while(strcmp(buf, rlist[i-1])); // until we get to first line

do

{ fgets(rlist[j], 100, stream); // save these data to rlist

//

j++;

}while(rlist[j-1][0] != 'E'); // ends at END

}/*end init_ref*/

/*****************************************************************/

void menu(void)

{

printf( "======\n"

"Available commands are:\n"

" <1> calculate for *.bgf(start or continue)\n"

" <0> quit \n"

"======\n");

}/*end menu*/

/**********************************************************************/

void getcomm(int &command)

{

printf("Enter command: ");

scanf("%d",&command); // get the command

}/*end getcomm

/******************************************************************/

void connections(char conect[], int cons[])

//receives one line of chars from connectivity table translates it to ints

{

int i, j , k, l;

l=0;

k=9;

char cx[6]; //store 1 char # translated into int

i = strlen(conect);

do{ j=0;

do{ cx[j]=conect[k];

k++; j++;

}while (j<5); // cover five places for each number

cons[l] = atoi(cx); // convert it to an interger

l++;

k++;

}while(k<i+1); // is string is at the end?

cons[l]=-1; // indicates the end of connnections

}/*end connections*/

/********************************************************************/

void find_constant(nodeptr p, char type[])

//uses rlist to find r0 and d0 based on forcefield type

{

char rx[7], dx[11]; // char r0 and d0 to become floats

int ptr=1; // flag for when the proper forcefield

// was found in the parameter file

int i;

int j=0, d=0;

while (rlist[d][0] != 'E' & ptr !=0) // until we reach a match or the end

{ ptr = strncmp(rlist[d],type,5);

d++;

}

if (ptr != 0) // if none is found

{ printf("Constant not available for ");

for(i=0;i<6;i++) printf("%c", type[i]);

printf("\n");

printf("please enter proper values \nr0: ");

scanf("%f", &p->r0); // prompt user for own values

printf("d0: ");

scanf("%f", &p->d0);

}else // type found

{

for (i=27; i<35; i++)

{ rx[j]=rlist[d-1][i]; // save constants as chars

j++;

}

j=0;

for (i=35; i<47; i++)

{ dx[j]=rlist[d-1][i];

j++;

}

p->r0 = atof(rx); // convert them to floats

p->d0 = atof(dx);

}

}/*end find_constant*/

/************************************************************************/

void fill_side_array(int side_array[], int current_line[], int i)

//recursive traverses through atoms on frag side indicates each atom with a 1

{

if(side_array[current_line[0] - 1] != -1)// we only replace -1's with 1's

; //stops from placing a 1 over a 0

else

{ side_array[current_line[0] - 1] = 1; // put a 1 at current position

i=1;

while(current_line[i] != -1) //go through all the numbers that

//are attached to current line

{ fill_side_array(side_array, cons[current_line[i] - 1], i);

i++;

}

}

}/*end fill_side_array*/

/***********************************************************************/

float dist_form(float x1,float y1,float z1,float x2,float y2, float z2)

// given the coordinates of two atoms, find the distance between

{

double half;

float dist;

half = (x1-x2)*(x1-x2)+(y1-y2)*(y1-y2)+(z1-z2)*(z1-z2);

dist = sqrt(half); // find distance

return dist;

}/*end dist_form*/

/**********************************************************************/

void get_unit(int lig, int frag)

// given two atom #'s lookup the info and find unit vector between

{

float distance;

nodeptr t1 = bgf_list, t2 = bgf_list;

while(t1->number != lig) // lookup both atoms based on atom #

t1 = t1->next;

while(t2->number != frag)

t2 = t2->next;

distance = dist_form(t1->x, t1->y, t1->z, t2->x, t2->y, t2->z);

unitx = ((t1->x - t2->x)/distance);

unity = ((t1->y - t2->y)/distance);

unitz = ((t1->z - t2->z)/distance);

re = distance; // equilibrium re bond length

}/*end get_unit*/

/**********************************************************************/

int connected(int ligptr, int frag)

// returns a 1 if the two atom #'s are connected

{

int j=0;

int i=0;

do

{ if (cons[ligptr][i] == frag) // check for connectivity in cons[]

j = 1;

i++;

}while(cons[ligptr][i] != -1);

return j;

}/*end connected()*/

/***********************************************************************/

int find_side(int numatoms)

// uses the connections table>which atoms make up ligand side? fragment side?

// returns a 1 on success, returns 0 if numbers given by user are bad

{

int side_array[100];

int lig, frag, i;

nodeptr t;

int number;

number = bgf_list->number; // number of atoms on list

printf("Which atom number is on"

" the ligand side of the re bond (donor atom): ");

scanf("%d", &lig); // prompt user for atom #'s

printf("Which atom number is on the fragment side of the re bond: ");

scanf("%d", &frag);

if (lig>numatoms || frag>numatoms)

{ printf("You have entered one of the numbers incorrectly! \n");

return 0; // too high a number

} // prevents program from crashing

else

if (!connected(lig-1, frag)) // checks to see if atoms are connected

{ printf("These atoms are not connected! \n");

return 0;

}

else

{ for(i=0; i<99; i++)

side_array[i] = -1; // initialize side_array to all -1's

i=0;

while(cons[lig-1][i] != -1) // atoms connected directly to it

{ if(cons[lig-1][i] == frag) // except the frag atom

;

else

side_array[cons[lig-1][i]-1] = 0; // identifies this side w/ a 0

i++;

}

i=0;

fill_side_array(side_array, cons[frag-1], i); // go through all atoms

i=0; // on fragment side

while(i<99)

{ if(side_array[i] == -1)

side_array[i] = 0; // anything left must be on ligand side

i++;

}

i=number-1;

t = bgf_list;

while(t != NULL)

{ t->side = side_array[i]; // puts proper side value into

i--; // corresponding node

t = t->next;

}

get_unit(lig, frag); // unit vector between the atoms for

} // when we change re bond length

return 1; // 1 indicates a success

}/*end find_side*/

/********************************************************************/

void format_m(int amount)

// stops program from doing calculations twice if[x,y]==1 -> [y,x] gets 0

{ int i=0;

int j=0;

while(i < amount)

{

while(j < amount)

{ if (m[i][j] == 1)

{ if (m[j][i] == 0) printf("fatal error"); //should never happen

if (m[j][i] == 1)

m[j][i] = 0; // switches opposite

// member on matrix to 0

}

j++;

}

i++;

j=0;

}

}/*end format_m*/

/*********************************************************************/

void find_connections(FILE *stream)

// function used to create m[][]-- binary array

// binary array is a square matrix [#atoms x #atoms]

{ int k; int amount=0;

int w, i;

char conect[100];

fgets(conect,100,stream);

do{

if (conect[0] == 'O') // does not look at the ORDER lines

/*do nothing*/;

else

{ connections(conect, cons[amount]); // translate line to ints

amount++;

}

fgets(conect,100,stream);

}while(conect[0] != 'E'); // ends when the END string is read

/*======*/

for(w=0; w< amount; w++) //a binary array m[#][] for each atom

{ k=0;

do{ // 0 -> bonded relationship(1-2),(1-3)

m[w][k]=1; // 1 -> non-bonded

k++;

}while(k < amount); //initializes binary array to all 1's

m[w][k]=-1; //defines last member of array

k = w;

i=0;

int d=0;

do

{ do

{ m[w][cons[k][i]-1]=0; //which atoms have a 1-1 1-2 or 1-3

i++;

}while(cons[k][i] != -1); // 1-1 or 1-2?

d++;

k=(cons[w][d])-1;

i=0;

}while(k != -2); // 1-3 relationship?

}

format_m(amount); // fix matrix prevent calculation 2x

}/*end find_connections*/

/************************************************************************/

int get_data(nodeptr &p, char input[])

// get individual values from 1 line of input

{ int i;

int j=0;

char number[5], x[11], y[11], z[11], type[10];

if(!strncmp("HETATM",input,6)) //test if information is valid

{ for(i=8; i<13; i++){number[j] = input[i]; j++;} j=0; // all of these

for(i=31; i<41; i++){x[j] = input[i]; j++;} j=0; // just put the

for(i=41; i<51; i++){y[j] = input[i]; j++;} j=0; // important data

for(i=51; i<61; i++){z[j] = input[i]; j++;} j=0; // in small

for(i=60; i<67; i++){type[j] = input[i]; j++;} // character strings

p = (nodeptr)malloc(sizeof(nodetag));

if(p==NULL) // malloc didn't give us the mem

{ printf("ran out of memory \n");

printf("please exit program, all previous data is still valid");

return 0; //allows user to exit program smoothly

}

p->number = atoi(number);

p->x = atof(x);

p->y = atof(y);

p->z = atof(z);

strcpy(p->type, type);

find_constant(p, p->type); // determine R0 and D0

return 1;

}

else return 0; // if the information is invalid

}/*end get_data*/

/***********************************************************************/

float vander(nodeptr t1, nodeptr t2, float factor, float r0, float d0)

// receives pointers to two atoms, determine the side, move them

// appropriate distance, calculate and return Evdw,R

{ float coord1[3]; // temp coordinates so we don't change

float coord2[3]; // original value on list

float dis, vander_out;

if(t1->side == 1)

{ coord1[0] = (factor*unitx) + t1->x; //if the atom is on the fragment

coord1[1] = (factor*unity) + t1->y; //side->move it by move it along

coord1[2] = (factor*unitz) + t1->z; //unit vector a distace of factor

}

else

{ coord1[0] = t1->x; // then it is on the ligand side so

coord1[1] = t1->y; // we use the same coordinates

coord1[2] = t1->z;

}

if(t2->side == 1)

{ coord2[0] = (factor*unitx) + t2->x;

coord2[1] = (factor*unity) + t2->y;

coord2[2] = (factor*unitz) + t2->z;

}

else

{ coord2[0] = t2->x;

coord2[1] = t2->y;

coord2[2] = t2->z;

}

dis=dist_form(coord1[0], coord1[1], coord1[2],

coord2[0], coord2[1], coord2[2]); // get distance

vander_out = (d0 * exp(12.5*(1 - (dis/r0)))); // EvdW,R formula

return vander_out;

}/*end vander*/

/*************************************************************************/

void vdw_formula(nodeptr p, int atom1, int atom2, float vdw[])

// receive two atom #'s, lookup info on them calculate 7 different Evdw,R

// for the pair, add it to the total

{ float d0, r0;

nodeptr t1=p, t2=p;

while(t1->number != atom1) // look up info on each atom

t1 = t1->next;

while(t2->number != atom2)

t2 = t2->next;

d0 = sqrt(t1->d0 * t2->d0); // D0-geometric mean of both constants

r0 = (t1->r0 + t2->r0)/2; // R0-arithmetic mean of " "

vdw[0] = vdw[0] + vander(t1, t2, 0, r0, d0); // EvdW,R with no change

// in re bond length

vdw[1] = vdw[1] + vander(t1, t2, .01, r0, d0); // EvdW,R with bond lenth

// increased by .01 angstroms

vdw[2] = vdw[2] + vander(t1, t2, -.01, r0, d0); // EvdW,R with bond lenth

// decreased by .01 angstroms

vdw[3] = vdw[3] + vander(t1, t2, .02, r0, d0); // increased .02

vdw[4] = vdw[4] + vander(t1, t2, -.02, r0, d0); // decreased .02

vdw[5] = vdw[5] + vander(t1, t2, .03, r0, d0);

vdw[6] = vdw[6] + vander(t1, t2, -.03, r0, d0);

}/*end vdw_formula*/

/******************************************************************/

void vanderwahls(nodeptr p, float vdw[])

// examines binary array searching for 1's, sends two atom #'s

// for which seven Evdw,R values are going to be calculated

{ int number, i, j;

number = p->number; // last atom number

i=0; j=0;

while(i<number)

{

while(j<number) // every member of binary array

{ if(m[i][j] == 1)

{ vdw_formula(p, i+1, j+1, vdw);// if a 1, perform calc on it

j++;

}

else

j++; // next element on current row

}

j=0;

i++; // next row

}

}/*end vanderwahls*/

/************************************************************/

void bond_shrinkage(void)

// this function searches for H__C bonds and shrinks bond length by 0.915

{ nodeptr t1, t2;

float distance, ux, uy, uz, n_distance;

int atom1, atom2;

t1 = bgf_list;

t2 = bgf_list;

while(t1 != NULL)

{ if(!strncmp(" H__C ",t1->type,7)) // look for H__C forcefield types

{ atom1 = t1->number; // if it finds one identify the H

atom2 = cons[atom1-1][1]; // and the C

while(t2->number != atom2)

t2 = t2->next; //lookup info about C atom

distance = dist_form(t1->x, t1->y, t1->z, t2->x, t2->y, t2->z);

ux = ((t1->x - t2->x)/distance);

uy = ((t1->y - t2->y)/distance); // find unit vector from C to H

uz = ((t1->z - t2->z)/distance);

n_distance = distance * .915; // decrease distance by 0.915

t1->x = n_distance*ux + t2->x;

t1->y = n_distance*uy + t2->y; // add new vector to C coordinates

t1->z = n_distance*uz + t2->z;

}

t1 = t1->next; // go on looking for more

t2 = bgf_list;

}

}/*end bond_shrinkage*/

/*************************************************************************/

void find_er(float vdw[], char filename[], FILE *out)

//calculates Ligand Repulsive Energy

{ float sxx, sxy, slope, er, avg;

avg = (vdw[0] + vdw[1] + vdw[2] + vdw[3] + vdw[4] + vdw[5] + vdw[6])/7;

sxx = .0028;

sxy = (.01*(vdw[1]-avg))+(-.01*(vdw[2]-avg))+(.02*(vdw[3]-avg))+

(-.02*(vdw[4]-avg))+(.03*(vdw[5]-avg))+(-.03*(vdw[6]-avg));

slope = sxy/sxx; // average slope formula

er = re*slope;

fprintf(out,"%s ", filename); // sends filename to outfile

fprintf(out,"%f \n", er); // sends Er to outfile

}/*end find_er*/

/*********************************************************************/

void free_mem(void)

// traverses through list deleting all nodes to get mem back

{ nodeptr t1,t2;

t1=bgf_list;

t2=bgf_list->next;

while(t2->next != NULL)

{ free (t1);

t1 = t2;

t2 = t2->next;

}

}/*end free_mem*/

/********************************************************************/

void adjust_filename(char filename[])

// receives user input file name, checks and adds *.bgf extension

{ int len;

len = strlen(filename);

if (toupper(filename[len-1]) == 'F' & //if user entered file extension

toupper(filename[len-2]) == 'G' &

toupper(filename[len-3]) == 'B' &

filename[len-4] == '.')

; // do nothing

else

{

char end[4] = ".bgf";

strncat(filename, end, 4); // attach .bgf to end if filename

}

}/*end adjust_filename*/

/********************************************************************/

void bfg(FILE *out, int on_off)

// essentially bgf file driver

{ int done=0;

while(!done) // this while loop allows user to calc

// several files without having to go

// back to menu

{

char filename[50];

char buf[30]="FORMAT CONECT (a6,12i6)\n";// when to stop reading

char input[100]; // holds data temporarily

int numatoms;

int i;

float vdw[7]; // to find Er we need seven different

// values for Evdw

for(i=0; i<7; i++) vdw[i] = 0; // Evdw,R is a summation of all

// individual Evdw's

// so we start each value at zero

nodeptr p;

bgf_list=NULL;

printf("Enter the filename: ");

scanf("%s",filename);

adjust_filename(filename); // this adds the .bgf to the end of

// file so user doesn't need enter it

FILE *stream;

stream=fopen(filename,"rt");

if (!stream)

{printf("File not opened! \n");

done = 1; // go back to the menu

}

else

{

do{

fgets(input, 101, stream);

if(get_data(p, input)) // check if information is useable

// if it is, puts information in p

{p->next = bgf_list;

bgf_list = p; // bgf_list is the real list we use

}

}while(strcmp(buf, input));

numatoms = bgf_list->number;

find_connections(stream); //function uses connectivity table to

// create binary array

if(find_side(numatoms)) // this function is used to tell which

// side of the re each atom is on

{

if (on_off == 1)

bond_shrinkage();//allows for C__H bond shrinkage

vanderwahls(bgf_list, vdw); // simultaneously solves

// for seven values of repulsive

// vanderwahls energy by

// adjusting xyz coordinates of

// individual atoms

find_er(vdw, filename, out);// calculates Er and sends it to ouput

// file along with file name

}

free_mem(); //clears up memory so we can do it again

fclose(stream); //close the current input file stream

}

}

}/*end bfg()*/

void driver()

// gets the ouput filename, options...

{ int done; //finish flag

int command;

int on_off = -1;

char shrink[1];

char outfile[20]; // file for data output

printf("Please enter a filename for the data: ");

scanf("%s",outfile);

FILE *out;

if ((out = fopen(outfile, "wt"))

== NULL)

{

fprintf(stderr, "Cannot open output file.\n");

exit(1);

}

done = FALSE;

do {

printf("Do you want SHRINK_CH_BONDS on? (y/n) \n");

scanf("%s",&shrink);

if(toupper(shrink[0]) == 'Y')

on_off = 1;

else

if(toupper(shrink[0]) == 'N')

on_off =0;

}while (on_off == -1);

menu();

while (!done)

{ getcomm (command);

if (command == 1)

{ bfg(out, on_off); }

else

if (command == 0)

done = TRUE;

menu();

}

fclose(out);

}/*end driver*/

/*********************************************************************/

void main()

{ printf("ERCODE. \n"

"When using this work, please reference \n"

"Robert J. Bubel \n"

"T. Warthen Douglass and \n"

"David P. White \n"

"Journal of Computational Chemistry \n"

"submitted for publication, 1999 \n");

init_ref();//this fills up char rlist[155][100] with information in

//parameter file

driver(); //this is the main part of the program

printf("Bye ! \n");

}

1

[a] Undergraduate student from the State University of New York at New Paltz

[b] Email address: