http://ccl.net/cca/software/UNIX/babel/babel-1.6/tosmiles.shtml
CCL tosmiles.c
```/*
tosmiles.c

converts connection tables to unique SMILES strings

Simon Kilvington, University of Southampton, 1995
*/

#include "bbltyp.h"

/*
contabtosmiles
converts the given fragment into a unique SMILES string
uses the algorithm described by the Weiningers in J.Chem.Inf.Comput.Sci. 29 (1989), 97-101
returns a ptr to the string which needs to be free'd when you've finished with it
returns NULL if any problems
it will explicitly include any H atoms in frag in the resulting SMILES, so it's best to remove all H atoms from frag before calling this
*/

char *
contabtosmiles(smilescontab_t *frag)
{
char *smiles;
int *rank, *primesum, *tmprank, *primelist;
int a, b, c, n, bto, nranks, lowest;
int nbonds, nprimes;
int prime, same;
smilesatom_t *aptr;
block_ptr blk;
int i;

if(!block_alloc(&blk, "iiii", &rank, frag->natoms, &primesum, frag->natoms, &tmprank, frag->natoms, &primelist, frag->natoms))
return NULL;

/* generate an array of the first "frag->natoms" prime numbers */
nprimes = 0;
for(n=2; nprimesnatoms; n++)
{
prime = TRUE;
for(c=2; c<=(n/2) && prime; c++)
prime = ((n % c) != 0);
if(prime)
primelist[nprimes++] = n;
}

/* determine the initial graph invariants for each atom, store them in primesum */
for(a=0; anatoms; a++)
{
aptr = &frag->atom[a];
/* no of connections */
nbonds = nbondsto(aptr);
primesum[a] = nbonds * 100000;
/* no of non-H bonds (frag should contain no H atoms) */
primesum[a] += nbonds * 10000;
#if 0	/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
/* atomic no */
primesum[a] += md_atomicnumber(aptr->symbol) * 100;
/* sign and absolute value of charge */
primesum[a] += md_chargeonatom(aptr) * 10;
/* no of attached H's */
primesum[a] += (md_maxvalency(aptr->symbol) - nbonds);
#endif	/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
}

do
{
/* rank each atom based on primesum values (initially these are the invariants, on subsequent loops they are the tie breaker ranks) */
sortranks(frag->natoms, primesum, rank);
do
{
/* generate the product of the "corresponding primes" of each atoms neighbours */
for(a=0; anatoms; a++)
{
aptr = &frag->atom[a];
primesum[a] = 1;
for(b=0; bbondedto[b];
primesum[a] *= (bto != -1) ? primelist[rank[bto]-1] : 1;		/* -1 as array indices start at 0 */
}
}

/* resolve ties in "rank" list using the "primesum" values, put the new ranks in "tmprank" */
nranks = resolveties(frag->natoms, rank, primesum, tmprank);

/* see if the ranks are the same as last time */
same = TRUE;
for(a=0; anatoms && same; a++)
same = (rank[a] == tmprank[a]);

/* move the sorted data back into rank array */
memcpy(rank, tmprank, frag->natoms * sizeof(int));
}
while(!same);

/* see if we have any tied atoms */
if(nranks != frag->natoms)		/* break ties */
{
/* find lowest tied rank */
lowest = 0;
do
{
lowest++;
c = 0;
for(a=0; anatoms; a++)
c += (rank[a] == lowest);
}
while(c == 1);
/* double all the ranks, store them in primesum array */
for(a=0; anatoms; a++)
primesum[a] = rank[a] * 2;
/* reduce the first lowest tied one by 1 */
for(a=0; primesum[a] != (lowest * 2); a++)
;
primesum[a]--;
}

}
while(nranks != frag->natoms);

/* frag is now cannonicalised, lets get on with generating the smiles string */

smiles = generatesmilesstring(frag, rank);

/* clean up */
block_free(&blk);

return smiles;
}

/*
sortranks
generates a list of ranks in "rank" based on the numbers in "value" (ie lowest value is ranked 1 etc)
returns number of different ranks
the value array will be corrupted, it'll end up as all -1's
*/

int
sortranks(int natoms, int *value, int *rank)
{
int a, sort, lowest;

sort = 1;
do
{
/* find the lowest value that isnt -1, our tag for values that have already been sorted out (ho ho!) */
for(a=0; a 0)
{
for(a=0; a 0);
}

return nranks-1;
}

/*
generatesmilesstring
generates a smiles string for the fragment based on the given atomic ranks
*/

typedef struct				/* an array of these holds the data about each atom generated by the first pass thru the fragment */
{					/* the second pass uses these to write out the smiles string */
char          symbol[2];		/* atom symbol */
int          aromatic;		/* TRUE if ANY of the bonds to it are aromatic */
int           ring[md_MAXBONDS];	/* ring closure numbers associated with this atom */
int          rbracket;		/* does this atom end a branch */
int          lbracket;		/* does the NEXT atom start a branch */
int          endfrag;		/* do we need a "." after this atom to separate two structures */
smilesbond_t bond;			/* order of bond to next atom */
} _smileyatomdata;

char *
generatesmilesstring(smilescontab_t *frag, int *rank)
{
char *smiles;
char tmpstr[8];
int *bracketstack, *ringstack, *neworder;		/* neworder[i] is the the index of frag->atom[i] in the atomdata array */
int bsptr, rsptr;
int a, b, bto, f, lowest, tmp, smileslen;
int smipos, atom, nextatom, ringatom, newring, closeatom, maxbonds, natoms, nrings;
smilesbond_t order;
int *visited;
block_ptr blk;
_smileyatomdata *atomdata, initatomdata;

static const char bondchr[] = "-=#:";

natoms = frag->natoms;

if(!(atomdata = malloc(natoms * sizeof(_smileyatomdata))))
return NULL;

if(!block_alloc(&blk, "iiiB", &bracketstack, natoms, &ringstack, natoms*md_MAXBONDS, &neworder, natoms, &visited, natoms))
{
free(atomdata);
return NULL;
}

/* set up the initial (blank) atomdata entry */
initatomdata.symbol[0] = '\0';
initatomdata.symbol[1] = '\0';
initatomdata.aromatic = FALSE;
for(b=0; bnatoms; a++)	/* haven't visited any atoms yet */
visited[a] = FALSE;
natoms = 0;				/* number of atoms we have in our smiley atom data array so far */

/* find the atom with rank 1 */
atom = lowestunvisitedatom(frag->natoms, rank, visited);

/* first pass, determine which order the atoms should appear in, and store some attributes of each one - start with the one we have just found */
while(natoms < frag->natoms && atom != -1)
{
/* set up the atomdata entry for atom number "atom" */
atomdata[natoms] = initatomdata;
strncpy(atomdata[natoms].symbol, frag->atom[atom].symbol, 2);
atomdata[natoms].aromatic = hasaromaticbonds(&frag->atom[atom]);
/* remember we have visited it, and store its index into the atomdata array */
visited[atom] = TRUE;
neworder[atom] = natoms;
/* if there are no unvisited atoms attached to this atom */
if(nunvisitedbondsto(&frag->atom[atom], visited) == 0)
{
/* jump back to the atom on top of the branch stack if there is one */
if(bsptr > 0)
{
/* add an end of branch bracket */
atomdata[natoms].rbracket = TRUE;
/* go back to the start of the branch */
bsptr --;
atom = bracketstack[bsptr];
}
else
{
/* is it time to start a new structure, or have we reached the end of the molecule */
if(natoms != frag->natoms-1)
{
atomdata[natoms].endfrag = TRUE;
atom = lowestunvisitedatom(frag->natoms, rank, visited);
}
/* increase the number of atoms we have seen, ready for next time */
natoms ++;
continue;
}
}
/* there are unvisited atoms attached to this one */
/*!!!!!!!! what if the atom we pulled off the branch stack has no unvisited atoms attached to it?  !!!!!!!!!!*/
/*!!!!!!!! this can only happen if it's the last atom, or the end of a substructure                !!!!!!!!!!*/
nextatom=-1;
/* find the lowest ranked, unvisited, atom attached to "atom", this is the next one to visit */
lowest = frag->natoms+1;
for(b=0; batom[atom].bondedto[b];
if(bto != md_NOBOND && !visited[bto] && rank[bto] < lowest)
{
lowest = rank[bto];
nextatom = bto;
}
}
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
if(nextatom==-1)
{
printf("natoms=%d\tatom=%d\tnext=-1\n", natoms, atom+1);
atom=-1;
continue;
}
/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/
/* count how many bonds from this atom form rings */
nrings = 0;
/* see if "nextatom" forms a ring, ie can we follow "nextatom" and get back to "atom" again */
ringatom = findunvisitedring(frag, atom, nextatom, visited);
/* did we find a ring */
if(ringatom != -1)
{
/* is the bond order between "atom" and "ringatom" double or triple */
order = getbondorder(&frag->atom[atom], ringatom);
if(order == SMILESBOND_Double || order == SMILESBOND_Triple)
{
/* follow "ringatom" instead of "nextatom" so we dont have to close the ring with a multiple bond */
tmp = nextatom;
nextatom = ringatom;
ringatom = tmp;
}
/* store the two atoms on the ring closure stack */
ringstack[rsptr++] = atom;
ringstack[rsptr++] = ringatom;
/* increase our ring count */
nrings ++;
/* see if any of the other unvisited atoms connected to this one also form a ring */
for(b=0; batom[atom].bondedto[b];
if(bto == md_NOBOND || bto == nextatom || bto == ringatom || visited[bto])
continue;
/* does atom "bto" form a ring (ie can we trace a path from it back to "atom") */
newring = findunvisitedring(frag, atom, bto, visited);
if(newring != -1)
{
/* add the two atoms to the ring stack */
ringstack[rsptr++] = atom;
ringstack[rsptr++] = bto;
/* count how many bonds form rings */
nrings ++;
}
}
}
/* see if this is a branch point, ie more than 1 unvisited bonds to it that don't form rings */
maxbonds = 1 + nrings;
if(nunvisitedbondsto(&frag->atom[atom], visited) > maxbonds)
{
atomdata[natoms].lbracket = TRUE;
/* store this atom on the branch stack */
bracketstack[bsptr] = atom;
bsptr ++;
}
/* store the bond order between this atom and the next */
atomdata[natoms].bond = getbondorder(&frag->atom[atom], nextatom);
/* move onto the next atom */
atom = nextatom;
/* increase the number of atoms we have seen, ready for next time */
natoms ++;
}

/* did anything go wrong */
if(natoms != frag->natoms)
{
free(atomdata);
block_free(&blk);
return NULL;
}

/* pull the ring closure atoms off the ringstack and convert them into ring closure numbers */
nrings = rsptr / 2;
while(nrings > 0)
{
ringatom  = neworder[ringstack[--rsptr]];		/* convert the frag->atom index into an atomdata index */
closeatom = neworder[ringstack[--rsptr]];
/* mark the ring closure in atomdata[] */
for(f=0; atomdata[ringatom].ring[f] != -1; f++)	/* find next free entry */
;
atomdata[ringatom].ring[f] = nrings;
for(f=0; atomdata[closeatom].ring[f] != -1; f++)
;
atomdata[closeatom].ring[f] = nrings;
nrings--;
};

/* work out the max length "smiles" needs to be */
smileslen = 1;					/* don't forget the terminator */
for(a=0; anatoms; a++)
{
/* the atom name */
/* ring closure numbers */
for(b=md_MAXBONDS-1; b>=0; b--)
{
nrings = atomdata[a].ring[b];
if(nrings != -1)
smileslen += (nrings > 9) ? 3 : 1;		/* do we need a "%xx" or just a "x" */
}
/* right brackets */
if(atomdata[a].rbracket)
smileslen ++;
/* left brackets */
if(atomdata[a].lbracket)
smileslen ++;
/* bond symbols - we never need to write aromatic bond symbols because the bound atoms name's will be in lower case */
order = atomdata[a].bond;
if(order == SMILESBOND_Double || order == SMILESBOND_Triple)
smileslen ++;
/* substructure separators */
if(atomdata[a].endfrag)
smileslen ++;
}

/* alloc some space for "smiles" */
if(!(smiles = malloc(smileslen)))
{
free(atomdata);
block_free(&blk);
return NULL;
}

/* second pass - generate the smiles string using the data we have just set up */
smipos = 0;				/* smipos is index of terminator in smiles string */
for(a=0; anatoms; a++)
{
/* print the atom name */
/* ring numbers are stored in reverse numerical order, so print them out backwards */
for(b=md_MAXBONDS-1; b>=0; b--)
{
nrings = atomdata[a].ring[b];
if(nrings != -1)
}
/* does this atom end a branch */
if(atomdata[a].rbracket)
smiles[smipos++] = ')';
/* does the atom end a substructure */
if(atomdata[a].endfrag)
smiles[smipos++] = '.';
/* does the next atom start a branch */
if(atomdata[a].lbracket)
smiles[smipos++] = '(';
/* we never need to write aromatic bond symbols because the bound atoms name's will be in lower case */
order = atomdata[a].bond;
if(order == SMILESBOND_Double || order == SMILESBOND_Triple)
smiles[smipos++] = bondchr[order - SMILESBOND_Single];
}
smiles[smipos] = '\0';

free(atomdata);
block_free(&blk);

return smiles;
}

/*
adds the smiles symbol(s) (in lower case if aromatic is TRUE) to the string
returns the number of characters added
*/

#define NINSUBSET 10

int
addsmilesatom(char *string, char *symbol, int aromatic)
{
int n, s;

static const char *organicsubset[NINSUBSET] = { "B", "C", "N", "O", "P", "S", "F", "Cl", "Br", "I" };

/* if the symbol is not in the "organic subset" we need to enclose it in square brackets */
for(s=0; s < NINSUBSET && strncmp(symbol, organicsubset[s], 2) != 0; s++)
;

/* count how many chars we add to the string */
n = 0;

if(s == NINSUBSET)
string[n++] = '[';

string[n++] = (aromatic) ? tolower(symbol[0]) : symbol[0];

/* is it a one letter symbol */
if(symbol[1] != '\0')
string[n++] = (aromatic) ? tolower(symbol[1]) : symbol[1];

if(s == NINSUBSET)
string[n++] = ']';

return n;
}

#undef NINSUBSET

/*
adds a ring closure digit (or %xx if ringnum > 9) to the given string
returns the number of characters it added
*/

int
{
int n;

if(ringnum < 10)
{
*string = '0' + ringnum;
n = 1;
}
else
{
n = (int)sprintf(string, "%%%d", ringnum);
}

return n;
}

/*
lowestunvisitedatom
returns the index of the atom which has "visited[]" set to FALSE and the lowest value in "rank[]"
*/

int
lowestunvisitedatom(int natoms, int *rank, int *visited)
{
int a, lowestatom, lowestrank;

lowestatom = -1;
for(a=0; anatoms * sizeof(int))))
return -1;

ring = FALSE;
for(b=0; batom[start].bondedto[b];
if(close != md_NOBOND && close != branch && !visited[close])
{
/* take a copy of the visited array as we will need to change it */
memcpy(done, visited, frag->natoms * sizeof(int));
/* can we trace a path from "start", along the bond to "close", and end up at "branch" ? */
ring = dotheymeet(frag, start, close, branch, done);
}
}

free(done);

return ring ? close : -1;
}

/*
dotheymeet
recursively searches from "next" (but not along the bond to "start") for "target"
only visits an atom i if visited[i]==FALSE
all atoms that are visited will have visited[i] set to TRUE
returns TRUE if it was found
*/

int
dotheymeet(smilescontab_t *frag, int start, int next, int target, int *visited)
{
int b, bto;
int meet;

/*!!!!! could we store "visited" for each atom connected to "next" here, then restore it before we return? !!!!!*/

meet = FALSE;
for(b=0; batom[next].bondedto[b];
if(bto != md_NOBOND && bto != start && !visited[bto])
{
visited[bto] = TRUE;
meet = (bto == target) ? TRUE : dotheymeet(frag, next, bto, target, visited);
}
}

return meet;
}

/*
nunvisitedbondsto
returns the number of bonds to the given atom
bonds are only counted if visited[i] == FALSE where i is the index of the atom bound to this one
*/

int
nunvisitedbondsto(smilesatom_t *aptr, int *visited)
{
int b, n, bto;

n=0;
for(b=0; bbondedto[b];
n += ((bto != md_NOBOND) && !visited[bto]);
}

return n;
}

/*
nbondsto
returns the total number of atoms bonded to this one
*/

int
nbondsto(smilesatom_t *aptr)
{
int b, nbonds;

nbonds = 0;
for(b=0; bbondedto[b] != md_NOBOND);

return nbonds;
}

/*
getbondorder
returns the bond order of the bond from the atom pointed to by "aptr" to atom number "atom"
if they are not bonded it returns SMILESBOND_NoBond
*/

smilesbond_t
getbondorder(smilesatom_t *aptr, int atom)
{
int b;
smilesbond_t order;

order = SMILESBOND_NoBond;
for(b=0; bbondedto[b] == atom)
order = aptr->bondtype[b];
}

return order;
}

/*
hasaromaticbonds
returns TRUE if there are one or more aromatic bonds to the given atom
*/

int
hasaromaticbonds(smilesatom_t *aptr)
{
int b;
int has = FALSE;

for(b=0; bbondedto[b] != md_NOBOND && aptr->bondtype[b] == SMILESBOND_Aromatic);

return has;
}
```
 Modified: Tue Jan 21 17:00:00 1997 GMT Page accessed 5474 times since Sat Apr 17 21:37:33 1999 GMT