/*------------------------------------------------------------- Copyright (C) 2000 Peter Clote. All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for NON-COMMERCIAL purposes and without fee is hereby granted provided that this copyright notice appears in all copies. THE AUTHOR AND PUBLISHER MAKE NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. THE AUTHORS AND PUBLISHER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. -------------------------------------------------------------*/ /*------------------------------------- ptwiseMutInf.c Pointwise mutation simulation. Infinite loop. -------------------------------------*/ #include #include // for RAND_MAX #define L 10 #define W 5 #define RREAL ((double)rand()/(double)RAND_MAX) #define pDel 0.0001 #define pIns 0.001 #define pSub 0.001 #define pTransition (pSub*0.8) #define pTransversion (pSub*0.2) #define numGEN // number of generations to interate #define DEBUG 0 // Global declarations char a[L+1][W]; int b[L+1], cum[L+1], origLength, length; // prototypes void printAlignment(char *, int); void delete(int), insert(int,char),subst(int,char); void printContents(char*, int); void mutate(void); main(int argc, char *argv[]) { // declarations int i, j, operand, n; char op,ch; /** check command line arguments **/ if (argc != 2) { fprintf(stderr,"Usage: %s nucleotideSeq\n",argv[0]); exit(1); } if (strlen(argv[1]) > L) { fprintf(stderr,"%s is too long\n",argv[1]); exit(1); } srand(314159); length = origLength = strlen(argv[1]); n=0; b[0]=cum[0]=0; for (i=1;i<=length;i++) { a[i][0]=argv[1][i-1]; b[i]=1; cum[i]=i; } printf("0:\n"); printContents(argv[1],strlen(argv[1])); do { n++; mutate(); printf("n:%d\n",n); printContents(argv[1],strlen(argv[1])); } while ( 1 ); } void printAlignment(char *s, int strLength){ /*----------------------------------------- Original input string s0,s1,...,s{n-1} The a[i] are the bins; i.e. a[i][0],...,a[i][b[i]-1]] s0 s1 s2 ... s{n-1} a0 a1 a2 a3 an The bin a[i] consists of s{i-1} together with insertions thereafter. -------------------------------------------*/ int i,j; for (i=0;i0) printf("%c",a[i][0]); else printf(" "); // ensures that if bin empty, a placeholder printed // for alignment with original sequence for (j=1;jorigLength) i=origLength; if (b[i]==1) a[i][0]=' '; // delete from a bin with 1 element else { for (j=k-cum[i]+b[i]; j0) cum[j]--; return; } while (k >= cum[i]) i++; // find least i with k < cum[i]; must exist, // else previous case held so k==cum[origLength] if (b[i]==1) a[i][0]=' '; // delete from a bin with 1 element else { for (j=k-cum[i]+b[i]; j0) cum[j]--; return; } void insert(int k, char ch){ /*-------------------------- Current string r0 ... r{length-1}, and 0 <= k <= length. Insert before the k-th symbol of r if k < length, and insert after r{length-1} if k=length. ---------------------------*/ int i=0,j; while (k>cum[i]) i++; // find least i with k <= cum[i]; such must exist if (b[i]==0) a[i][0]=ch; // insert into bin with 0 elements // can only happen if k=b[0]=0 else { for (j=b[i];j>k-cum[i]+b[i];j--) a[i][j]=a[i][j-1]; // move up elements in bin a[i][k-cum[i]+b[i]]=ch; } b[i]++; length++; for (j=i;j<=origLength;j++) cum[j]++; } void subst(int k, char ch){ int i=0,j; while (k >= cum[i]) i++; // find least i with k < cum[i] a[i][k-cum[i]+b[i]] = ch; } void mutate(){ int i,k; char ch; double z; if (RREAL < pDel){ if (length == 0) return; k = rand()%length; delete(k); } if (RREAL < pIns){ k = rand()%(length+1); z = RREAL; if (z<0.25) insert(k,'A'); else if (z<0.5) insert(k,'C'); else if (z<0.75) insert(k,'G'); else insert(k,'T'); // used RREAL instead of rand() and switch // since statistical properties better } if (RREAL < pSub){ if (length==0) return; if (RREAL < pTransition/pSub){ k = rand()%length; i=0; while (k>=cum[i]) i++; ch = a[i][k-cum[i]+b[i]]; switch (ch) { case 'A' : subst(k,'G'); break; case 'C' : subst(k,'T'); break; case 'G' : subst(k,'A'); break; case 'T' : subst(k,'G'); break; default : printf("%c not nucleotide\n",ch); exit(1); } } // end of treatment of transition else { k = rand()%length; i=0; while (k>=cum[i]) i++; ch = a[i][k-cum[i]+b[i]]; switch (ch) { case 'A' : if (RREAL < 0.5) subst(k,'C'); else subst(k,'T'); break; case 'C' : if (RREAL < 0.5) subst(k,'A'); else subst(k,'G'); break; case 'G' : if (RREAL < 0.5) subst(k,'C'); else subst(k,'T'); break; case 'T' : if (RREAL < 0.5) subst(k,'A'); else subst(k,'G'); break; default : printf("%c not nucleotide\n",ch); exit(1); } } // end of treatment of transversion } } /*----------------------------------- Example: argv[1] is original nucleotide sequence. a[i][j] is j-th element in i-th bin, so at beginning, only one element in each bin and a[i][0] = argv[1][i]. b[i] is number of elements in i-th bin, and cum[i] is cumulative number of elements in 0,1,...,i-th bins. length is number of elements in the current nucleotide sequence, originally set to strlen(argv[1]). k is in range 0,...,length-1 We'll insert,delete,or substitute the k-th element of current nucleotide sequence. This involves finding the appropriate bin to change (i.e. find least i such that k < cum[i]). Suppose b={3,1,2,4,2} and k=8. Then cum={3,4,6,10,12}, and east i with k < cum[i] satisfies i=3. ------------------------------------*/