/*------------------------------------------------------------- Copyright (C) 2000 Peter Clote. All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for NON-COMMERCIAL purposes and without fee is hereby granted provided that this copyright notice appears in all copies. THE AUTHOR AND PUBLISHER MAKE NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. THE AUTHORS AND PUBLISHER SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. -------------------------------------------------------------*/ /****************************************** entropyPlot0.c P. Clote, 7 Sept 1998 Program computes entropy (single and dinucleotide) This program assumes complete data -- if there is incomplete data (eg R,Y, etc), then there is no contribution to nucleotide/dinucleotide count. Nevertheless, since the denominator is window size W (for nucleotides) and W-1 (for dinucleotides), this leads to small values if many data items are incomplete. In a complicated fashion, this is corrected in entropy1.c WARNING - This runs on my Tecra laptop running Linux. You may get a runtime error on another machine, because the main loop is non-terminating (while (1)). In entropy1.c this was corrected. ******************************************/ #include #include #define W 100 /* window size */ #define lg(x) ( ((x) == 0) ? 0 : (log(x)/log(2)) ) main(int argc, char *argv[]) { FILE *in, *out1, *out2; /* out1 is entropy plot for single nucleotides */ /* out2 is entropy plot for dinucleotides */ int n=0,i,j,a=0,c=0,g=0,t=0; /* a,c,g,t nucleotides */ int num = 0,s=0,k=0,w=0,r=0,y=0; /* num nucleotides in file */ char ch; char win[W]; /* window */ int first,second,index1,index2; double ent1, ent2; /*** entropy1 and entropy2 */ double fa,fc,fg,ft; /*** frequency of a,c,g,t */ int pair[4][4]; /*** for dinucleotide counts */ double freq[4][4]; /*** for dinucleotide frequencies */ void error(char *); /** error handling */ /**** Error handling for input file ****/ if (argc != 4) { fprintf(stderr,"%s in out1 out2 \n",argv[0]); exit(1); } /***** Initialization of I/O files and pair array **/ in = fopen(argv[1],"r"); out1 = fopen(argv[2],"w"); out2 = fopen(argv[3],"w"); for (i=0;i<4;i++) for (j=0;j<4;j++) pair[i][j] = 0; /** remove first line header from M. jannaschii genome file **/ while ( ( ch = fgetc(in)) != '\n' ); /** remove first line **/ /* fill up window. Assume file contains at least W char */ i=0; while (i