/*! Filename: Organism.h
Created: June 22, 2011
Author: Ivan Erill <erill@umbc.edu>, Bob Forder <rforder1@umbc.edu>
*/
#ifndef ORGANISM_H
#define ORGANISM_H
#include "ObjectFactory.h"
#include "GenericOrganism.h"
#include "GenericTranslator.h"
#include "GenericSampler.h"
#include "GenericRecognizer.h"
#include "NcrData.h"
#include "ExpData.h"
#include "DNASequence.h"
#include "CodingDNASequence.h"
#include "random.h"
#include "Environment.h"
#include <cmath>
#include <cassert>
#include <vector>
#include <cstdio>
using namespace std;
/*
 * BASE2INDEX(base): branch-free mapping from a nucleotide character to an
 * index in the range 0..3.
 *
 * This is a deliberate hack: it avoids the branching of a switch on the base
 * character or a chain of "if" statements, and is a lot faster than either.
 * It works because bits 1 and 2 of the ASCII codes of the four bases happen
 * to be distinct, so shifting right by one and masking with 0x3 is enough.
 *
 * Only pass 'A', 'T', 'C' or 'G'. Any other value still yields a number in
 * 0..3, but that number does not identify a base. Note that the resulting
 * order is A, C, T, G (not alphabetical).
 *
 * The table below illustrates how the trick works:
 *
 *   Character   ASCII   Binary     (base >> 1) & 0x3   Index
 *   'A'         65      1000001b   00b                 0
 *   'C'         67      1000011b   01b                 1
 *   'T'         84      1010100b   10b                 2
 *   'G'         71      1000111b   11b                 3
 */
#define BASE2INDEX(base) (((base) >> 1) & 0x3)
/*
 * Organism: an organism made of coding and non-coding DNA regions, a set of
 * recognizers, a sampler, a translator and a background sequence, evaluated
 * against an Environment.
 *
 * The base class is inherited publicly and the access is spelled out on
 * purpose: with the `class` keyword an unspecified base defaults to private,
 * which makes the Organism* -> GenericOrganism* conversion inaccessible to
 * code outside this class even though the methods below trade in
 * GenericOrganism pointers.
 *
 * NOTE(review): the header does not show which of the raw pointers below are
 * owned (deleted) by Organism and which are merely borrowed from the caller;
 * confirm against Organism.cpp before copying or destroying instances.
 */
class Organism : public GenericOrganism
{
protected:
    std::vector<CodingDNASequence *> * codingRegions; /* Vector of pointers to coding regions */
    std::vector<DNASequence *> * nonCodingRegions;    /* Vector of pointers to non-coding regions */
    ExpData * expData;                                /* Calculates expression levels of NCRs */
    std::vector<GenericRecognizer *> * recognizers;   /* Vector of pointers to recognizers */
    GenericSampler * sampler;                         /* Pointer to sampler */
    GenericTranslator * translator;                   /* Pointer to translator */
    DNASequence * background;                         /* Pointer to background */
    std::string recClass;                             /* Name of recognizer class */
    int recCount;                                     /* Number of recognizers */
    double recCopies;                                 /* Number of copies of each recognizer */
    int ncrCount;                                     /* Number of non-coding regions */
    int ncrWidth;                                     /* Width of each non-coding region */
    std::vector<int> hiddenNodes;                     /* Number of nodes per hidden layer */
    int samples;                                      /* Number of times to sample background when calculating avg. */
    std::vector<double> * encoded;                    /* A vector for encoding input for recognizers
                                                         TODO: We should be using the input layer of the recognizer here! */
    Environment * env;                                /* Pointer to environment */
    double alpha;                                     /* Probability of mutating a non-coding region (as opposed to a coding region) */
    double fitnessValue;                              /* The last calculated fitness of the organism */
    double gcBias;                                    /* gcBias for online mode */
    std::vector<double> * cubBias;                    /* cubBias for online mode */
    bool useCubBias;                                  /* Use cubBias? (online mode only) */
    bool onlineBg;                                    /* Is the background generated stochastically? */
    bool dirty;                                       /* NOTE(review): undocumented in the original; presumably flags
                                                         fitnessValue as stale -- confirm in Organism.cpp */
    double E_nl;                                      /* Schneider's small sample correction */

    /* Default constructor is protected: only Organism itself, its friends and
       derived classes can create an instance without the full parameter set. */
    Organism ();

public:
    /* Full constructor. Each argument corresponds to the protected member of
       the same name declared above. recClass and hiddenNodes are taken by
       value; everything passed as a pointer is taken as a raw pointer. */
    Organism (GenericSampler * sampler,
              GenericTranslator * translator,
              DNASequence * background,
              std::string recClass,
              int recCount,
              double recCopies,
              int ncrCount,
              int ncrWidth,
              std::vector<int> hiddenNodes,
              int samples,
              Environment * env,
              double alpha,
              double gcBias,
              std::vector<double> * cubBias,
              bool useCubBias);

    virtual ~Organism();

    /* Debug: print the organism to the given stdio stream.
       NOTE(review): print_motif looks like an on/off flag -- confirm. */
    void print(FILE *fd, int print_motif);

    /* Evolutionary operations. These take and return GenericOrganism pointers
       and are presumably overrides of the GenericOrganism interface (the base
       class is not visible from this header).
       NOTE(review): the meaning of the int return values (status code?) is
       not visible here -- confirm in Organism.cpp. */
    virtual double fitness();
    virtual int mutate();
    virtual int crossover(GenericOrganism *partner);
    virtual int randomize();
    virtual int overwrite(GenericOrganism *dst);
    virtual GenericOrganism * copy();

    /* Per-recognizer statistics. The recognizer is selected by an int argument
       (presumably an index into `recognizers`); the three int-returning
       functions hand their result back through the trailing pointer argument.
       NOTE(review): valid index range and return-code convention are not
       visible here -- confirm in Organism.cpp. */
    virtual double calcScoreSum(int rec);
    virtual int calcInfoContent(int recognizer, double *r_seq);
    virtual int calcColumnEntropy(int recognizer, int col, double *entropy);
    virtual int calcMutualInfo(int recognizer, double *mi);
};
#endif