/*! Filename: Sampler.cpp
Created: June 22, 2011
Modified: July 7, 2011
Author: Ivan Erill <erill@umbc.edu>, Bob Forder <rforder1@umbc.edu>
The sampler class acts as an interface between the non-coding regions of
the genome and the recognizers. The recognizer is an ANN which processes
vectors of doubles. Non-coding regions are represented as a sequence
string along with any pertinent derived data. The sampler class employs
methods which allow a non-coding region to be encoded as a vector of
doubles. The sampler sets its own codingSize depending on the window size.
*/
#include "Sampler.h"
/*! Constructs a sampler that encodes "windowSize" bases per call.
    The value is stored unchanged in the windowSize member. */
Sampler::Sampler(int windowSize)
    : windowSize(windowSize)
{
}
/*! Destructor. The body is intentionally empty: no explicit cleanup is
    performed here. */
Sampler::~Sampler() {}
/*! Intended to initialize derived information in a non-coding region.
    Not currently implemented: the sequence is ignored.
    \param sequence  non-coding region to prepare (currently unused)
    \return always 0 */
int Sampler::negotiate(DNASequence *sequence)
{
    (void)sequence; /* placeholder: nothing is derived yet */
    return 0;
}
/*! Encodes windowSize bases of a DNA sequence, starting at "position", as a
    one-hot vector of doubles written into "out".

    Base i of the window occupies out[i*4 .. i*4+3] in the order A, T, C, G:
    the slot matching the base is set to 1.0 and the others to 0.0. Any
    character other than 'A', 'T', 'C' or 'G' (the comparison is
    case-sensitive) leaves all four slots at 0.0.

    \param sequence  sequence whose seqString is read; must not be null
    \param out       destination vector; must not be null. It is grown to
                     windowSize * 4 elements if it is smaller than that;
                     elements beyond that range are left untouched.
    \param position  index in seqString of the first base to encode
    \return always 0

    Characters are fetched with seqString->at(). NOTE(review): seqString is
    presumably a std::string, in which case a window running past the end of
    the sequence raises std::out_of_range -- confirm against DNASequence. */
int Sampler::encode(DNASequence *sequence, vector<double> *out, int position)
{
    /* A non-positive window encodes nothing (the loop below would not run). */
    if (windowSize <= 0)
        return 0;

    /* The writes below use unchecked operator[], so make sure the
       destination can hold four doubles per base before touching it. */
    const vector<double>::size_type required =
        static_cast<vector<double>::size_type>(windowSize) * 4;
    if (out->size() < required)
        out->resize(required);

    for (int i = 0; i < windowSize; i++)
    {
        const char base = sequence->seqString->at(i + position);
        const int slot = i * 4;

        (*out)[slot + 0] = (base == 'A') ? 1.0 : 0.0;
        (*out)[slot + 1] = (base == 'T') ? 1.0 : 0.0;
        (*out)[slot + 2] = (base == 'C') ? 1.0 : 0.0;
        (*out)[slot + 3] = (base == 'G') ? 1.0 : 0.0;
    }
    return 0;
}
/*! Hook invoked whenever a non-coding region is mutated at "position".
    Not currently implemented: both arguments are ignored.
    \param sequence  the mutated non-coding region (currently unused)
    \param position  index of the mutation (currently unused)
    \return always 0 */
int Sampler::update(DNASequence *sequence, int position)
{
    (void)sequence;
    (void)position;
    return 0;
}
/*! Returns the size of the vector needed when encoding a DNA sequence */
int Sampler::getCodedSize()
{
return this->windowSize * 4;
}
/*! Returns the number of bases evaluated when encoding a DNA sequence */
int Sampler::getWindowSize()
{
return this->windowSize;
}