Menu

[r19]: / trunk / src / Sampler.cpp  Maximize  Restore  History

Download this file

103 lines (87 with data), 3.0 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
/*! Filename: Sampler.cpp
Created: June 22, 2011
Modified: July 7, 2011
Author: Ivan Erill <erill@umbc.edu>, Bob Forder <rforder1@umbc.edu>
The sampler class acts as an interface between the non-coding regions of
the genome and the recognizers. The recognizer is an ANN which processes
vectors of doubles. Non-coding regions are represented as a sequence
string along with any pertinent derived data. The sampler class employs
methods which allow a non-coding region to be encoded as a vector of
doubles. The sampler derives its coded size (see getCodedSize()) from the window size.
*/
#include "Sampler.h"
/*! Builds a sampler that works on windows of "windowSize" bases.
\param windowSize  Number of bases covered by one encoded window. The value is
                   stored as given; no validation is performed here. */
Sampler::Sampler(int windowSize)
    : windowSize(windowSize)
{
}
/*! Destroys the sampler. The body is intentionally empty: no cleanup is
performed here. */
Sampler::~Sampler() {}
/*! Placeholder for initializing the derived information of a non-coding
region. Not currently implemented: the sequence is ignored.
\param sequence  Unused for now.
\return Always 0. */
int Sampler::negotiate(DNASequence *sequence)
{
    (void)sequence; /* intentionally unused until this hook is implemented */
    return 0;
}
/*! Encodes windowSize bases of a DNA sequence, starting at "position", as a
one-hot vector of doubles.

Base i of the window occupies out[4*i] .. out[4*i + 3], in the order
A, T, C, G: the slot matching the base is set to 1.0 and the other three to
0.0. Any character other than uppercase 'A', 'T', 'C' or 'G' produces four
zeros.

\param sequence  Sequence to read from. Its seqString is read with at(), so a
                 window that runs outside the string is reported by at()
                 (presumably std::out_of_range - seqString's declaration is
                 not visible in this file; confirm against DNASequence).
\param out       Destination vector. It is grown to windowSize * 4 entries
                 when it is shorter than that. It is never shrunk, and
                 entries beyond the coded window are left untouched.
\param position  Index in the sequence of the first base of the window.
\return Always 0. */
int Sampler::encode(DNASequence *sequence, vector<double> *out, int position)
{
    /* The writes below use unchecked operator[], so make sure *out can hold
       the whole coded window before touching it. */
    if (windowSize > 0)
    {
        const vector<double>::size_type needed =
            static_cast<vector<double>::size_type>(windowSize) * 4;
        if (out->size() < needed)
        {
            out->resize(needed, 0.0);
        }
    }

    /* TODO (from the original authors): find a slicker encoding scheme. */
    for (int i = 0; i < windowSize; i++)
    {
        const char base = sequence->seqString->at(i + position);
        const int slot = i * 4;

        (*out)[slot + 0] = (base == 'A') ? 1.0 : 0.0;
        (*out)[slot + 1] = (base == 'T') ? 1.0 : 0.0;
        (*out)[slot + 2] = (base == 'C') ? 1.0 : 0.0;
        (*out)[slot + 3] = (base == 'G') ? 1.0 : 0.0;
    }
    return 0;
}
/*! Hook intended to be called whenever a non-coding region is mutated at
"position". Not currently implemented: both arguments are ignored.
\param sequence  Unused for now.
\param position  Unused for now.
\return Always 0. */
int Sampler::update(DNASequence *sequence, int position)
{
    (void)sequence; /* intentionally unused until this hook is implemented */
    (void)position;
    return 0;
}
/*! Returns the size of the vector needed when encoding a DNA sequence */
int Sampler::getCodedSize()
{
return this->windowSize * 4;
}
/*! Returns the number of bases evaluated when encoding a DNA sequence */
int Sampler::getWindowSize()
{
return this->windowSize;
}