//============================================================================
// Name        : Color space /w true Qual fq file to basespace fq file (colorQ2ip33fq)
// Author      : E.Korsching
// Version     : 0.1
// Cancer & Complex Systems Research Group, E.Korsching
// Institute of Bioinformatics, Niels-Stensen-Str. 12, 48149 Münster, Germany
// http://complex-systems.uni-muenster.de/
// Licence     : GNU GPLv3
// Description : Takes a color-space FASTQ file
//				 and converts the 2nd line of each 4-line record to base-space format.
//				 It also deletes the first quality code in the 4th line (anchor base of the
//				 two-color encoding), because the anchor base is gone after decoding.
//============================================================================

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <map>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/classification.hpp>
#include <boost/lexical_cast.hpp>

#include <boost/program_options.hpp>
#include <sstream>

using namespace std;
using namespace boost;
using namespace boost::program_options;


// global variables to hold the name/path and the transformed sequences
string baseseq;
string qseq;
std::string cfastq;

void cs2bs(string cstring);
void qminusone(string qstring);


bool cmdlargs(int argc, char* argv[])
{
    if (argc != 2) { // We expect 2 arguments: the program name, the input path
        std::cerr << "Usage: " << argv[0] << "  INPUT.color.fq" << std::endl;
        return 1;
    }
    cfastq=argv[1];		// save in global var
    return 0;  // return ok
}


int main(int argc, char** argv) {

	bool result = cmdlargs(argc, argv);
	if (result)
	  return 1;

	string cfastq_line;

	ifstream input_cfastq(cfastq);

	int idxcounter=1;
	int endnum; 

	if (input_cfastq.is_open()){
		while (getline (input_cfastq, cfastq_line)) {
			if (idxcounter == 2) {
				cs2bs(cfastq_line);
				cout << baseseq << endl;
			}
			if ((idxcounter == 1) || (idxcounter == 3)){
				cout << cfastq_line << endl;
			}
			if (idxcounter == 4){
				qminusone(cfastq_line);
				cout << qseq << endl;
				idxcounter=0;
			}
			idxcounter=idxcounter+1;
		}
		
		input_cfastq.close();
	}

	endnum=(idxcounter-1) % 4;
	if ( endnum == 0 )
	{
		return 0; // is even
	}else{
		return 1; // is odd --> error
	}
}


void cs2bs (string cstring) {
	// translate from color code into base code
	std::map<std::string, char> twobase;

	twobase["A0"] = 'A';
	twobase["A1"] = 'C';
	twobase["A2"] = 'G';
	twobase["A3"] = 'T';

	twobase["C1"] = 'A';
	twobase["C0"] = 'C';
	twobase["C3"] = 'G';
	twobase["C2"] = 'T';

	twobase["G2"] = 'A';
	twobase["G3"] = 'C';
	twobase["G0"] = 'G';
	twobase["G1"] = 'T';

	twobase["T3"] = 'A';
	twobase["T2"] = 'C';
	twobase["T1"] = 'G';
	twobase["T0"] = 'T';

	//string colorspace = "T303202210001123002120020110233120030023222011120021101131030321030003311220";
	//string colorspace = "T30320221000.123002120020110233120030023222011120021101131030321030003311220";

	string colorspace = cstring;
	string basespace = colorspace;
	int baselength = colorspace.length();
	using boost::lexical_cast;

	for (int i=1; i<=baselength; i++){
		if (colorspace[i] == '.'){
			for(int j=i; j<=baselength; j++) {
				basespace[j] = 'N';
			}
			break;
		}
		// colorbase = "T3"
		string colorbase = boost::lexical_cast<std::string>(basespace[(i-1)]) + boost::lexical_cast<std::string>(colorspace[i]);
		basespace[i]=(*twobase.find(colorbase)).second;
	}

	//cout << colorspace << endl;
	basespace.erase(0,1);

	// copy the decoded sequence content from loca variable to global variable.
	baseseq=basespace;
}


void qminusone (string qstring) {
	// remove one quality score value, because the first base is artificial
	// in two base encoding
	string qspace;
	int qlength = qstring.length();

	if (!qstring.empty()) {
		qspace = qstring.substr(1, qlength);
	}

	qseq=qspace;
}
