// ========================================================================== // quality_trimmer // ========================================================================== // Copyright (c) 2006-2013, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of Knut Reinert or the FU Berlin nor the names of // its contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH // DAMAGE. // // ========================================================================== // Author: Your Name <your.email@example.net> // ========================================================================== #include <seqan/basic.h> #include <seqan/sequence.h> #include <seqan/seq_io.h> #include <seqan/arg_parse.h> // ========================================================================== // Classes // ========================================================================== // -------------------------------------------------------------------------- // Class QualityTrimmerOptions // -------------------------------------------------------------------------- // This struct stores the options from the command line. using namespace seqan; struct QualityTrimmerOptions { CharString inputReads; CharString outputFile; unsigned treshold; QualityTrimmerOptions() : treshold(0) {}; }; // ========================================================================== // Functions // ========================================================================== // -------------------------------------------------------------------------- // Function parseCommandLine() // -------------------------------------------------------------------------- seqan::ArgumentParser::ParseResult parseCommandLine(QualityTrimmerOptions & options, int argc, char const ** argv) { // Setup ArgumentParser. seqan::ArgumentParser parser("read_mapper"); // Set short description, version, and date. setShortDescription(parser, "Put a Short Description Here"); setVersion(parser, "0.1"); setDate(parser, "July 2012"); // Define usage line and long description. addUsageLine(parser, "[\\fIOPTIONS\\fP] \"\\fITEXT\\fP\""); addDescription(parser, "This tool removes x bases from the end of each read."); // We require three arguments. // The reads addOption(parser, ArgParseOption("is", "inputReads", "Name of the multi-FASTA input.", ArgParseArgument::INPUTFILE, "IN")); setRequired(parser, "is"); // The output file addOption(parser, ArgParseOption("o", "outputFile", "Name of the multi-FASTA input.", ArgParseArgument::OUTPUTFILE, "OUT")); setRequired(parser, "o"); // The quality threshold addOption(parser, ArgParseOption("t", "threshold", "Error rate", ArgParseArgument::INTEGER, "ERROR")); // Add Examples Section. addTextSection(parser, "Examples"); addListItem(parser, "\\fquality_trimmer\\fP \\fB-r\\fP \\fIread_file\\fP \\fB-o\\fP \\fIoutput_file\\fP \\fB-t\\fP \\fIthreshold\\fP", ""); // Parse command line. seqan::ArgumentParser::ParseResult res = seqan::parse(parser, argc, argv); // Only extract options if the program will continue after parseCommandLine() if (res != seqan::ArgumentParser::PARSE_OK) return res; getOptionValue(options.inputReads, parser, "inputReads"); getOptionValue(options.outputFile, parser, "outputFile"); getOptionValue(options.treshold, parser, "threshold"); return seqan::ArgumentParser::PARSE_OK; } // -------------------------------------------------------------------------- // Function readFiles() // -------------------------------------------------------------------------- // Function to load the reads from file. template <typename TChar, typename TValue, typename TQual> int readFiles(StringSet<String<TChar> > & meta, StringSet<String<TValue> > & seq, StringSet<String<TQual> > & qual, char const * filename) { std::fstream stream(filename, std::ios::binary | std::ios::in); RecordReader<std::fstream, DoublePass<> > reader(stream); return read2(meta, seq, qual, reader, Fastq()); } // -------------------------------------------------------------------------- // Function writeFiles() // -------------------------------------------------------------------------- // Function to write the output file to disk. template <typename TChar, typename TValue, typename TQual> int writeFiles(StringSet<String<TChar> > & meta, StringSet<String<TValue> > & seq, StringSet<String<TQual> > & qual, char const * filename) { std::fstream stream(filename, std::ios::binary | std::ios::out); RecordReader<std::fstream, DoublePass<> > reader(stream); return write2(stream, meta, seq, qual, Fastq()); } // -------------------------------------------------------------------------- // Function trim() // -------------------------------------------------------------------------- // Trimming the reads template <typename TValue, typename TQual> void trim(StringSet<String<TValue> > & seq, StringSet<String<TQual> > & qual, QualityTrimmerOptions const & options) { for (unsigned i = 0; i < length(qual); ++i) { resize(qual[i], length(qual[i]) - options.treshold); resize(seq[i], length(seq[i]) - options.treshold); } } // -------------------------------------------------------------------------- // Function main() // -------------------------------------------------------------------------- int main(int argc, char const ** argv) { // Parse the command line. seqan::ArgumentParser parser; QualityTrimmerOptions options; seqan::ArgumentParser::ParseResult res = parseCommandLine(options, argc, argv); // If there was an error parsing or built-in argument parser functionality // was triggered then we exit the program. The return code is 1 if there // were errors and 0 if there were none. if (res != seqan::ArgumentParser::PARSE_OK) return res == seqan::ArgumentParser::PARSE_ERROR; std::cout << "Quality Trimming\n" << "================\n\n"; // Inіt variables for redaIds, readSeq and quals. StringSet<String<char> > meta; StringSet<String<Dna5> > seq; StringSet<String<char> > qual; // Load the reads if (readFiles(meta, seq, qual, toCString(options.inputReads)) != 0) return 1; // Trim the reads trim(seq, qual, options); // Write the outpus if (writeFiles(meta, seq, qual, toCString(options.outputFile)) != 0) return 1; return 0; }