/*
 * Tokenizer.h
 *
 *  Created on: 28/02/2012
 *      Author: Arafat noureddine
 */

#ifndef TOKENIZER_H_
#define TOKENIZER_H_

#include <stdexcept>
#include <sstream>
#include <iostream>
#include <vector>
#include <string>

/////////////////////////////////////////////////////////////////////////////
/// \class Tokenizer
/// \brief String tokenizer
///
/// Splits a record into fields separated by a single delimiter character and
/// stores the resulting tokens. Leading spaces, tabs and vertical tabs are
/// stripped from every token; trailing whitespace is kept as-is.
///
/// Splitting relies on std::getline, therefore:
///  - two consecutive delimiters produce an empty token;
///  - a delimiter at the very end of the record does NOT produce a trailing
///    empty token;
///  - an empty record produces no token at all.
/////////////////////////////////////////////////////////////////////////////
class Tokenizer
{
public:
    ///\cond HIDDEN_SYMBOLS
    typedef std::vector<std::string>::const_iterator iterator;
    ///\endcond

    //----------------------------------------------------------------------
    /// ctor : tokenizes \p record immediately.
    /// \param record    string to split (default: "")
    /// \param delimiter field separator (default: ' ')
    Tokenizer(const std::string& record = "", char delimiter = ' ')
        : m_vec_tokens(),
          m_character_delimiter(delimiter)
    {
        assign(record);
    }

    //----------------------------------------------------------------------
    /// copy ctor : copies both the delimiter and the already computed tokens
    Tokenizer(const Tokenizer& another)
        : m_vec_tokens(another.m_vec_tokens),
          m_character_delimiter(another.m_character_delimiter)
    {
    }

    //----------------------------------------------------------------------
    /// dtor
    virtual ~Tokenizer()
    {
    }

    //----------------------------------------------------------------------
    /// assignment operator : copies both the delimiter and the tokens
    Tokenizer& operator=(const Tokenizer& another)
    {
        if (this != &another)
        {
            m_character_delimiter = another.m_character_delimiter;
            m_vec_tokens = another.m_vec_tokens;
        }
        return *this;
    }

    //----------------------------------------------------------------------
    /// tokenize a new string, replacing any previously stored tokens.
    /// The delimiter given at construction time is reused.
    /// \param record string to split
    void assign(const std::string& record)
    {
        std::stringstream ss(record);
        std::string field;

        m_vec_tokens.clear();
        while (std::getline(ss, field, m_character_delimiter))
            m_vec_tokens.push_back(trim(field));
    }

    //----------------------------------------------------------------------
    /// read-only iterator on the first token
    iterator begin() const
    {
        return m_vec_tokens.begin();
    }

    //----------------------------------------------------------------------
    /// read-only iterator one past the last token
    iterator end() const
    {
        return m_vec_tokens.end();
    }

    //----------------------------------------------------------------------
    /// return a copy of the token at \p index
    /// \throws std::logic_error if \p index is not less than count()
    const std::string operator[](unsigned int index) const
    {
        if (index >= m_vec_tokens.size())
            throw std::logic_error("Tokenizer() - index Overflow - index must be less than the size of available token !");

        return m_vec_tokens[index];
    }

    //----------------------------------------------------------------------
    /// return the number of available tokens
    int count(void) const
    {
        // The public interface returns int; make the narrowing explicit.
        return static_cast<int>(m_vec_tokens.size());
    }

private:
    /// remove leading spaces, tabs and vertical tabs from \p field.
    /// An empty or all-whitespace field yields an empty string.
    static std::string trim(const std::string& field)
    {
        const std::string::size_type start = field.find_first_not_of(" \t\v");

        // find_first_not_of() returns npos when nothing but whitespace is
        // found; substr(npos) would throw std::out_of_range, so handle
        // that case explicitly.
        if (start == std::string::npos)
            return std::string();

        return field.substr(start);
    }

    /// list of tokens
    std::vector<std::string> m_vec_tokens;

    /// delimiter character
    char m_character_delimiter;
};

#endif /* TOKENIZER_H_ */