Skip to content
Snippets Groups Projects
Tokenizer.h 4.29 KiB
Newer Older
Florent Langlois's avatar
Florent Langlois committed
/*
 * Tokenizer.h
 *
 *  Created on: 28/02/2012
 *      Author: Arafat noureddine
 */

#ifndef TOKENIZER_H_
#define TOKENIZER_H_

#include <stdexcept>
#include <sstream>
#include <iostream>
#include <vector>
#include <string>

/////////////////////////////////////////////////////////////////////////////
/// \class Tokenizer
/// \brief Splits a string into tokens separated by a single delimiter character.
///
/// Leading blanks (space, tab, vertical tab) are stripped from each token.
///
/////////////////////////////////////////////////////////////////////////////


class Tokenizer
{
public:
    ///\cond HIDDEN_SYMBOLS
    /// Read-only iterator over the stored tokens.
    typedef std::vector<std::string>::const_iterator iterator;
    ///\endcond

    //----------------------------------------------------------------------------------------------------------
    /// ctor : tokenizes \p record immediately.
    /// \param record    string to split (default is "")
    /// \param delimiter character separating two tokens (default is ' ')
    Tokenizer(const std::string& record = "", char delimiter = ' ')
        : m_character_delimiter(delimiter)
    {
        assign(record);
    }

    //----------------------------------------------------------------------------------------------------------
    /// copy ctor : copies both the delimiter and the already computed tokens
    Tokenizer(const Tokenizer& another)
        : m_vec_tokens(another.m_vec_tokens),
          m_character_delimiter(another.m_character_delimiter)
    {
    }

    //----------------------------------------------------------------------------------------------------------
    /// dtor
    virtual ~Tokenizer()
    {
    }

    //----------------------------------------------------------------------------------------------------------
    /// affectation operator : copies both the delimiter and the tokens of \p another
    /// \return a reference to this object
    Tokenizer& operator=(const Tokenizer& another)
    {
        if(this != &another)
        {
            m_character_delimiter = another.m_character_delimiter;
            // vector assignment replaces the previous content, no clear() needed
            m_vec_tokens = another.m_vec_tokens;
        }
        return *this;
    }

    //----------------------------------------------------------------------------------------------------------
    /// tokenize a new string, discarding any previously stored tokens.
    /// The record is split on the delimiter given at construction; leading blanks
    /// (space, tab, vertical tab) are removed from every token.
    /// Two consecutive delimiters, or a blank-only field, produce an empty token.
    /// A delimiter ending the record does not produce a trailing empty token
    /// (std::getline extracts nothing more once the stream is exhausted).
    /// \param record string to split
    void assign(const std::string& record)
    {
        std::stringstream ss(record);
        std::string field;
        m_vec_tokens.clear();
        while(std::getline(ss, field, m_character_delimiter))
            m_vec_tokens.push_back(trim(field));
    }

    //----------------------------------------------------------------------------------------------------------
    /// iterator on the first token
    iterator begin() const
    {
        return m_vec_tokens.begin();
    }

    //----------------------------------------------------------------------------------------------------------
    /// iterator past the last token
    iterator end() const
    {
        return m_vec_tokens.end();
    }

    //----------------------------------------------------------------------------------------------------------
    /// return a copy of the token at index
    /// \param index zero-based position of the requested token
    /// \throw std::logic_error if \p index is not less than count()
    const std::string operator[](unsigned int index) const
    {
        if(index >= m_vec_tokens.size())
            throw std::logic_error("Tokenizer() - index Overflow - index must be less than the size of available token !");
        // bounds were checked just above, unchecked access is safe here
        return m_vec_tokens[index];
    }

    //----------------------------------------------------------------------------------------------------------
    /// return the number of available tokens
    int count(void) const
    {
        // the public interface exposes an int; make the narrowing explicit
        return static_cast<int>(m_vec_tokens.size());
    }

private:

    /// remove leading blanks (space, tab, vertical tab) from the string.
    /// Trailing blanks are left untouched.
    /// \param field raw token
    /// \return the token without its leading blanks; an empty string when the
    ///         field is empty or contains only blanks
    static std::string trim(const std::string& field)
    {
        const std::string::size_type start = field.find_first_not_of(" \t\v");
        // find_first_not_of() yields npos for an empty / blank-only field and
        // substr(npos) would throw std::out_of_range: return an empty token instead
        if(start == std::string::npos)
            return std::string();
        return field.substr(start);
    }


    /// list of tokens
    std::vector<std::string> m_vec_tokens;

    /// delimiter character
    char m_character_delimiter;
};



#endif /* TOKENIZER_H_ */