Tokenize a string: Difference between revisions
Content added Content deleted
m (→[[Python]]) |
(Added C++ example) |
||
Line 2: | Line 2: | ||
Separate the string "Hello,How,Are,You,Today" by commas into an array so that each index of the array stores a different word. Display the words to the 'user', in the simplest manner possible, separated by a period. To simplify, you may display a trailing period. |
Separate the string "Hello,How,Are,You,Today" by commas into an array so that each index of the array stores a different word. Display the words to the 'user', in the simplest manner possible, separated by a period. To simplify, you may display a trailing period. |
||
==[[C plus plus|C++]]== |
|||
[[Category:C plus plus|C++]] |
|||
'''Standard:''' [[C plus plus|ANSI C++]] |
|||
'''Compiler:''' [[GCC]] g++ (GCC) 3.4.4 (cygming special) |
|||
'''Library:''' STL |
|||
This may not be the most efficient method to do this as it involves |
|||
redundant copies in the background, but it is very easy to use. In |
|||
most cases it will be a good choice as long as it is not used as an |
|||
inner loop in a performance-critical system. |
|||
Note the doxygen tags in the comments before the function, which describe |
|||
the details of its interface. |
|||
#include <string> |
|||
#include <vector> |
|||
/// \brief Split an input string into a vector of string tokens.
///
/// \note Consecutive delimiters are treated as a single delimiter, so no
///       empty tokens are ever produced.
/// \note Delimiters are _not_ included in the returned data.
///
/// \param str    string to be parsed
/// \param delims set of delimiter characters; every character of this
///               string acts as a delimiter on its own
/// \return the tokens of \p str in order of appearance; the vector is empty
///         when \p str is empty or consists only of delimiters
std::vector<std::string> tokenize_str(const std::string & str,
                                      const std::string & delims=", \t")
{
    typedef std::string::size_type size_type;

    std::vector<std::string> tokens;

    // Start of the first token: skip any delimiters at the beginning.
    size_type tokenStart = str.find_first_not_of(delims, 0);

    while (tokenStart != std::string::npos)
    {
        // End of the current token: position of the next delimiter, or npos
        // when the token runs to the end of the string.
        const size_type tokenEnd = str.find_first_of(delims, tokenStart);

        // substr() clamps the count to the remaining length, so passing
        // npos - tokenStart for the last token simply takes the rest.
        tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));

        // Start of the next token: skip the run of delimiters. Searching
        // from npos yields npos, which terminates the loop.
        tokenStart = str.find_first_not_of(delims, tokenEnd);
    }

    return tokens;
}
|||
Here is sample usage code: |
|||
#include <iostream> |
|||
// Sample driver: splits the task's string with tokenize_str (default
// delimiters) and prints every word followed by a period, then a newline.
int main() {
    using namespace std;

    string s("Hello,How,Are,You,Today");
    vector<string> v(tokenize_str(s));

    // Index with the container's own size_type so the comparison against
    // v.size() never involves a narrower integer type.
    for (vector<string>::size_type i = 0; i < v.size(); ++i)
        cout << v[i] << ".";
    cout << endl;

    return 0;
}
|||
==[[Java]]== |
==[[Java]]== |