据我猜测，Python 字符串是带引用计数的不可变字符串，因此 Python 代码中不会到处复制字符串；而 C++ 的 std::string 是可变的值类型，一有机会就会被复制。
如果目标是快速拆分，就应该使用常数时间的子字符串操作，也就是只引用原始字符串的某一部分而不做复制，就像 Python（以及 Java 和 C#……）那样。
不过，C++ 的 std::string 类有一个可取之处：它是标准的，因此在效率不是主要考虑因素的地方，可以用它安全、可移植地传递字符串。闲话少说，上代码——在我的机器上，这当然比 Python 快，因为 Python 的字符串处理是用 C 实现的，而 C 是 C++ 的子集（嘿嘿）：
#include <iostream>
#include <string>
#include <sstream>
#include <time.h>
#include <vector>

using namespace std;

// Non-owning view of a run of characters inside some other string.
//
// A StringRef stores only a pointer and a length; it never copies or owns the
// characters. It is valid only while the string it points into is alive and
// unmodified.
class StringRef
{
private:
    char const* begin_;
    int         size_;

public:
    // Number of characters in the referenced range.
    int size() const { return size_; }

    // Pointer to the first character of the range.
    char const* begin() const { return begin_; }

    // Pointer one past the last character of the range.
    char const* end() const { return begin_ + size_; }

    // Refers to the `size` characters starting at `begin`.
    StringRef( char const* const begin, int const size )
        : begin_( begin )
        , size_( size )
    {}
};

// Splits `str` into tokens separated by `delimiter`, without copying any
// characters.
//
// Runs of consecutive delimiters are treated as a single separator, and
// leading/trailing delimiters are ignored, so no empty tokens are produced.
// The returned StringRefs point into `str`'s buffer and must not outlive it.
vector<StringRef> split3( string const& str, char delimiter = ' ' )
{
    vector<StringRef> result;

    // Work on raw pointers into the buffer. The original code computed the
    // end pointer as &*str.end(), which dereferences the past-the-end
    // iterator (undefined behaviour); data() + size() is always valid.
    char const* const first = str.data();
    char const* const last  = first + str.size();

    // Start of the token currently being scanned; null while between tokens.
    char const* tokenBegin = nullptr;

    for( char const* p = first; p != last; ++p )
    {
        if( *p == delimiter )
        {
            if( tokenBegin != nullptr )
            {
                // A delimiter ends the current token.
                result.push_back(
                    StringRef( tokenBegin, static_cast<int>( p - tokenBegin ) ) );
                tokenBegin = nullptr;
            }
        }
        else if( tokenBegin == nullptr )
        {
            // First non-delimiter character after a gap starts a new token.
            tokenBegin = p;
        }
    }

    // The string ended in the middle of a token: emit the final one.
    if( tokenBegin != nullptr )
    {
        result.push_back(
            StringRef( tokenBegin, static_cast<int>( last - tokenBegin ) ) );
    }
    return result;
}

// Benchmark driver: reads lines from stdin, splits each with split3, and
// reports the line count and throughput (lines per second) on stderr.
int main()
{
    string input_line;
    long count = 0;

    time_t const start = time( nullptr );

    ios_base::sync_with_stdio( false );     // disable synchronous IO

    // Testing the getline result directly counts only lines that were really
    // read, so no correction for a final over-read is needed.
    while( getline( cin, input_line ) )
    {
        // The split1/split2 variants (not shown here) were tried in place of
        // split3, one per compilation; they filled a vector<string> passed as
        // their first argument.
        vector<StringRef> const v = split3( input_line );
        count++;
    }

    int const sec = static_cast<int>( time( nullptr ) - start );

    cerr << "C++ : Saw " << count << " lines in " << sec << " seconds.";
    if( sec > 0 )
    {
        long const lps = count / sec;
        cerr << " Crunch speed: " << lps;
    }
    cerr << endl;
    return 0;
}

// compiled with: g++ -Wall -O3 -o split1 split_1.cpp -std=c++0x
//
// Disclaimer: I hope there aren't any bugs. I haven't tested the
// functionality, only checked the speed. But I think that even if there is a
// bug or two, correcting them won't significantly affect the speed.



