C++的正则表达式

正则表达式的结构
- //regex_match匹配和regex_search
- //查询到的具体的信息smatch
- //查找的逐步查找
- //token,拆分
- //replace 替代
- //regex的常量

备注：参考书籍《C++标准库（第二版）》

正则表达式的结构

需要的头文件

#include <algorithm>
#include <iostream>
#include <iterator>
#include <regex>
#include <string>

//regex_match匹配和regex_search

// Writes "found" to standard output when b is true and "not found" otherwise,
// then ends the line (and flushes) with std::endl.
void out(bool b)
{
	if (b)
	{
		std::cout << "found";
	}
	else
	{
		std::cout << "not found";
	}
	std::cout << std::endl;
}

// Whole-string matching with std::regex_match.
// Each call checks whether the complete character sequence "<tag>value</tag>"
// matches the given pattern and reports the result through out().
void test01()
{
	// Any opening tag, any content, any closing tag.
	const std::regex reg1("<.*>.*</.*>");
	bool found = std::regex_match("<tag>value</tag>", reg1);
	out(found);

	// The closing tag must repeat the opening one: \1 refers back to group 1.
	// The backslash is doubled because this is an ordinary string literal.
	const std::regex reg2("<(.*)>.*</\\1>");
	found = std::regex_match("<tag>value</tag>", reg2);
	out(found);

	// The same pattern in grep syntax, where a group is written \( ... \).
	const std::regex reg3("<\\(.*\\)>.*</\\1>", std::regex_constants::grep);
	found = std::regex_match("<tag>value</tag>", reg3);
	out(found);

	// A temporary regex object can be passed directly.
	found = std::regex_match("<tag>value</tag>", std::regex("<(.*)>.*</\\1>"));
	out(found);
}

//查询到的具体的信息smatch

// Shows the details std::smatch exposes after a successful std::regex_search:
// the whole match, its length and position, the text before and after it,
// and every captured sub-match.
void test02()
{
	const std::string data = "XML tag::<tag-name>the value</tag-name>.";
	std::cout << "data:        " << data << "\n\n";

	std::smatch m;
	// Group 1 is the tag name, group 2 the content; group 3 wraps a
	// back-reference so the closing tag has to repeat the opening tag.
	const bool found = std::regex_search(data, m, std::regex("<(.*)>(.*)</(\\1)>"));
	std::cout << "m.empty() :  " << std::boolalpha << m.empty() << std::endl;
	std::cout << "m.size() : " << m.size() << std::endl;

	if (found)
	{
		std::cout << "m.str()\t:\t" << m.str() << std::endl;
		std::cout << "m.length()\t:\t" << m.length() << std::endl;
		std::cout << "m.position()\t:\t" << m.position() << std::endl;
		// prefix() is a sub_match covering everything before the first matched character.
		std::cout << "m.prefix().str()\t:\t" << m.prefix().str() << std::endl;
		// suffix() is a sub_match covering everything after the last matched character.
		std::cout << "m.suffix().str()\t:\t" << m.suffix().str() << std::endl;
	}
	std::cout << "--------------" << std::endl;
	// Index 0 is the whole match; 1..size()-1 are the capture groups.
	// When nothing matched, size() is 0 and the loop body never runs.
	for (std::smatch::size_type i = 0; i < m.size(); ++i)
	{
		std::cout << "m[" << i << "].str():\t" << m[i].str() << std::endl;
		std::cout << "m.str(" << i << "):\t" << m.str(i) << "\t(" << m.length(i) << ")" << std::endl;
	}
}

//查找的逐步查找

// Finds every match with a plain for loop: after each successful
// std::regex_search the next search starts right behind the previous match
// (m.suffix().first). Prints each match with its tag and value, then the
// number of matches found.
void test03()
{
	const std::string data = "<person>\n"
		" <first>Nico</first>\n"
		" <last>Josuttis</last>\n"
		"</person>\n";
	// Group 1: tag name, group 2: content; the closing tag must repeat group 1.
	const std::regex reg("<(.*)>(.*)</(\\1)>");

	auto pos = data.cbegin();
	const auto end = data.cend();
	std::smatch m;
	int i = 0;
	for (; std::regex_search(pos, end, m, reg); pos = m.suffix().first)
	{
		++i;
		std::cout << "match:　" << m.str() << std::endl;
		std::cout << "tag:\t" << m.str(1) << std::endl;
		std::cout << "value:" << m.str(2) << std::endl;
	}
	std::cout << i << std::endl;
}

// Visits every match through std::sregex_iterator, first with an explicit
// loop and then with std::for_each; each pass prints the match, its tag and
// its value.
void test04()
{
	const std::string data = "<person>\n"
		" <first>Nico</first>\n"
		" <last>Josuttis</last>\n"
		"</person>\n";
	// Group 1: tag name, group 2: content; the closing tag must repeat group 1.
	const std::regex reg("<(.*)>(.*)</(\\1)>");

	// A default-constructed sregex_iterator is the end-of-sequence marker.
	const std::sregex_iterator end;
	for (std::sregex_iterator pos(data.cbegin(), data.cend(), reg); pos != end; ++pos)
	{
		std::cout << "match: " << pos->str() << std::endl;
		std::cout << "tag:" << pos->str(1) << std::endl;
		std::cout << "value: " << pos->str(2) << std::endl;
	}

	// The same traversal expressed as an algorithm call.
	const std::sregex_iterator beg(data.cbegin(), data.cend(), reg);
	std::for_each(beg, end, [](const std::smatch& m) {
		std::cout << "match: " << m.str() << std::endl;
		std::cout << "tag:" << m.str(1) << std::endl;
		std::cout << "value: " << m.str(2) << std::endl;
		});
}

//token,拆分

// Tokenizing with std::sregex_token_iterator.
// Part 1 prints, for every match, the whole match (index 0) and the second
// capture group (index 2). Part 2 splits a list of names at its separators
// by asking for index -1, i.e. the text between matches.
void test05()
{
	const std::string data = "<person>\n"
		" <first>Nico</first>\n"
		" <last>Josuttis</last>\n"
		"</person>\n";
	// Group 1: tag name, group 2: content; the closing tag must repeat group 1.
	const std::regex reg("<(.*)>(.*)</(\\1)>");

	// {0, 2}: yield the full match and then group 2 for each match.
	const std::sregex_token_iterator end;
	for (std::sregex_token_iterator pos(data.cbegin(), data.cend(), reg, { 0, 2 }); pos != end; ++pos)
	{
		std::cout << "match: " << pos->str() << std::endl;
	}
	std::cout << std::endl;

	const std::string name = "nice,jim,helut,paul,tim,john,paul,rita";
	// A separator is ',', ';' or '.', optionally surrounded by tabs/newlines.
	// The class must contain the control characters \t and \n; the letters
	// 't' and 'n' would strip real characters from the names.
	const std::regex sep("[\t\n]*[,;.][\t\n]*");
	// -1: yield the subsequences between the separators.
	const std::sregex_token_iterator e;
	for (std::sregex_token_iterator p(name.cbegin(), name.cend(), sep, -1); p != e; ++p)
	{
		std::cout << "name: " << *p << std::endl;
	}
}

//replace 替代

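替代用的符号（默认的 ECMAScript 格式 / format_sed 格式）：
- $& / &：整个匹配到的内容
- $n / \n：第 n 个捕获分组的内容（例如 $1、\1）
- $`：匹配内容之前的部分（前缀，仅默认格式）
- $'：匹配内容之后的部分（后缀，仅默认格式）
- $$：字符 $ 本身（仅默认格式）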

// Rewriting matches with std::regex_replace.
// Every <tag>value</tag> element is turned into <tag value="value"/>, first
// with the default format syntax ($1, $2), then with sed syntax (\1, \2), and
// finally through the iterator interface with flags that restrict the output.
void test06()
{
	const std::string data = "<person>\n"
		" <first>Nico</first>\n"
		" <last>Josuttis</last>\n"
		"</person>\n";
	// Group 1: tag name, group 2: content; the closing tag must repeat group 1.
	const std::regex reg("<(.*)>(.*)</(\\1)>");

	// Default format: $n inserts capture group n.
	std::cout << std::regex_replace(data, reg, "<$1 value=\"$2\"/>") << std::endl;

	// sed format: \n inserts capture group n.
	std::cout << std::regex_replace(data, reg, "<\\1 value=\"\\2\"/>", std::regex_constants::format_sed) << std::endl;

	// format_no_copy drops the text that did not match; format_first_only
	// replaces only the first match. The result is appended to res2.
	std::string res2;
	std::regex_replace(std::back_inserter(res2), data.cbegin(), data.cend(), reg, "<$1 value=\"$2\"/>",
		std::regex_constants::format_no_copy | std::regex_constants::format_first_only);
	std::cout << res2 << std::endl;
}

//regex的常量

// Demonstrates regex syntax/option constants (egrep grammar, case-insensitive).
// Reads all of standard input, then reports every command of the form
// \...index{val} or \...index{val}{see}: the whole match, the "val" part and
// the "see" part (empty for the one-argument form).
void test07()
{
	// Raw string literals keep the regex escapes readable:
	//   \\  literal backslash, \{  literal opening brace.
	// A closing brace outside an interval expression is an ordinary character
	// in the extended grammar, so it is left unescaped.
	const std::string pat1 = R"(\\.*index\{([^}]*)})";
	const std::string pat2 = R"(\\.*index\{(.*)}\{(.*)})";
	// In the egrep grammar a newline separates alternatives, so the combined
	// pattern means "pat1 or pat2". Group 1 belongs to pat1, groups 2 and 3
	// belong to pat2.
	const std::regex pat(pat1 + "\n" + pat2, std::regex_constants::egrep | std::regex_constants::icase);

	// Slurp standard input; the extra parentheses keep this from being parsed
	// as a function declaration.
	const std::string data((std::istreambuf_iterator<char>(std::cin)), std::istreambuf_iterator<char>());

	std::smatch m;
	auto pos = data.cbegin();
	const auto end = data.cend();
	for (; std::regex_search(pos, end, m, pat); pos = m.suffix().first)
	{
		std::cout << "match:" << m.str() << std::endl;
		// Only one of groups 1 and 2 participates in a match; the other is empty.
		std::cout << "val:" << m.str(1) + m.str(2) << std::endl;
		std::cout << "see: " << m.str(3) << std::endl;
	}

}

C++的正则表达式

C/C++/C#相关栏目本月热门文章