本文实例讲述了Java实现的文本字符串操作工具类。分享给大家供大家参考,具体如下:
package com.gcloud.common;
import org.apache.commons.lang.StringUtils;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Date;
import java.util.Random;
public class TextUtil {
private static final char[] QUOTE_ENCODE = """.toCharArray();
private static final char[] AMP_ENCODE = "&".toCharArray();
private static final char[] LT_ENCODE = "<".toCharArray();
private static final char[] GT_ENCODE = ">".toCharArray();
private final static String[] hex = { "00", "01", "02", "03", "04", "05",
"06", "07", "08", "09", "0A", "0B", "0C", "0D", "0E", "0F", "10",
"11", "12", "13", "14", "15", "16", "17", "18", "19", "1A", "1B",
"1C", "1D", "1E", "1F", "20", "21", "22", "23", "24", "25", "26",
"27", "28", "29", "2A", "2B", "2C", "2D", "2E", "2F", "30", "31",
"32", "33", "34", "35", "36", "37", "38", "39", "3A", "3B", "3C",
"3D", "3E", "3F", "40", "41", "42", "43", "44", "45", "46", "47",
"48", "49", "4A", "4B", "4C", "4D", "4E", "4F", "50", "51", "52",
"53", "54", "55", "56", "57", "58", "59", "5A", "5B", "5C", "5D",
"5E", "5F", "60", "61", "62", "63", "64", "65", "66", "67", "68",
"69", "6A", "6B", "6C", "6D", "6E", "6F", "70", "71", "72", "73",
"74", "75", "76", "77", "78", "79", "7A", "7B", "7C", "7D", "7E",
"7F", "80", "81", "82", "83", "84", "85", "86", "87", "88", "89",
"8A", "8B", "8C", "8D", "8E", "8F", "90", "91", "92", "93", "94",
"95", "96", "97", "98", "99", "9A", "9B", "9C", "9D", "9E", "9F",
"A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "AA",
"AB", "AC", "AD", "AE", "AF", "B0", "B1", "B2", "B3", "B4", "B5",
"B6", "B7", "B8", "B9", "BA", "BB", "BC", "BD", "BE", "BF", "C0",
"C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9", "CA", "CB",
"CC", "CD", "CE", "CF", "D0", "D1", "D2", "D3", "D4", "D5", "D6",
"D7", "D8", "D9", "DA", "DB", "DC", "DD", "DE", "DF", "E0", "E1",
"E2", "E3", "E4", "E5", "E6", "E7", "E8", "E9", "EA", "EB", "EC",
"ED", "EE", "EF", "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7",
"F8", "F9", "FA", "FB", "FC", "FD", "FE", "FF" };
private final static byte[] val = { 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x00, 0x01,
0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F };
private static MessageDigest digest = null;
public static String replace(String line , String oldStr, String newStr){
if (StringUtils.isEmpty(line)) return null;
int index = 0;
if ((index = line.indexOf(oldStr, index)) >= 0){
char[] charArray = line.toCharArray();
char[] newStrArray = newStr.toCharArray();
int oldLen = oldStr.length();
StringBuffer buf = new StringBuffer(charArray.length);
buf.append(charArray, 0, index).append(newStrArray);
index += oldLen;
int i = index;
while((index = line.indexOf(oldStr, index)) > 0){
buf.append(charArray, i, index - i).append(newStrArray);
index += oldLen;
i = index;
}
buf.append(charArray, i, charArray.length - i);
return buf.toString();
}
return line;
}
public static String replace(String line , String oldStr, String newStr, int[] countArr){
if (StringUtils.isEmpty(line)) return null;
int index = 0;
if ((index = line.indexOf(oldStr, index)) >= 0){
int count = 0;
count++;
char[] charArray = line.toCharArray();
char[] newStrArray = newStr.toCharArray();
int oldLen = oldStr.length();
StringBuffer buf = new StringBuffer(charArray.length);
buf.append(charArray, 0, index).append(newStrArray);
index += oldLen;
int i = index;
while((index = line.indexOf(oldStr, index)) > 0){
buf.append(charArray, i, index - i).append(newStrArray);
index += oldLen;
i = index;
count++;
}
buf.append(charArray, i, charArray.length - i);
countArr[0] = count;
return buf.toString();
}
return line;
}
public static String replaceIgnoreCase(String line , String oldStr, String newStr){
if (StringUtils.isEmpty(line)) return null;
int index = 0;
line = line.toLowerCase();
oldStr = oldStr.toLowerCase();
if ((index = line.indexOf(oldStr, index)) >= 0){
char[] charArray = line.toCharArray();
char[] newStrArray = newStr.toCharArray();
int oldLen = oldStr.length();
StringBuffer buf = new StringBuffer(charArray.length);
buf.append(charArray, 0, index).append(newStrArray);
index += oldLen;
int i = index;
while((index = line.indexOf(oldStr, index)) > 0){
buf.append(charArray, i, index - i).append(newStrArray);
index += oldLen;
i = index;
}
buf.append(charArray, i, charArray.length - i);
return buf.toString();
}
return line;
}
public static String replaceIgnoreCase(String line , String oldStr, String newStr, int[] countArr){
if (StringUtils.isEmpty(line)) return null;
int index = 0;
line = line.toLowerCase();
oldStr = oldStr.toLowerCase();
if ((index = line.indexOf(oldStr, index)) >= 0){
char[] charArray = line.toCharArray();
char[] newStrArray = newStr.toCharArray();
int oldLen = oldStr.length();
StringBuffer buf = new StringBuffer(charArray.length);
buf.append(charArray, 0, index).append(newStrArray);
index += oldLen;
int i = index;
int count = 0;
while((index = line.indexOf(oldStr, index)) > 0){
count ++;
buf.append(charArray, i, index - i).append(newStrArray);
index += oldLen;
i = index;
}
buf.append(charArray, i, charArray.length - i);
countArr[0] = count;
return buf.toString();
}
return line;
}
public static String escapeHTMLTags(String htmlStr) {
if (StringUtils.isEmpty(htmlStr)) return null;
char ch;
int last = 0;
char[] htmlStrArr = htmlStr.toCharArray();
int len = htmlStrArr.length;
StringBuffer outBuf = new StringBuffer((int) (len * 1.3));
int i = 0;
for (; i < len; i++) {
ch = htmlStrArr[i];
if (ch > '>') {
continue;
} else if (ch == '<') {
if (i > last) {
outBuf.append(htmlStrArr, last, i - last);
}
last = i + 1;
outBuf.append(LT_ENCODE);
} else if (ch == '>') {
if (i > last) {
outBuf.append(htmlStrArr, last, i - last);
}
last = i + 1;
outBuf.append(GT_ENCODE);
}
}
if (last == 0) {
return htmlStr;
}
if (i > last) {
outBuf.append(htmlStrArr, last, i - last);
}
return outBuf.toString();
}
public synchronized static String hash(String data) {
if (digest == null) {
try {
digest = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e) {
System.err.println("Failed to load the MD5 MessageDigest.Jive will be unable to function normally.");
e.printStackTrace();
}
}
digest.update(data.getBytes());
return encodeHex(digest.digest());
}
public static final String encodeHex(byte[] bytes) {
StringBuffer buf = new StringBuffer(bytes.length * 2);
int i;
for (i = 0; i < bytes.length; i++) {
if (((int) bytes[i] & 0xff) < 0x10) {
buf.append("0");
}
buf.append(Long.toString((int) bytes[i] & 0xff, 16));
}
return buf.toString();
}
public static byte[] decodeHex(String hex) {
char[] chars = hex.toCharArray();
byte[] bytes = new byte[chars.length / 2];
int byteCount = 0;
for (int i = 0; i < chars.length; i += 2) {
byte newByte = 0x00;
newByte |= hexCharToByte(chars[i]);
newByte <<= 4;
newByte |= hexCharToByte(chars[i + 1]);
bytes[byteCount] = newByte;
byteCount++;
}
return bytes;
}
private static final byte hexCharToByte(char ch) {
switch (ch) {
case '0':
return 0x00;
case '1':
return 0x01;
case '2':
return 0x02;
case '3':
return 0x03;
case '4':
return 0x04;
case '5':
return 0x05;
case '6':
return 0x06;
case '7':
return 0x07;
case '8':
return 0x08;
case '9':
return 0x09;
case 'a':
return 0x0A;
case 'b':
return 0x0B;
case 'c':
return 0x0C;
case 'd':
return 0x0D;
case 'e':
return 0x0E;
case 'f':
return 0x0F;
}
return 0x00;
}
// *********************************************************************
// * base64 - a simple base64 encoder and decoder.
// *********************************************************************
public static String encodebase64(String data) {
return encodebase64(data.getBytes());
}
public static String encodebase64(byte[] data) {
int c;
int len = data.length;
StringBuffer ret = new StringBuffer(((len / 3) + 1) * 4);
for (int i = 0; i < len; ++i) {
c = (data[i] >> 2) & 0x3f;
ret.append(cvt.charAt(c));
c = (data[i] << 4) & 0x3f;
if (++i < len)
c |= (data[i] >> 4) & 0x0f;
ret.append(cvt.charAt(c));
if (i < len) {
c = (data[i] << 2) & 0x3f;
if (++i < len)
c |= (data[i] >> 6) & 0x03;
ret.append(cvt.charAt(c));
} else {
++i;
ret.append((char) fillchar);
}
if (i < len) {
c = data[i] & 0x3f;
ret.append(cvt.charAt(c));
} else {
ret.append((char) fillchar);
}
}
return ret.toString();
}
public static String decodebase64(String data) {
return decodebase64(data.getBytes());
}
public static String decodebase64(byte[] data) {
int c, c1;
int len = data.length;
StringBuffer ret = new StringBuffer((len * 3) / 4);
for (int i = 0; i < len; ++i) {
c = cvt.indexOf(data[i]);
++i;
c1 = cvt.indexOf(data[i]);
c = ((c << 2) | ((c1 >> 4) & 0x3));
ret.append((char) c);
if (++i < len) {
c = data[i];
if (fillchar == c)
break;
c = cvt.indexOf((char) c);
c1 = ((c1 << 4) & 0xf0) | ((c >> 2) & 0xf);
ret.append((char) c1);
}
if (++i < len) {
c1 = data[i];
if (fillchar == c1)
break;
c1 = cvt.indexOf((char) c1);
c = ((c << 6) & 0xc0) | c1;
ret.append((char) c);
}
}
return ret.toString();
}
private static final int fillchar = '=';
private static final String cvt = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/";
public static final String[] toLowerCaseWordArray(String text) {
if (text == null || text.length() == 0) {
return new String[0];
}
ArrayList wordList = new ArrayList();
BreakIterator boundary = BreakIterator.getWordInstance();
boundary.setText(text);
int start = 0;
for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
String tmp = text.substring(start, end).trim();
tmp = replace(tmp, "+", "");
tmp = replace(tmp, "/", "");
tmp = replace(tmp, "\", "");
tmp = replace(tmp, "#", "");
tmp = replace(tmp, "*", "");
tmp = replace(tmp, ")", "");
tmp = replace(tmp, "(", "");
tmp = replace(tmp, "&", "");
if (tmp.length() > 0) {
wordList.add(tmp);
}
}
return (String[]) wordList.toArray(new String[wordList.size()]);
}
private static Random randGen = new Random();
private static char[] numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz" + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();
public static final String randomString(int length) {
if (length < 1) {
return null;
}
char[] randBuffer = new char[length];
for (int i = 0; i < randBuffer.length; i++) {
randBuffer[i] = numbersAndLetters[randGen.nextInt(71)];
}
return new String(randBuffer);
}
public static final String chopAtWord(String string, int length) {
if (string == null) {
return string;
}
char[] charArray = string.toCharArray();
int sLength = string.length();
if (length < sLength) {
sLength = length;
}
// First check if there is a newline character before length; if so,
// chop word there.
for (int i = 0; i < sLength - 1; i++) {
// Windows
if (charArray[i] == 'r' && charArray[i + 1] == 'n') {
return string.substring(0, i + 1);
}
// Unix
else if (charArray[i] == 'n') {
return string.substring(0, i);
}
}
// Also check boundary case of Unix newline
if (charArray[sLength - 1] == 'n') {
return string.substring(0, sLength - 1);
}
// Done checking for newline, now see if the total string is less than
// the specified chop point.
if (string.length() < length) {
return string;
}
// No newline, so chop at the first whitespace.
for (int i = length - 1; i > 0; i--) {
if (charArray[i] == ' ') {
return string.substring(0, i).trim();
}
}
// Did not find word boundary so return original String chopped at
// specified length.
return string.substring(0, length);
}
public static final String escapeForXML(String string) {
if (string == null) {
return null;
}
char ch;
int i = 0;
int last = 0;
char[] input = string.toCharArray();
int len = input.length;
StringBuffer out = new StringBuffer((int) (len * 1.3));
for (; i < len; i++) {
ch = input[i];
if (ch > '>') {
continue;
} else if (ch == '<') {
if (i > last) {
out.append(input, last, i - last);
}
last = i + 1;
out.append(LT_ENCODE);
} else if (ch == '&') {
if (i > last) {
out.append(input, last, i - last);
}
last = i + 1;
out.append(AMP_ENCODE);
} else if (ch == '"') {
if (i > last) {
out.append(input, last, i - last);
}
last = i + 1;
out.append(QUOTE_ENCODE);
}
}
if (last == 0) {
return string;
}
if (i > last) {
out.append(input, last, i - last);
}
return out.toString();
}
public static final String unescapeFromXML(String string) {
string = replace(string, "<", "<");
string = replace(string, ">", ">");
string = replace(string, """, """);
return replace(string, "&", "&");
}
public static String escape(String s) {
StringBuffer sbuf = new StringBuffer();
int len = s.length();
for (int i = 0; i < len; i++) {
int ch = s.charAt(i);
if (ch == ' ') { // space : map to '+'
sbuf.append('+');
} else if ('A' <= ch && ch <= 'Z') { // 'A'..'Z' : as it was
sbuf.append((char) ch);
} else if ('a' <= ch && ch <= 'z') { // 'a'..'z' : as it was
sbuf.append((char) ch);
} else if ('0' <= ch && ch <= '9') { // '0'..'9' : as it was
sbuf.append((char) ch);
} else if (ch == '-'
|| ch == '_' // unreserved : as it was
|| ch == '.' || ch == '!' || ch == '~' || ch == '*'
|| ch == ''' || ch == '(' || ch == ')') {
sbuf.append((char) ch);
} else if (ch <= 0x007F) { // other ASCII : map to %XX
sbuf.append('%');
sbuf.append(hex[ch]);
} else { // unicode : map to %uXXXX
sbuf.append('%');
sbuf.append('u');
sbuf.append(hex[(ch >>> 8)]);
sbuf.append(hex[(0x00FF & ch)]);
}
}
return sbuf.toString();
}
public static String unescape(String s) {
StringBuffer sbuf = new StringBuffer();
int i = 0;
int len = s.length();
while (i < len) {
int ch = s.charAt(i);
if (ch == '+') { // + : map to ' '
sbuf.append(' ');
} else if ('A' <= ch && ch <= 'Z') { // 'A'..'Z' : as it was
sbuf.append((char) ch);
} else if ('a' <= ch && ch <= 'z') { // 'a'..'z' : as it was
sbuf.append((char) ch);
} else if ('0' <= ch && ch <= '9') { // '0'..'9' : as it was
sbuf.append((char) ch);
} else if (ch == '-'
|| ch == '_' // unreserved : as it was
|| ch == '.' || ch == '!' || ch == '~' || ch == '*'
|| ch == ''' || ch == '(' || ch == ')') {
sbuf.append((char) ch);
} else if (ch == '%') {
int cint = 0;
if ('u' != s.charAt(i + 1)) { // %XX : map to ascii(XX)
cint = (cint << 4) | val[s.charAt(i + 1)];
cint = (cint << 4) | val[s.charAt(i + 2)];
i += 2;
} else { // %uXXXX : map to unicode(XXXX)
cint = (cint << 4) | val[s.charAt(i + 2)];
cint = (cint << 4) | val[s.charAt(i + 3)];
cint = (cint << 4) | val[s.charAt(i + 4)];
cint = (cint << 4) | val[s.charAt(i + 5)];
i += 5;
}
sbuf.append((char) cint);
}
i++;
}
return sbuf.toString();
}
private static final char[] zeroArray = "0000000000000000".toCharArray();
public static final String zeroPadString(String string, int length) {
if (string == null || string.length() > length) {
return string;
}
StringBuffer buf = new StringBuffer(length);
buf.append(zeroArray, 0, length - string.length()).append(string);
return buf.toString();
}
public static final String dateToMillis(Date date) {
return zeroPadString(Long.toString(date.getTime()), 15);
}
public static void main(String[] args) {
System.out.println(replace("aaaaabbbcccc", "aa", "gg"));
System.out.println(replaceIgnoreCase("AAAAbbbcccc", "aa", "gg"));
System.out.println(escapeHTMLTags("AAAAbb"));
}
}
更多关于java算法相关内容感兴趣的读者可查看本站专题:《Java字符与字符串操作技巧总结》、《Java数据结构与算法教程》、《Java操作DOM节点技巧总结》、《Java文件与目录操作技巧汇总》和《Java缓存操作技巧汇总》
希望本文所述对大家java程序设计有所帮助。



