Hive 的 Rule 表示优化时使用的一条规则。是否应用某条规则,取决于应用该规则的代价:代价为正数时越小越好,负数表示该规则不适用。
// A rule that can be evaluated against a stack during optimization.
// NOTE(review): Stack appears as a raw type here; its element type parameter was
// presumably lost when the snippet was extracted -- confirm against the original source.
public interface Rule {
// Cost of applying this rule to the given stack; the lower the cost, the better.
// NOTE(review): the accompanying text states that a negative value means the rule
// does not apply.
int cost(Stack stack) throws SemanticException;
// Name of the rule, used for debugging.
String getName();
}
Rule 有 3 种子类,分别为 TypeRule,RuleExactMatch 和 RuleRegExp。
TypeRule
TypeRule 最简单:仅当 Node 的类型与指定的 Type 匹配时才有效。
public class TypeRule implements Rule {
private Class> nodeClass;
// 构造时传递要匹配的类型
public TypeRule(Class> nodeClass) {
this.nodeClass = nodeClass;
}
// 当节点的类型匹配时,返回 1,否则 -1.
@Override
public int cost(Stack stack) throws SemanticException {
if (stack == null) {
return -1;
}
if (nodeClass.isInstance(stack.peek())) {
return 1;
}
return -1;
}
@Override
public String getName() {
return nodeClass.getName();
}
}
RuleExactMatch
仅当 stack 中各元素的名字与 pattern 逐一完全匹配时,返回 stack 的元素个数(即 pattern 的长度),否则返回 -1。
/**
 * Rule that applies only when the names of all stack elements equal the
 * configured pattern, position by position.
 */
public class RuleExactMatch implements Rule {
  private final String ruleName;
  private final String[] pattern;

  /**
   * @param ruleName name reported by {@link #getName()}
   * @param pattern expected element names, indexed like the stack
   */
  public RuleExactMatch(String ruleName, String[] pattern) {
    this.ruleName = ruleName;
    this.pattern = pattern;
  }

  /**
   * Compares every stack element's name with the pattern entry at the same index.
   *
   * NOTE(review): stack is declared as a raw Stack although its elements are
   * used through getName(); the element type parameter was presumably lost when
   * the snippet was extracted -- confirm against the original source.
   *
   * @param stack the stack to compare; null is treated as an empty stack
   * @return the number of stack elements when sizes and all names match,
   *         otherwise -1
   */
  @Override
  public int cost(Stack stack) throws SemanticException {
    final int depth = (stack == null) ? 0 : stack.size();
    if (depth != pattern.length) {
      return -1;
    }
    // Walk from the top of the stack down to the bottom.
    int index = depth - 1;
    while (index >= 0) {
      if (!stack.get(index).getName().equals(pattern[index])) {
        return -1;
      }
      index--;
    }
    return depth;
  }

  /** Returns the rule name given at construction. */
  @Override
  public String getName() {
    return ruleName;
  }
}
RuleRegExp
可以包含通配符的匹配规则。
public class RuleRegExp implements Rule {
private final String ruleName;
private final Pattern patternWithWildCardChar;
private final String patternWithoutWildCardChar;
private String[] patternORWildChar;
// 通配符定义
private static final Set wildCards = new HashSet(Arrays.asList(
'[', '^', '$', '*', ']', '+', '|', '(', '\', '.', '?', ')', '&'));
/**
 * Builds the rule and picks one of three pattern representations.
 *
 * @param ruleName name reported for this rule
 * @param regExp the rule pattern, possibly containing wildcard characters
 */
public RuleRegExp(String ruleName, String regExp) {
  this.ruleName = ruleName;
  if (patternHasWildCardChar(regExp)) {
    // When '|' is the only wildcard, the pattern is a plain OR of literals, so it
    // is not compiled into a regular expression (for efficiency).
    if (patternHasOnlyWildCardChar(regExp, '|')) {
      this.patternWithWildCardChar = null;
      this.patternWithoutWildCardChar = null;
      // "\\|" is the regex for a literal '|'; a lone "\|" is not a legal Java
      // string escape.
      this.patternORWildChar = regExp.split("\\|");
    } else {
      // General case: compile the regular expression.
      this.patternWithWildCardChar = Pattern.compile(regExp);
      this.patternWithoutWildCardChar = null;
      this.patternORWildChar = null;
    }
  } else {
    // No wildcard character at all: keep the pattern as a literal string.
    this.patternWithWildCardChar = null;
    this.patternWithoutWildCardChar = regExp;
    this.patternORWildChar = null;
  }
}
/**
 * Tells whether {@code wcc} is the only wildcard character used in the pattern.
 *
 * @param pattern the pattern to scan; may be null
 * @param wcc the single wildcard character that is allowed
 * @return true when the pattern contains at least one wildcard character and
 *         every wildcard character in it equals {@code wcc}; false otherwise,
 *         including for a null pattern
 */
private static boolean patternHasOnlyWildCardChar(String pattern, char wcc) {
  if (pattern == null) {
    return false;
  }
  boolean sawWildCard = false;
  for (int i = 0; i < pattern.length(); i++) {
    final char current = pattern.charAt(i);
    if (!wildCards.contains(current)) {
      continue;
    }
    // Any wildcard other than the allowed one disqualifies the pattern.
    if (current != wcc) {
      return false;
    }
    sawWildCard = true;
  }
  return sawWildCard;
}
/**
 * Computes the cost by delegating to the matcher for whichever pattern
 * representation is populated.
 *
 * @param stack the stack to match against
 * @return the value returned by the selected matcher
 * @throws SemanticException when none of the three validity checks holds
 */
@Override
public int cost(Stack stack) throws SemanticException {
  if (rulePatternIsValidWithoutWildCardChar()) {
    return costPatternWithoutWildCardChar(stack);
  } else if (rulePatternIsValidWithWildCardChar()) {
    return costPatternWithWildCardChar(stack);
  } else if (rulePatternIsValidWithORWildCardChar()) {
    return costPatternWithORWildCardChar(stack);
  }
  // None of the validity checks passed, i.e. the pattern fields are not in any
  // of the expected "exactly one populated" states. This is an internal error.
  final String message =
      "Rule pattern is invalid for " + getName() + " : patternWithWildCardChar = "
          + patternWithWildCardChar + " patternWithoutWildCardChar = "
          + patternWithoutWildCardChar;
  throw new SemanticException(message);
}
/**
 * Returns true when only the literal (wildcard-free) pattern is populated and
 * both the compiled regex and the OR alternatives are null.
 */
boolean rulePatternIsValidWithoutWildCardChar() {
  if (patternWithoutWildCardChar == null) {
    return false;
  }
  return patternWithWildCardChar == null && patternORWildChar == null;
}
/**
 * Matches the literal pattern against the names of the topmost stack elements.
 * The candidate string is built by prepending "name%" for each element, starting
 * at the top of the stack, until it is at least as long as the pattern.
 *
 * NOTE(review): stack is declared as a raw Stack although its elements are used
 * through getName(); the element type parameter was presumably lost when the
 * snippet was extracted -- confirm against the original source.
 *
 * @param stack the stack to match; null is treated as empty
 * @return the pattern length when the candidate equals the pattern, otherwise -1
 */
private int costPatternWithoutWildCardChar(Stack stack) throws SemanticException {
  if (stack == null || stack.size() == 0) {
    return -1;
  }
  final int targetLength = patternWithoutWildCardChar.length();
  final StringBuilder path = new StringBuilder(targetLength + stack.size());
  int depth = stack.size() - 1;
  // At least one element is always consumed; stop as soon as the candidate has
  // reached the pattern length or the stack is exhausted.
  do {
    path.insert(0, stack.get(depth).getName() + "%");
    depth--;
  } while (depth >= 0 && path.length() < targetLength);
  // A candidate that is shorter or longer than the pattern can never be equal.
  return patternWithoutWildCardChar.contentEquals(path) ? targetLength : -1;
}
/**
 * Returns true when only the compiled regex is populated and both the literal
 * pattern and the OR alternatives are null.
 */
boolean rulePatternIsValidWithWildCardChar() {
  if (patternWithWildCardChar == null) {
    return false;
  }
  return patternWithoutWildCardChar == null && patternORWildChar == null;
}
/**
 * Matches the compiled regex against the names of the topmost stack elements.
 * The candidate string grows by prepending "name%" for each element, starting at
 * the top of the stack; the regex must match the whole candidate.
 *
 * Example rule: opRules.put(new RuleRegExp("R1", "TS%.*RS%JOIN%")
 *
 * NOTE(review): stack is declared as a raw Stack although its elements are used
 * through getName(); the element type parameter was presumably lost when the
 * snippet was extracted -- confirm against the original source.
 *
 * @param stack the stack to match; null is treated as empty
 * @return the length of the first (shortest) candidate that the regex matches
 *         entirely, or -1 when no candidate matches
 */
private int costPatternWithWildCardChar(Stack stack) throws SemanticException {
  final int size = (stack == null) ? 0 : stack.size();
  final Matcher matcher = patternWithWildCardChar.matcher("");
  final StringBuilder path = new StringBuilder();
  for (int remaining = size; remaining > 0; remaining--) {
    path.insert(0, stack.get(remaining - 1).getName() + "%");
    // Reuse the same matcher on the grown candidate.
    if (matcher.reset(path).matches()) {
      return path.length();
    }
  }
  return -1;
}
rulePatternIsValidWithORWildCardChar
Example:TypeCheckProcFactory
// Example rule registrations. Each statement puts a RuleRegExp into opRules together
// with a processor obtained from tf. Every pattern is a list of alternatives joined
// by '|', and each alternative is a HiveParser constant followed by "%".
// R2: Number, IntegralLiteral and NumberLiteral are handled by tf.getNumExprProcessor().
opRules.put(new RuleRegExp("R2", HiveParser.Number + "%|" +
HiveParser.IntegralLiteral + "%|" +
HiveParser.NumberLiteral + "%"),
tf.getNumExprProcessor());
// R3: identifiers, string-like literals and the listed keywords are handled by
// tf.getStrExprProcessor().
// NOTE(review): the two adjacent "%|" literals below concatenate to "%|%|", which adds
// a bare "%" alternative to the pattern -- looks unintentional; confirm before relying on it.
opRules
.put(new RuleRegExp("R3", HiveParser.Identifier + "%|"
+ HiveParser.StringLiteral + "%|" + HiveParser.TOK_CHARSETLITERAL + "%|"
+ HiveParser.TOK_STRINGLITERALSEQUENCE + "%|"
+ "%|" + HiveParser.KW_IF + "%|" + HiveParser.KW_CASE + "%|"
+ HiveParser.KW_WHEN + "%|" + HiveParser.KW_IN + "%|"
+ HiveParser.KW_ARRAY + "%|" + HiveParser.KW_MAP + "%|"
+ HiveParser.KW_STRUCT + "%|" + HiveParser.KW_EXISTS + "%|"
+ HiveParser.TOK_SUBQUERY_OP_NOTIN + "%"),
tf.getStrExprProcessor());
// R4: KW_TRUE and KW_FALSE are handled by tf.getBoolExprProcessor().
opRules.put(new RuleRegExp("R4", HiveParser.KW_TRUE + "%|"
+ HiveParser.KW_FALSE + "%"), tf.getBoolExprProcessor());
// R5: date and timestamp literals are handled by tf.getDateTimeExprProcessor().
opRules.put(new RuleRegExp("R5", HiveParser.TOK_DATELITERAL + "%|"
+ HiveParser.TOK_TIMESTAMPLITERAL + "%|"
+ HiveParser.TOK_TIMESTAMPLOCALTZLITERAL + "%"), tf.getDateTimeExprProcessor());
// R6: interval literals are handled by tf.getIntervalExprProcessor().
opRules.put(new RuleRegExp("R6", HiveParser.TOK_INTERVAL_YEAR_MONTH_LITERAL + "%|"
+ HiveParser.TOK_INTERVAL_DAY_TIME_LITERAL + "%|"
+ HiveParser.TOK_INTERVAL_YEAR_LITERAL + "%|"
+ HiveParser.TOK_INTERVAL_MONTH_LITERAL + "%|"
+ HiveParser.TOK_INTERVAL_DAY_LITERAL + "%|"
+ HiveParser.TOK_INTERVAL_HOUR_LITERAL + "%|"
+ HiveParser.TOK_INTERVAL_MINUTE_LITERAL + "%|"
+ HiveParser.TOK_INTERVAL_SECOND_LITERAL + "%"), tf.getIntervalExprProcessor());
/**
 * Returns true when only the OR alternatives are populated and both the literal
 * pattern and the compiled regex are null.
 */
boolean rulePatternIsValidWithORWildCardChar() {
  if (patternORWildChar == null) {
    return false;
  }
  return patternWithoutWildCardChar == null && patternWithWildCardChar == null;
}
/**
 * Matches the '|'-separated alternatives against the names of the topmost stack
 * elements. Candidate strings are built by prepending "name%" for each element,
 * starting at the top of the stack. Candidates are cached by length so that the
 * stack is walked at most once across all alternatives.
 *
 * NOTE(review): stack is declared as a raw Stack although its elements are used
 * through getName(); the element type parameter was presumably lost when the
 * snippet was extracted -- confirm against the original source.
 *
 * @param stack the stack to match; null is treated as empty
 * @return the length of the first alternative (in array order) that equals one
 *         of the candidates, or -1 when none does or the stack is empty
 */
private int costPatternWithORWildCardChar(Stack stack) throws SemanticException {
  int numElems = (stack != null ? stack.size() : 0);
  // No elements
  if (numElems == 0) {
    return -1;
  }
  // Candidate strings built so far, keyed by their length. The type parameters
  // are required: with a raw Map, get() yields Object and contentEquals(...)
  // below would not compile.
  Map<Integer, String> cachedNames = new HashMap<Integer, String>();
  // Number of stack elements (from the bottom) not yet folded into a candidate.
  int maxDepth = numElems;
  // Length of the longest candidate built so far.
  int maxLength = 0;
  for (String pattern : patternORWildChar) {
    int patLen = pattern.length();
    if (cachedNames.containsKey(patLen)) {
      // A candidate of exactly this length already exists; compare against it.
      if (pattern.contentEquals(cachedNames.get(patLen))) {
        return patLen;
      }
    } else if (maxLength >= patLen) {
      // The stack was already explored past this length and no candidate has
      // exactly this length, so this alternative cannot match.
      continue;
    } else {
      // Extend the longest candidate until it reaches the pattern length.
      StringBuilder name = new StringBuilder(patLen + numElems);
      if (maxLength != 0) {
        name.append(cachedNames.get(maxLength));
      }
      for (int pos = maxDepth - 1; pos >= 0; pos--) {
        String nodeName = stack.get(pos).getName() + "%";
        name.insert(0, nodeName);
        // Cache every intermediate candidate for later alternatives.
        cachedNames.put(name.length(), name.toString());
        maxLength = name.length();
        maxDepth--;
        if (name.length() >= patLen) {
          if (pattern.contentEquals(name)) {
            return patLen;
          }
          // Longer than the pattern, or same length but different: stop growing.
          break;
        }
      }
    }
  }
  return -1;
}
new RuleRegExp("R1", "TS%.*RS%JOIN%")



