栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 前沿技术 > 大数据 > 大数据系统

Hive Rule 及子类分析

Hive Rule 及子类分析

Rule

Hive 的 Rule 表示优化过程中的一条规则。是否应用某条规则,取决于应用该规则的代价:代价为正数时越小越好,负数表示该规则不适用。

/**
 * A rule that is matched against the current stack; whether it is applied depends on the
 * cost of applying it.
 */
public interface Rule {

  /**
   * Cost of applying this rule to the given stack; the smaller the better. The
   * implementations in this listing return -1 when the rule does not match.
   *
   * NOTE(review): Stack is a raw type here; the element type argument was presumably lost
   * when this listing was extracted - confirm against the upstream source.
   */
  int cost(Stack stack) throws SemanticException;

  /** Name of the rule, used for debugging. */
  String getName();
}

Rule 有 3 种子类,分别为 TypeRule,RuleExactMatch 和 RuleRegExp。

TypeRule

TypeRule 最简单,仅当栈顶 Node 的类型匹配指定的 Type 时才适用。

/**
 * Rule that applies when the element on top of the stack is an instance of a given class.
 */
public class TypeRule implements Rule {

  // Class that the top-of-stack element must be an instance of.
  private final Class<?> nodeClass;

  /**
   * @param nodeClass the class to match the top-of-stack element against
   */
  public TypeRule(Class<?> nodeClass) {
    this.nodeClass = nodeClass;
  }

  /**
   * Returns 1 when the element on top of the stack is an instance of the configured class,
   * otherwise -1. A null or empty stack never matches.
   *
   * @param stack the stack to inspect; may be null
   * @return 1 on a match, -1 otherwise
   */
  @Override
  public int cost(Stack stack) throws SemanticException {
    // peek() throws EmptyStackException on an empty stack, so an empty stack is
    // reported as "rule does not apply" just like a null one.
    if (stack == null || stack.isEmpty()) {
      return -1;
    }
    return nodeClass.isInstance(stack.peek()) ? 1 : -1;
  }

  /** Returns the fully qualified name of the matched class. */
  @Override
  public String getName() {
    return nodeClass.getName();
  }
}
RuleExactMatch

仅当 stack 的元素个数与 pattern 的长度相同,且各元素的名字与 pattern 中对应位置的字符串逐一完全相等时,返回 stack 的元素个数,否则返回 -1。

/**
 * Rule that matches only when the stack has exactly as many elements as the pattern and
 * every element's name equals the pattern entry at the same index.
 *
 * NOTE(review): the stack parameter is a raw Stack in this listing, so calling getName()
 * on its elements relies on an element type that is not visible here - confirm against
 * the upstream source.
 */
public class RuleExactMatch implements Rule {

  private final String ruleName;
  private final String[] pattern;

  /**
   * @param ruleName name returned by {@link #getName()}
   * @param pattern expected element names, indexed like the stack
   */
  public RuleExactMatch(String ruleName, String[] pattern) {
    this.pattern = pattern;
    this.ruleName = ruleName;
  }

  /**
   * Returns the number of stack elements when every element name equals the pattern entry
   * at the same position, otherwise -1. A null stack is treated as having no elements.
   */
  @Override
  public int cost(Stack stack) throws SemanticException {
    final int depth = (stack == null) ? 0 : stack.size();
    if (depth != pattern.length) {
      return -1;
    }
    // Walk from the top of the stack down to the bottom.
    int index = depth;
    while (index > 0) {
      index--;
      final boolean sameName = stack.get(index).getName().equals(pattern[index]);
      if (!sameName) {
        return -1;
      }
    }
    return depth;
  }

  /** Returns the rule name given at construction time. */
  @Override
  public String getName() {
    return ruleName;
  }
}
RuleRegExp

可以包含通配符的匹配规则。

public class RuleRegExp implements Rule {

  private final String ruleName;
  private final Pattern patternWithWildCardChar;
  private final String patternWithoutWildCardChar;
  private String[] patternORWildChar;
  // 通配符定义
  private static final Set wildCards = new HashSet(Arrays.asList(
    '[', '^', '$', '*', ']', '+', '|', '(', '\', '.', '?', ')', '&'));
    
   /**
    * Creates a rule from a name and a rule expression. Exactly one of the three pattern
    * fields is set, depending on which wildcard characters the expression contains.
    *
    * @param ruleName name of the rule
    * @param regExp the rule expression: a literal, a '|' alternation of literals, or a
    *        regular expression
    */
   public RuleRegExp(String ruleName, String regExp) {
    this.ruleName = ruleName;

    if (patternHasWildCardChar(regExp)) {
      // When '|' is the only wildcard the expression is a plain OR of literals, so it is
      // split instead of being compiled into a regular expression (for efficiency).
      if (patternHasOnlyWildCardChar(regExp, '|')) {
          this.patternWithWildCardChar = null;
          this.patternWithoutWildCardChar = null;
          // split() takes a regular expression, so the pipe has to be escaped; in Java
          // source that is written "\\|".
          this.patternORWildChar = regExp.split("\\|");
      } else {
        // General case: compile the regular expression.
        this.patternWithWildCardChar = Pattern.compile(regExp);
        this.patternWithoutWildCardChar = null;
        this.patternORWildChar = null;
      }
    } else {
      // No wildcard characters at all: keep the literal expression.
      this.patternWithWildCardChar = null;
      this.patternWithoutWildCardChar = regExp;
      this.patternORWildChar = null;
    }
  }
  
// 是否仅包含 wcc 通配符。
  /**
   * Tells whether {@code wcc} is the only wildcard character that occurs in the pattern.
   *
   * @param pattern the rule expression; may be null
   * @param wcc the single wildcard character that is allowed
   * @return true when the pattern contains at least one wildcard character and every
   *         wildcard character in it equals {@code wcc}; false otherwise, including for
   *         a null pattern
   */
  private static boolean patternHasOnlyWildCardChar(String pattern, char wcc) {
    if (pattern == null) {
      return false;
    }
    boolean sawWildCard = false;
    final int length = pattern.length();
    for (int i = 0; i < length; i++) {
      final char current = pattern.charAt(i);
      if (!wildCards.contains(current)) {
        continue;
      }
      // Any wildcard other than the allowed one disqualifies the pattern.
      if (current != wcc) {
        return false;
      }
      sawWildCard = true;
    }
    return sawWildCard;
  }

 // 计算 cost
  /**
   * Computes the cost of this rule for the given stack by delegating to the matcher that
   * corresponds to the pattern field set at construction time.
   *
   * @throws SemanticException when none of the three pattern states is valid
   */
  @Override
  public int cost(Stack stack) throws SemanticException {
    final int result;
    if (rulePatternIsValidWithoutWildCardChar()) {
      result = costPatternWithoutWildCardChar(stack);
    } else if (rulePatternIsValidWithWildCardChar()) {
      result = costPatternWithWildCardChar(stack);
    } else if (rulePatternIsValidWithORWildCardChar()) {
      result = costPatternWithORWildCardChar(stack);
    } else {
      // None of the three predicates holds, i.e. the pattern fields are in an
      // inconsistent state. This is an internal error, so fail loudly.
      final String message =
          "Rule pattern is invalid for " + getName() + " : patternWithWildCardChar = " +
          patternWithWildCardChar + " patternWithoutWildCardChar = " +
          patternWithoutWildCardChar;
      throw new SemanticException(message);
    }
    return result;
  }

/**
 * Tells whether this rule holds a literal pattern without wildcard characters, i.e. only
 * patternWithoutWildCardChar is set.
 */
boolean rulePatternIsValidWithoutWildCardChar() {
    if (patternWithoutWildCardChar == null) {
      return false;
    }
    return patternWithWildCardChar == null && this.patternORWildChar == null;
  }
  
 /**
  * Computes the cost for a literal pattern without wildcard characters.
  *
  * A name is built by prepending {@code getName() + "%"} of each stack element, starting
  * at the top of the stack, until the name is at least as long as the pattern. The rule
  * matches when the name built that way equals the pattern.
  *
  * NOTE(review): stack is a raw Stack in this listing, so getName() on its elements relies
  * on an element type that is not visible here - confirm against the upstream source.
  *
  * @param stack the stack to match; may be null
  * @return the pattern length on a match, -1 otherwise (including a null or empty stack)
  */
 private int costPatternWithoutWildCardChar(Stack stack) throws SemanticException {
    if (stack == null || stack.size() == 0) {
      return -1;
    }

    final int targetLength = patternWithoutWildCardChar.length();
    final StringBuilder suffix = new StringBuilder(targetLength + stack.size());
    int index = stack.size() - 1;
    // The stack is not empty here, so at least one element name is always prepended.
    do {
      suffix.insert(0, stack.get(index).getName() + "%");
      index--;
    } while (index >= 0 && suffix.length() < targetLength);

    // A name that is shorter (stack exhausted) or longer than the pattern cannot equal it.
    return patternWithoutWildCardChar.contentEquals(suffix) ? targetLength : -1;
  }
  
  /**
   * Tells whether this rule holds a compiled regular expression, i.e. only
   * patternWithWildCardChar is set.
   */
  boolean rulePatternIsValidWithWildCardChar() {
    final boolean onlyRegexSet = patternWithWildCardChar != null
        && patternWithoutWildCardChar == null
        && this.patternORWildChar == null;
    return onlyRegexSet;
  }
  // opRules.put(new RuleRegExp("R1", "TS%.*RS%JOIN%")
  /**
   * Computes the cost for a pattern that was compiled into a regular expression.
   *
   * A name is built by prepending {@code getName() + "%"} of each stack element, starting
   * at the top of the stack. After every step the whole name is matched against the
   * regular expression.
   *
   * NOTE(review): stack is a raw Stack in this listing, so getName() on its elements relies
   * on an element type that is not visible here - confirm against the upstream source.
   *
   * @param stack the stack to match; may be null
   * @return the length of the first name that matches, or -1 when none does
   */
  private int costPatternWithWildCardChar(Stack stack) throws SemanticException {
    final StringBuilder suffix = new StringBuilder();
    final Matcher matcher = patternWithWildCardChar.matcher("");
    int depth = (stack == null) ? 0 : stack.size();
    while (depth > 0) {
      depth--;
      suffix.insert(0, stack.get(depth).getName() + "%");
      // matches() requires the entire name to match, not just a part of it.
      if (matcher.reset(suffix).matches()) {
        return suffix.length();
      }
    }
    return -1;
  }
rulePatternIsValidWithORWildCardChar

Example:TypeCheckProcFactory

// Either integer or number will be processed by the number expression processor
// (tf.getNumExprProcessor()).
opRules.put(new RuleRegExp("R2", HiveParser.Number + "%|" +
        HiveParser.IntegralLiteral + "%|" +
        HiveParser.NumberLiteral + "%"),
        tf.getNumExprProcessor());
    // R3: identifiers, string-like literals and the listed keywords are handled by
    // tf.getStrExprProcessor().
    // NOTE(review): the two consecutive "%|" pieces below produce a bare "%" alternative
    // once the expression is split on '|' - confirm this is intended.
    opRules
        .put(new RuleRegExp("R3", HiveParser.Identifier + "%|"
        + HiveParser.StringLiteral + "%|" + HiveParser.TOK_CHARSETLITERAL + "%|"
        + HiveParser.TOK_STRINGLITERALSEQUENCE + "%|"
        + "%|" + HiveParser.KW_IF + "%|" + HiveParser.KW_CASE + "%|"
        + HiveParser.KW_WHEN + "%|" + HiveParser.KW_IN + "%|"
        + HiveParser.KW_ARRAY + "%|" + HiveParser.KW_MAP + "%|"
        + HiveParser.KW_STRUCT + "%|" + HiveParser.KW_EXISTS + "%|"
        + HiveParser.TOK_SUBQUERY_OP_NOTIN + "%"),
        tf.getStrExprProcessor());
    // R4: the TRUE / FALSE keywords are handled by tf.getBoolExprProcessor().
    opRules.put(new RuleRegExp("R4", HiveParser.KW_TRUE + "%|"
        + HiveParser.KW_FALSE + "%"), tf.getBoolExprProcessor());
    // R5: date and timestamp literals are handled by tf.getDateTimeExprProcessor().
    opRules.put(new RuleRegExp("R5", HiveParser.TOK_DATELITERAL + "%|"
        + HiveParser.TOK_TIMESTAMPLITERAL + "%|"
        + HiveParser.TOK_TIMESTAMPLOCALTZLITERAL + "%"), tf.getDateTimeExprProcessor());
    // R6: interval literals are handled by tf.getIntervalExprProcessor().
    opRules.put(new RuleRegExp("R6", HiveParser.TOK_INTERVAL_YEAR_MONTH_LITERAL + "%|"
        + HiveParser.TOK_INTERVAL_DAY_TIME_LITERAL + "%|"
        + HiveParser.TOK_INTERVAL_YEAR_LITERAL + "%|"
        + HiveParser.TOK_INTERVAL_MONTH_LITERAL + "%|"
        + HiveParser.TOK_INTERVAL_DAY_LITERAL + "%|"
        + HiveParser.TOK_INTERVAL_HOUR_LITERAL + "%|"
        + HiveParser.TOK_INTERVAL_MINUTE_LITERAL + "%|"
        + HiveParser.TOK_INTERVAL_SECOND_LITERAL + "%"), tf.getIntervalExprProcessor());
   /**
    * Tells whether this rule holds a '|' alternation of literal patterns, i.e. only
    * patternORWildChar is set.
    */
   boolean rulePatternIsValidWithORWildCardChar() {
    if (this.patternORWildChar == null) {
      return false;
    }
    return patternWithWildCardChar == null && patternWithoutWildCardChar == null;
  }

  
  /**
   * Computes the cost for a rule expression that is a plain '|' alternation of literal
   * patterns.
   *
   * Names are built by prepending {@code getName() + "%"} of each stack element, starting
   * at the top of the stack. An alternative matches when it equals one of these names.
   * Every name built is cached by its length, so later alternatives reuse the work done
   * for earlier ones instead of walking the stack again.
   *
   * NOTE(review): stack is a raw Stack in this listing, so getName() on its elements relies
   * on an element type that is not visible here - confirm against the upstream source.
   *
   * @param stack the stack to match; may be null
   * @return the length of the first alternative that matches, or -1 when none does
   *         (including a null or empty stack)
   */
  private int costPatternWithORWildCardChar(Stack stack) throws SemanticException {
    int numElems = (stack != null ? stack.size() : 0);

    // No elements
    if (numElems == 0) {
      return -1;
    }

    // Names built so far, keyed by their length. The type arguments are required:
    // contentEquals() below does not accept the Object a raw Map would return.
    Map<Integer, String> cachedNames = new HashMap<Integer, String>();
    // Index one above the next stack element to prepend.
    int maxDepth = numElems;
    // Length of the longest name built so far.
    int maxLength = 0;

    // For every pattern
    for (String pattern : patternORWildChar) {
      int patLen = pattern.length();

      // Cached values are never null, so a single lookup replaces containsKey() + get().
      String cachedName = cachedNames.get(patLen);
      if (cachedName != null) {
        // The stack was already explored to exactly this length; reuse the cached name.
        if (pattern.contentEquals(cachedName)) {
          return patLen;
        }
      } else if (maxLength >= patLen) {
        // We have already explored the stack deep enough, but no name of this exact
        // length exists, so this alternative cannot match.
        continue;
      } else {
        // Extend the longest name built so far with further stack elements.
        StringBuilder name = new StringBuilder(patLen + numElems);
        if (maxLength != 0) {
          name.append(cachedNames.get(maxLength));
        }
        for (int pos = maxDepth - 1; pos >= 0; pos--) {
          String nodeName = stack.get(pos).getName() + "%";
          name.insert(0, nodeName);

          // Cache every intermediate name for the alternatives that follow.
          cachedNames.put(name.length(), name.toString());
          maxLength = name.length();
          maxDepth--;

          if (name.length() >= patLen) {
            if (pattern.contentEquals(name)) {
              return patLen;
            }
            // The name is now at least as long as the pattern; going deeper cannot help.
            break;
          }
        }
      }
    }
    return -1;
  }

new RuleRegExp("R1", "TS%.*RS%JOIN%")

转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/719564.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号