这是在 Spark 中使用 HBase Scan(扫描)的示例:
import java.io.{DataOutputStream, ByteArrayOutputStream}
import java.lang.String
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Base64

/** Serializes an HBase `Scan` into the Base64 string stored under `TableInputFormat.SCAN`.
  *
  * @param scan the scan to serialize
  * @return the Base64 text of the bytes produced by `scan.write`
  */
// NOTE(review): `scan.write(DataOutput)` presumably requires an HBase release in which
// Scan is still a Hadoop Writable; newer releases ship
// TableMapReduceUtil.convertScanToString instead -- confirm against the HBase version in use.
def convertScanToString(scan: Scan): String = {
  val out: ByteArrayOutputStream = new ByteArrayOutputStream
  val dos: DataOutputStream = new DataOutputStream(out)
  scan.write(dos)
  Base64.encodeBytes(out.toByteArray)
}

val conf = HBaseConfiguration.create()

// Scan settings: fetch 500 rows per caching batch, block caching switched off.
val scan = new Scan()
scan.setCaching(500)
scan.setCacheBlocks(false)

// Tell TableInputFormat which table to read and which (serialized) scan to run.
conf.set(TableInputFormat.INPUT_TABLE, "table_name")
conf.set(TableInputFormat.SCAN, convertScanToString(scan))

// `sc` is not defined in this snippet; presumably the SparkContext provided by spark-shell.
val rdd = sc.newAPIHadoopRDD(
  conf,
  classOf[TableInputFormat],
  classOf[ImmutableBytesWritable],
  classOf[Result]
)
rdd.count()
您需要将相关的库添加到Spark类路径,并确保它们与您的Spark兼容。提示:您可以
使用 hbase classpath 命令来查找它们。



