select date,content_id,rec_queue_name_, -- w:word,到:结束,选第一个 regexp_extract(candidate_key_,'id=(\w+):',1) as candidate_key,count(1) as exposure from events where date >= '2021-10-27' and date <= '2021-11-01' and event = 'imp_detail_page' and channel = 'ssss_popular' and content_id = '7ebc54c538a745fcb898d08344dbf91f' and (rec_queue_name_ = 'zili-recent-liked-eges-cold-start-exploit-queue' or rec_queue_name_ = 'zili-recent-related-exploit-queue-v97') group by date,content_id,rec_queue_name_,regexp_extract(candidate_key_,'id=(\w+):',1)python
A(.*?)B 表示截取 A/B 中间的字符串
import re
txt = 'inews_panipuri:queue:type=eges_explicit_second_coldstart_v3:when=hour:id=10e7e8fa6c5644b6b929a6b8a93f8bba:language=en:ordering=sim'
bs64_str = re.findall("id=(.*?):", txt)[0]
print(bs64_str)
def get_id(txt):
try:
bs64_str = re.findall("id=(.*?):", txt)[0]
except Exception as e:
print(e)
print(txt)
data['id'] = data['candidate_key'].apply(lambda x:get_id(x))
data.head()



