病毒扫描包下的文件组成
__init__.py
commit_differ.py:文件差异对比
db_oper.py:数据库操作
models.py:数据库模型
run_virus_scan.py:主启动文件
scan_settings.py:病毒扫描设置
thread_pool.py:线程池声明
virus_scan.py:病毒扫描函数

scan_settings(关于扫描的设置)
class Settings():
    __init__():
    parse_config():
    parse_scan_config():
    parse_send_mail_config():
    is_enabled():

补充一:configParser:读取配置对象
parse_scan_config:分析扫描的配置。
该方法对配置中的virus_scan中的多个属性进行检查。
包括scan_command,virus_code,nonvirus_code,scan_interval,scan_size_limit,scan_skip_ext,threads.
大体结构如下:
if cfg.has_option('virus_scan', 'scan_command'):
self.scan_cmd = cfg.get('virus_scan', 'scan_command')
if not self.scan_cmd:
logger.info('[virus_scan] scan_command option is not found in %s, disable virus scan.' %
seaf_conf)
return False
parse_send_mail_config:解析与邮件相关的病毒扫描配置信息。
引入了utils.config中的两个方法:get_opt_from_conf_or_env,parse_bool
get_opt_from_conf_or_env:先在配置中查找属性;如果配置中没有该属性,且传入了env_key,则通过os.environ(os模块中获取环境变量的接口)从环境变量中读取;否则返回默认值default。
def get_opt_from_conf_or_env(config, section, key, env_key=None, default=None):
    '''Get option value from events.conf. If not specified in events.conf,
    check the environment variable.

    Args:
        config: parsed configuration object exposing ``get(section, key)``.
        section: name of the configuration section to read.
        key: name of the option inside ``section``.
        env_key: optional environment variable name used as a fallback; it is
            upper-cased before the lookup.
        default: value returned when neither source provides the option.

    Returns:
        The configured value, else the environment value, else ``default``.

    Only ``configparser.NoOptionError`` is handled here; any other error
    raised by ``config.get`` propagates to the caller.
    '''
    try:
        return config.get(section, key)
    except configparser.NoOptionError:
        if env_key is None:
            return default
        else:
            return os.environ.get(env_key.upper(), default)
parse_bool:判断输入是否为true
判定为真的取值:1,或者转为小写后等于"true"的字符串(lower(v) == "true")。
回到parse_send_mail_config方法:
通过get_opt_from_conf_or_env读取配置'SEAHUB EMAIL'中的 'enabled'和'seahubdir'属性。 然后根据配置返回mail时的virus scan配置
parse方法:读取config并调用上述方法进行设置。
commit_differ.py(差异对比)
和seafobj中的commit_differ相似,用于差异对比。
首先是辅助函数make_path(dirname,filename)
def make_path(dirname, filename):
    """Join a directory name and a file name with a single '/'.

    When ``dirname`` is exactly '/', the file name is appended directly so
    the result does not start with a doubled slash.
    """
    if dirname == '/':
        return dirname + filename
    else:
        return '/'.join((dirname, filename))
该函数用于连接文件夹名和文件名
class CommitDiffer(object):
    """Holds the inputs of a diff between two roots of one repository.

    Only the constructor appears in this excerpt; the ``diff`` method
    described in the surrounding text is not reproduced here.
    """

    def __init__(self, repo_id, version, root1, root2):
        """Store the repository id, its version and the two roots to compare."""
        self.repo_id = repo_id
        self.version = version
        self.root1 = root1
        self.root2 = root2
def diff为差异对比函数
models.py(数据库模型)
存放数据库模型
VirusScanRecord:repo_id(主键), scan_commit_id
VirusFile:vid(主键,自增), repo_id, commit_id, file_path, has_deleted, has_ignored

db_oper.py(数据库的操作)
文件结构
class DBOper:
    def __init__(self, settings)
    def get_repo_list(self)
    def get_scan_commit_id(self, repo_id)
    def update_vscan_record(self, repo_id, scan_commit_id)
    def add_virus_record(self, records)
def get_virus_files(session, repo_id, has_handled, start, limit)
def delete_virus_file(session, vid)
def operate_virus_file(session, vid, ignore)
def get_virus_file_by_vid(session, vid)
def get_virus_file_by_vid(session, vid):
    """Look up a VirusFile record by its vid.

    Args:
        session: database session used to build the query.
        vid: value matched against ``VirusFile.vid``.

    Returns:
        Whatever ``first()`` yields for the filtered query.
    """
    q = session.query(VirusFile).filter(VirusFile.vid == vid)
    return q.first()
def operate_virus_file(session, vid, ignore):
    """Set the has_ignored attribute of the VirusFile record with the given vid.

    Args:
        session: database session used for the query and the commit.
        vid: value matched against ``VirusFile.vid``.
        ignore: new value stored in ``has_ignored``.

    Returns:
        0 after the change has been committed.
    """
    q = session.query(VirusFile).filter(VirusFile.vid == vid)
    r = q.first()
    # NOTE(review): presumably first() gives None when no row matches, in
    # which case the assignment below would raise -- confirm with callers.
    r.has_ignored = ignore
    session.commit()
    return 0
def delete_virus_file(session, vid):
    """Set has_deleted to 1 on the VirusFile record with the given vid.

    Only the has_deleted attribute is changed here; the row itself is not
    removed by this function.

    Args:
        session: database session used for the query and the commit.
        vid: value matched against ``VirusFile.vid``.

    Returns:
        0 after the change has been committed.
    """
    q = session.query(VirusFile).filter(VirusFile.vid == vid)
    r = q.first()
    # NOTE(review): presumably first() gives None when no row matches, in
    # which case the assignment below would raise -- confirm with callers.
    r.has_deleted = 1
    session.commit()
    return 0
def get_virus_files(session, repo_id, has_handled, start, limit):
    """Fetch VirusFile records, optionally filtered, as one page of results.

    Args:
        session: database session used to build the query.
        repo_id: when truthy, only records of this repository are returned.
        has_handled: ``None`` applies no filter; a truthy value keeps records
            with has_deleted == 1 or has_ignored == 1; a falsy value keeps
            records where both has_deleted and has_ignored are 0.
        start: offset of the first record to return.
        limit: maximum number of records to return.

    Returns:
        The result of ``all()`` on the sliced query.
    """
    q = session.query(VirusFile)
    if repo_id:
        q = q.filter(VirusFile.repo_id == repo_id)
    if has_handled is not None:
        if has_handled:
            q = q.filter(or_(VirusFile.has_deleted == 1, VirusFile.has_ignored == 1))
        else:
            q = q.filter(and_(VirusFile.has_deleted == 0, VirusFile.has_ignored == 0))
    # slice() takes start/stop positions, hence start + limit.
    q = q.slice(start, start+limit)
    return q.all()
class DBOper
def get_scan_commit_id(self,repo_id):根据repo_id查询VirusScanRecord表中的scan_commit_id
q = session.query(VirusScanRecord).filter(VirusScanRecord.repo_id == repo_id) r = q.first() scan_commit_id = r.scan_commit_id if r else None return scan_commit_id
def update_vscan_record(self, repo_id, scan_commit_id):根据repo_id更新VirusScanRecord表中的scan_commit_id
session = self.edb_session()
try:
q = session.query(VirusScanRecord).filter(VirusScanRecord.repo_id == repo_id)
r = q.first()
if not r:
vrecord = VirusScanRecord(repo_id, scan_commit_id)
session.add(vrecord)
else:
r.scan_commit_id = scan_commit_id
session.commit()
def add_virus_record(self, records):添加
def get_repo_list:返回list
list中的对象结构:(repo_id, commit_id, scan_commit_id)

thread_pool.py(线程池)
文件结构
class Worker(Thread):
    def __init__(self, do_work, task_queue):
    def run(self):
class ThreadPool(object):
    def __init__(self, do_work, nworker=10):
    def start(self):
    def put_task(self, task):
    def join(self):
class Worker(Thread):
def run(self):线程运行
def run(self):
    """Consume tasks from the queue until a ``None`` sentinel is received.

    Each task is passed to ``self.do_work``. An exception raised while
    handling a task is printed and logged as a warning, and the loop then
    continues with the next task. ``task_done()`` is called once per
    iteration, including the one that receives the sentinel.
    """
    while True:
        try:
            task = self.task_queue.get()
            if task is None:
                # Sentinel: stop this worker (finally still runs).
                break
            self.do_work(task)
        except Exception as e:
            print(traceback.format_exc())
            logger.warning('Failed to execute task: %s' % e)
        finally:
            self.task_queue.task_done()
**class ThreadPool(object)**:线程池
def start(self):线程启动
def start(self):
    """Create and start ``self.nworker`` Worker threads.

    Every worker is given the same ``do_work`` callable and the same task
    queue.
    """
    for _ in range(self.nworker):
        Worker(self.do_work, self.task_queue).start()
def put_task(self, task):向task_queue中添加任务
def put_task(self, task):
    """Add ``task`` to the pool's task queue."""
    self.task_queue.put(task)
def join(self):等待队列中的任务全部处理完,然后向队列放入nworker个None,通知所有线程停止
def join(self):
    """Wait for all queued tasks to finish, then signal the workers to stop.

    After the queue's ``join()`` returns, one ``None`` is put on the queue
    per worker (``self.nworker`` in total).
    """
    self.task_queue.join()
    # notify all thread to stop
    for _ in range(self.nworker):
        self.task_queue.put(None)
virus_scan(病毒扫描)
文件结构
class ScanTask(object):
def __init__(self, repo_id, head_commit_id,scan_commit_id)
class VirusScan(object):
def start(self):
def scan_virus(self, scan_task):
def scan_file_virus(self, repo_id, file_id, file_path):
def send_email(self, vrecords):
def parse_scan_result(self, ret_code):
def should_scan_file(self, fpath, fsize):
def should_scan_file:判断文件是否该被扫描
超过一定大小和处于skip list中的类型文件返回False
def parse_scan_result:判断扫描结果
def parse_scan_result(self, ret_code):
    """Map a scan command's return code onto a scan result.

    The code is converted to a string and compared with the configured
    code lists, non-virus codes first.

    Args:
        ret_code: return code of the scan command.

    Returns:
        0 if the code is listed in ``settings.nonvir_codes``, 1 if it is
        listed in ``settings.vir_codes``, otherwise ``ret_code`` unchanged.
    """
    rcode_str = str(ret_code)
    if any(rcode_str == code for code in self.settings.nonvir_codes):
        return 0
    if any(rcode_str == code for code in self.settings.vir_codes):
        return 1
    return ret_code
补充二:subprocess:产生子进程,并连接到子进程的标准输入/输出/错误中去,还可以得到子进程的返回值。
def scan_file_virus(self, repo_id, file_id, file_path):扫描文件病毒
def scan_file_virus(self, repo_id, file_id, file_path):
    """Write a file's blocks to a temp file and run the scan command on it.

    The scan command's stdout and stderr are appended to ``virus_scan.log``
    in the directory named by the ``SEAFEVENTS_LOG_DIR`` environment
    variable (a relative path when the variable is unset).

    Args:
        repo_id: repository the file belongs to.
        file_id: id passed to ``fs_mgr.load_seafile``.
        file_path: path of the file, used only in log messages.

    Returns:
        The value of ``parse_scan_result`` for the command's return code,
        or -1 if any exception occurs along the way.
    """
    # Bound before the try block so that the finally clause stays valid
    # even when mkstemp() itself fails.
    tfd = None
    tpath = None
    try:
        tfd, tpath = tempfile.mkstemp()
        logger.debug("Created temp file '%s' for file '%s'", tpath, file_path)

        seafile = fs_mgr.load_seafile(repo_id, 1, file_id)
        for blk_id in seafile.blocks:
            os.write(tfd, block_mgr.load_block(repo_id, 1, blk_id))

        log_dir = os.environ.get('SEAFEVENTS_LOG_DIR', '')
        logfile = os.path.join(log_dir, 'virus_scan.log')
        with open(logfile, 'a') as fp:
            ret_code = subprocess.call([self.settings.scan_cmd, tpath], stdout=fp, stderr=fp)

        return self.parse_scan_result(ret_code)
    except Exception as e:
        logger.warning('Virus scan for file %s encounter error: %s.',
                       file_path, e)
        return -1
    finally:
        # Clean up only if the temp file was actually created; descriptor 0
        # is a valid value, so test against None rather than "> 0".
        if tfd is not None:
            os.close(tfd)
            os.unlink(tpath)
def start(self):
    """Scan every repository whose head commit differs from the last scan.

    The repository list comes from ``self.db_oper.get_repo_list()``. When it
    is ``None`` nothing is scanned. Otherwise a thread pool running
    ``self.scan_virus`` with ``self.settings.threads`` workers is started,
    one ScanTask is queued per changed repository, and the call returns
    after ``thread_pool.join()``.
    """
    repo_list = self.db_oper.get_repo_list()
    if repo_list is None:
        logger.debug("No repo, skip virus scan.")
        return

    thread_pool = ThreadPool(self.scan_virus, self.settings.threads)
    thread_pool.start()

    for row in repo_list:
        repo_id, head_commit_id, scan_commit_id = row

        # Head commit equals the last scanned commit: nothing new to scan.
        if head_commit_id == scan_commit_id:
            logger.debug('No change occur for repo %.8s, skip virus scan.',
                         repo_id)
            continue

        thread_pool.put_task(ScanTask(repo_id, head_commit_id, scan_commit_id))

    thread_pool.join()
run_virus_scan.py(病毒扫描的启动文件)
判断是否需要文件扫描,如果需要,进行文件扫描。
# Load the settings from the given config file and start a scan only when
# virus scanning is enabled; otherwise just log that it is disabled.
setting = Settings(args.config_file)
if setting.is_enabled():
    VirusScan(setting).start()
else:
    logger.info('Virus scan is disabled.')
到此,关于病毒扫描的代码分析完毕。



