1、写段小程序,然后用strace命令查看程序运行情况。
/*
 * Minimal test program: prints one line and exits successfully.
 * Build it as ./hello and run it under strace to watch the execve() call.
 */
#include <stdio.h>

int main(void)
{
	printf("hello world.\n");
	return 0;
}
strace ./hello
从上图可以看出,会先调用execve系统调用对可执行程序进行解析。execve的使用方法见:执行新程序 execve()_杨博东的博客-CSDN博客_execve
2、execve系统调用处理流程
/*
 * execve system call entry point.
 *
 * Converts the user-space path with getname() and hands it, together with
 * the untouched user-space argv/envp pointer arrays, to do_execve().
 * The return value of do_execve() is returned unchanged.
 */
SYSCALL_DEFINE3(execve,
		const char __user *, filename,
		const char __user *const __user *, argv,
		const char __user *const __user *, envp)
{
	return do_execve(getname(filename), argv, envp);
}
do_execve
  └─ do_execveat_common
       └─ bprm_execve
            └─ exec_binprm
                 └─ search_binary_handler
                      └─ fmt->load_binary(bprm);
因为除了ELF格式的可执行程序以外,还有shell脚本等其他格式,不同的文件解析的方法不一样,下面以ELF可执行文件为例。
/*
 * Binary-format descriptor for ELF.  Its load_binary hook is what
 * fmt->load_binary(bprm) ends up calling for an ELF executable.
 */
static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,	/* main executable loader */
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
};
/*
 * load_elf_binary() - load an ELF executable into the current process.
 *
 * @bprm: exec state.  bprm->buf is interpreted as the ELF header,
 *        bprm->file is the executable being loaded and bprm->p is used
 *        as the initial stack pointer.
 *
 * Processing order:
 *   1. validate the ELF header (magic, type, architecture, mmap support);
 *   2. read the program header table;
 *   3. locate PT_INTERP, open the interpreter and read its ELF header;
 *   4. scan PT_GNU_STACK and processor-specific program headers;
 *   5. validate the interpreter and read its program headers;
 *   6. start the new exec (begin_new_exec/setup_new_exec) and set up the
 *      stack pages;
 *   7. map every PT_LOAD segment, tracking code/data/bss/brk bounds;
 *   8. set up brk/bss, load the interpreter (if any) and pick the entry;
 *   9. build the ELF tables, record the layout in current->mm and
 *      call START_THREAD().
 *
 * Return: 0 on success, a negative errno value on failure.
 */
static int load_elf_binary(struct linux_binprm *bprm)
{
	struct file *interpreter = NULL; /* opened interpreter, if PT_INTERP exists */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
	struct elf_phdr *elf_property_phdata = NULL;
	unsigned long elf_bss, elf_brk;
	int bss_prot = 0;
	int retval, i;
	unsigned long elf_entry;
	unsigned long e_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc __maybe_unused = 0;
	int executable_stack = EXSTACK_DEFAULT;
	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
	struct elfhdr *interp_elf_ex = NULL;
	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
	struct mm_struct *mm;
	struct pt_regs *regs;

	/* Step 1: basic consistency checks on the ELF header. */
	retval = -ENOEXEC;
	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(elf_ex))
		goto out;
	if (elf_check_fdpic(elf_ex))
		goto out;
	/* The file must be mappable, segments are mapped via elf_map(). */
	if (!bprm->file->f_op->mmap)
		goto out;

	/* Step 2: read the program header table of the executable. */
	elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
	if (!elf_phdata)
		goto out;

	/* Step 3: look for PT_INTERP (and remember PT_GNU_PROPERTY). */
	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
		char *elf_interpreter;

		if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
			elf_property_phdata = elf_ppnt;
			continue;
		}

		if (elf_ppnt->p_type != PT_INTERP)
			continue;

		/* The interpreter path must be sane in length. */
		retval = -ENOEXEC;
		if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
			goto out_free_ph;

		retval = -ENOMEM;
		elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
		if (!elf_interpreter)
			goto out_free_ph;

		retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
				  elf_ppnt->p_offset);
		if (retval < 0)
			goto out_free_interp;
		/*
		 * The path is handed to open_exec() as a C string, so its
		 * last byte must be the NUL terminator.
		 */
		retval = -ENOEXEC;
		if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
			goto out_free_interp;

		interpreter = open_exec(elf_interpreter);
		kfree(elf_interpreter);
		retval = PTR_ERR(interpreter);
		if (IS_ERR(interpreter))
			goto out_free_ph;

		would_dump(bprm, interpreter);

		interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
		if (!interp_elf_ex) {
			retval = -ENOMEM;
			goto out_free_ph;
		}

		/* Read the interpreter's own ELF header from offset 0. */
		retval = elf_read(interpreter, interp_elf_ex,
				  sizeof(*interp_elf_ex), 0);
		if (retval < 0)
			goto out_free_dentry;

		/* Only the first PT_INTERP entry is used. */
		break;

out_free_interp:
		kfree(elf_interpreter);
		goto out_free_ph;
	}

	/* Step 4: stack executability and processor-specific headers. */
	elf_ppnt = elf_phdata;
	for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
		switch (elf_ppnt->p_type) {
		case PT_GNU_STACK:
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;

		case PT_LOPROC ... PT_HIPROC:
			retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
						  bprm->file, false,
						  &arch_state);
			if (retval)
				goto out_free_dentry;
			break;
		}

	/* Step 5: validate the interpreter and read its program headers. */
	if (interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter. */
		if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch. */
		if (!elf_check_arch(interp_elf_ex) ||
		    elf_check_fdpic(interp_elf_ex))
			goto out_free_dentry;

		interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
						   interpreter);
		if (!interp_elf_phdata)
			goto out_free_dentry;

		/*
		 * From here on the property header (if any) is taken from
		 * the interpreter instead of the executable.
		 */
		elf_property_phdata = NULL;
		elf_ppnt = interp_elf_phdata;
		for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
			switch (elf_ppnt->p_type) {
			case PT_GNU_PROPERTY:
				elf_property_phdata = elf_ppnt;
				break;

			case PT_LOPROC ... PT_HIPROC:
				retval = arch_elf_pt_proc(interp_elf_ex,
							  elf_ppnt, interpreter,
							  true, &arch_state);
				if (retval)
					goto out_free_dentry;
				break;
			}
	}

	retval = parse_elf_properties(interpreter ?: bprm->file,
				      elf_property_phdata, &arch_state);
	if (retval)
		goto out_free_dentry;

	/* Give the architecture a chance to reject the binary. */
	retval = arch_check_elf(elf_ex,
				!!interpreter, interp_elf_ex,
				&arch_state);
	if (retval)
		goto out_free_dentry;

	/* Step 6: start the new exec and prepare the stack. */
	retval = begin_new_exec(bprm);
	if (retval)
		goto out_free_dentry;

	SET_PERSONALITY2(*elf_ex, &arch_state);
	if (elf_read_implies_exec(*elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;

	setup_new_exec(bprm);

	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0)
		goto out_free_dentry;

	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	/* Step 7: map each PT_LOAD segment of the executable. */
	for (i = 0, elf_ppnt = elf_phdata;
	     i < elf_ex->e_phnum; i++, elf_ppnt++) {
		int elf_prot, elf_flags;
		unsigned long k, vaddr;
		unsigned long total_size = 0;
		unsigned long alignment;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/*
			 * A previously mapped segment has memsz > filesz:
			 * set up its bss/brk area before mapping the next one.
			 */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias,
					 bss_prot);
			if (retval)
				goto out_free_dentry;
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
						load_bias, nbyte)) {
					/*
					 * Intentionally empty: a clear_user()
					 * failure here is not treated as an
					 * error.
					 */
				}
			}
		}

		elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
				     !!interpreter, false);

		elf_flags = MAP_PRIVATE;

		vaddr = elf_ppnt->p_vaddr;
		if (load_addr_set) {
			/* Later segments go exactly where the bias puts them. */
			elf_flags |= MAP_FIXED;
		} else if (elf_ex->e_type == ET_EXEC) {
			/* Fixed-address executable: do not replace mappings. */
			elf_flags |= MAP_FIXED_NOREPLACE;
		} else if (elf_ex->e_type == ET_DYN) {
			/* First PT_LOAD of an ET_DYN object: choose load_bias. */
			if (interpreter) {
				/*
				 * NOTE(review): upstream spells this macro
				 * ELF_ET_DYN_BASE; the lower-case suffix looks
				 * like a transcription artifact - confirm.
				 */
				load_bias = ELF_ET_DYN_base;
				if (current->flags & PF_RANDOMIZE)
					load_bias += arch_mmap_rnd();
				alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
				if (alignment)
					load_bias &= ~(alignment - 1);
				elf_flags |= MAP_FIXED_NOREPLACE;
			} else
				load_bias = 0;

			/*
			 * elf_map() is called with load_bias + vaddr, so take
			 * vaddr out of the bias and page-align the result.
			 */
			load_bias = ELF_PAGESTART(load_bias - vaddr);
		}

		/* Only the first mapping is sized to cover all segments. */
		if (!load_addr_set) {
			total_size = total_mapping_size(elf_phdata,
							elf_ex->e_phnum);
			if (!total_size) {
				retval = -EINVAL;
				goto out_free_dentry;
			}
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, total_size);
		if (BAD_ADDR(error)) {
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void*)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (elf_ex->e_type == ET_DYN) {
				/* Fold in where the mapping actually landed. */
				load_bias += error -
					     ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}

		/* Track the lowest executable address and highest data start. */
		k = elf_ppnt->p_vaddr;
		if ((elf_ppnt->p_flags & PF_X) && k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Reject segments whose sizes are inconsistent or that would
		 * not fit below TASK_SIZE.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			retval = -EINVAL;
			goto out_free_dentry;
		}

		/* End of file-backed data: candidate for bss start. */
		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;

		/* End of the in-memory image: candidate for brk. */
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk) {
			bss_prot = elf_prot;
			elf_brk = k;
		}
	}

	/* Step 8: convert the tracked file-relative addresses to runtime ones. */
	e_entry = elf_ex->e_entry + load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	retval = set_brk(elf_bss, elf_brk, bss_prot);
	if (retval)
		goto out_free_dentry;
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (interpreter) {
		/* Dynamically linked: execution starts in the interpreter. */
		elf_entry = load_elf_interp(interp_elf_ex,
					    interpreter,
					    load_bias, interp_elf_phdata,
					    &arch_state);
		if (!IS_ERR((void *)elf_entry)) {
			/*
			 * load_elf_interp() returned the interpreter's load
			 * address; add its e_entry to get the entry point.
			 */
			interp_load_addr = elf_entry;
			elf_entry += interp_elf_ex->e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		/* The interpreter file and its headers are no longer needed. */
		allow_write_access(interpreter);
		fput(interpreter);

		kfree(interp_elf_ex);
		kfree(interp_elf_phdata);
	} else {
		/* No interpreter: start at the executable's own entry point. */
		elf_entry = e_entry;
		if (BAD_ADDR(elf_entry)) {
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(elf_phdata);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
	if (retval < 0)
		goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	/* Step 9: build the ELF tables (receives load and entry addresses). */
	retval = create_elf_tables(bprm, elf_ex,
				   load_addr, interp_load_addr, e_entry);
	if (retval < 0)
		goto out;

	/* Record the final layout in the memory descriptor. */
	mm = current->mm;
	mm->end_code = end_code;
	mm->start_code = start_code;
	mm->start_data = start_data;
	mm->end_data = end_data;
	mm->start_stack = bprm->p;

	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
		/*
		 * For an ET_DYN binary run without an interpreter, brk is
		 * first moved to the ET_DYN base before being randomized.
		 * NOTE(review): upstream spells this ELF_ET_DYN_BASE - confirm.
		 */
		if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
		    elf_ex->e_type == ET_DYN && !interpreter) {
			mm->brk = mm->start_brk = ELF_ET_DYN_base;
		}

		mm->brk = mm->start_brk = arch_randomize_brk(mm);
#ifdef compat_brk_randomized
		current->brk_randomized = 1;
#endif
	}

	if (current->personality & MMAP_PAGE_ZERO) {
		/*
		 * Map page 0 read/exec for personalities that request it.
		 * The result is stored in 'error' but not checked.
		 */
		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
	}

	regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	finalize_exec(bprm);
	/* Hand the entry point and stack pointer to START_THREAD(). */
	START_THREAD(elf_ex, regs, elf_entry, bprm->p);
	retval = 0;
out:
	return retval;

	/* error cleanup */
out_free_dentry:
	kfree(interp_elf_ex);
	kfree(interp_elf_phdata);
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}
1、解析程序elf头。readelf -h hello
使用UE打开目标程序文件,二进制显示。
2、通过load_elf_phdrs加载目标程序的程序头表。readelf -l hello
装载程序的时候,先解析程序头表(program headers)。gcc在编译时,除非显式地使用-static选项,否则所有程序的链接都是动态链接的,也就是说需要解释器。
3、处理解释器段(PT_INTERP)。readelf -l hello && ldd hello
解析该段,查找到解释器的路径
4、读取并检查解释器的程序头表
根据解释器的路径打开解释器,检测解释器文件
5、装载目标程序的segment。readelf -l hello
在链接阶段,链接器以section为单位,但是在装载程序的时候以segment为单位。segment中包含多个同一属性(rwxp)的section,更加合理使用内存空间。
6、填入程序的入口地址
/* Excerpt: choosing load_bias for an ET_DYN image. */
if (interpreter) {
	/* Start from the ET_DYN base address. */
	load_bias = ELF_ET_DYN_base;
	/* Add a random offset when PF_RANDOMIZE is set for the task. */
	if (current->flags & PF_RANDOMIZE)
		load_bias += arch_mmap_rnd();
	/* Round down to the largest alignment the program headers ask for. */
	alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
	if (alignment)
		load_bias &= ~(alignment - 1);
	elf_flags |= MAP_FIXED_NOREPLACE;
} else {
	load_bias = 0;
}
为避免程序的地址被猜测,可以根据PF_RANDOMIZE标志,获取一个随机的基地址load_bias。
7、装载目标程序的参数,环境变量等必要信息。如:argc,argv,envc
/* Excerpt: shift the tracked code/data bounds by load_bias ... */
start_code += load_bias;
end_code += load_bias;
start_data += load_bias;
end_data += load_bias;

/* ... and record the final layout in the memory descriptor. */
mm = current->mm;
mm->end_code = end_code;
mm->start_code = start_code;
mm->start_data = start_data;
mm->end_data = end_data;
mm->start_stack = bprm->p;
8、准备进入新程序入口
START_THREAD(elf_ex, regs, elf_entry, bprm->p);
其中START_THREAD宏最终调用start_thread(下面以ARM架构的定义为例),程序的入口地址elf_entry变量就是start_thread的第二个入参pc
/*
 * start_thread(regs, pc, sp) - prepare the saved user registers so that the
 * task begins executing at @pc with stack pointer @sp.
 *
 * @regs: pointer to the saved register set; all of regs->uregs is zeroed.
 * @pc:   entry address; bit 0 selects Thumb state when HWCAP_THUMB is set
 *        and is cleared before being stored in ARM_pc.
 * @sp:   initial stack pointer, stored in ARM_sp.
 *
 * With CONFIG_BINFMT_ELF_FDPIC, r7-r9 are saved across the memset and are
 * restored (and r10 set to mm->start_data) only when the personality has
 * FDPIC_FUNCPTRS; without an MMU only r10 is set.
 *
 * Every line of the body must end in a backslash so that the whole
 * statement expression belongs to the #define.
 */
#define start_thread(regs,pc,sp)					\
({									\
	unsigned long r7, r8, r9;					\
									\
	if (IS_ENABLED(CONFIG_BINFMT_ELF_FDPIC)) {			\
		r7 = regs->ARM_r7;					\
		r8 = regs->ARM_r8;					\
		r9 = regs->ARM_r9;					\
	}								\
	memset(regs->uregs, 0, sizeof(regs->uregs));			\
	if (IS_ENABLED(CONFIG_BINFMT_ELF_FDPIC) &&			\
	    current->personality & FDPIC_FUNCPTRS) {			\
		regs->ARM_r7 = r7;					\
		regs->ARM_r8 = r8;					\
		regs->ARM_r9 = r9;					\
		regs->ARM_r10 = current->mm->start_data;		\
	} else if (!IS_ENABLED(CONFIG_MMU))				\
		regs->ARM_r10 = current->mm->start_data;		\
	if (current->personality & ADDR_LIMIT_32BIT)			\
		regs->ARM_cpsr = USR_MODE;				\
	else								\
		regs->ARM_cpsr = USR26_MODE;				\
	if (elf_hwcap & HWCAP_THUMB && pc & 1)				\
		regs->ARM_cpsr |= PSR_T_BIT;				\
	regs->ARM_cpsr |= PSR_ENDSTATE;					\
	regs->ARM_pc = pc & ~1;		/* entry address */		\
	regs->ARM_sp = sp;		/* initial stack pointer */	\
})
返回用户空间时就进入新程序的入口地址,执行新程序



