path_openat()
path_openat() is the function that looks up a file by its path in order to fill in a struct file. The lookup procedure is built on a structure we discussed before, struct nameidata. A nameidata is a temporary variable: it not only saves the key data for each step of the search but also holds the result, which is then used to fill in the struct file. It is temporary because it is restored immediately after the function finishes.
So how does path_openat walk through the directories and finally reach the destination? The algorithm is called path_walk: it walks along the path.
Path_walk
This is how path_openat was defined:
static struct file *path_openat(int dfd, struct filename *pathname,
struct nameidata *nd, const struct open_flags *op, int flags)
dfd is the starting point for this path walk, and pathname, of course, is the path to the destination. nd is a pointer to struct nameidata; it saves the context and data needed for the walk and also holds the result of the walk, including the inode and other data. flags, however, decides how the data in nameidata will be handled.
struct nameidata {
struct path path;
struct qstr last;
struct path root;
struct inode *inode;
unsigned int flags;
unsigned seq;
int last_type;
unsigned depth;
char *saved_names[MAX_NESTED_linkS + 1];
};
Allow me to show the code of nameidata again (above). path is the last node that has been handled along the path, and last is the component being processed now.
Here is the function path_openat, which is also located in fs/namei.c, because it is effectively a method of nameidata (even though this is not real object-oriented code).
/*
 * path_openat - resolve the path described by @nd and open it as a new file.
 * @nd:    lookup state for this walk
 * @op:    open parameters; op->open_flag is used when allocating the file
 * @flags: lookup flags handed to the walk helpers; LOOKUP_RCU also selects
 *         which error code replaces -EOPENSTALE on failure
 *
 * Returns the opened struct file on success, or an ERR_PTR() on failure.
 */
static struct file *path_openat(struct nameidata *nd,
const struct open_flags *op, unsigned flags)
{
struct file *file;
int error;
/* Allocate the file up front; an allocation error is returned unchanged. */
file = alloc_empty_file(op->open_flag, current_cred());
if (IS_ERR(file))
return file;
/* __O_TMPFILE and O_PATH opens use dedicated helpers instead of the regular walk. */
if (unlikely(file->f_flags & __O_TMPFILE)) {
error = do_tmpfile(nd, flags, op, file);
} else if (unlikely(file->f_flags & O_PATH)) {
error = do_o_path(nd, flags, file);
} else {
/* Regular open: path_init() supplies the string the walk starts from. */
const char *s = path_init(nd, flags);
/*
 * Walk everything up to the last component, then process the last
 * component. Repeat while open_last_lookups() hands back another
 * string to walk (presumably a trailing symlink; confirm) and no
 * error has occurred. The loop body is intentionally empty.
 */
while (!(error = link_path_walk(s, nd)) &&
(s = open_last_lookups(nd, file, op)) != NULL)
;
if (!error)
error = do_open(nd, file, op);
/* The walk is terminated whether or not it succeeded. */
terminate_walk(nd);
}
/* Success is reported only if the file really was opened; otherwise warn and fail with -EINVAL. */
if (likely(!error)) {
if (likely(file->f_mode & FMODE_OPENED))
return file;
WARN_ON(1);
error = -EINVAL;
}
/* Failure path: drop the file allocated above. */
fput(file);
/* -EOPENSTALE is not returned as-is: it becomes -ECHILD when LOOKUP_RCU is set, -ESTALE otherwise. */
if (error == -EOPENSTALE) {
if (flags & LOOKUP_RCU)
error = -ECHILD;
else
error = -ESTALE;
}
return ERR_PTR(error);
}
The unsigned flags parameter determines how this openat is carried out: in RCU mode, or in non-RCU mode, which uses locks to make sure the dentry is accessed safely under high concurrency, which is likely to happen. RCU is the solution for keeping the data in these structures safe in that situation, and if RCU cannot be used for some reason, the system falls back to the old way and tries again, which is why we see this:
filp = path_openat(&nd, op, flags | LOOKUP_RCU); if (unlikely(filp == ERR_PTR(-ECHILD))) filp = path_openat(&nd, op, flags);
In the first statement the function tries an RCU lookup; if that fails with -ECHILD, it tries again the old way, which costs more time than the former.
In path_openat(), these steps will be done in order.
1. alloc_empty_file() creates a struct file and allocates its memory.
2.path_init() will initialize nameidata and get ready for path walk.
3. The function calls link_path_walk() to search level by level. Each name enclosed between two slashes is one level, so the algorithm advances from slash to slash to parse the path. At the end of the walk, the dentry in nameidata (nd->path.dentry) is the parent directory of the last part of the path, and the name in nameidata (nd->last) is set to the last part of the path.
For example, for a path walk with the path "/root/linux/data", the dentry in nameidata will be /root/linux/ and the name will be data.
Let's look at the code.
/*
 * link_path_walk - walk @name one component at a time, stopping at the last one.
 * @name: path string to walk; an ERR_PTR value is converted to its error code
 * @nd:   lookup state; nd->last and nd->last_type are updated for every component
 *
 * Returns 0 once the end of the outermost name is reached, with the final
 * component recorded in nd->last but not yet passed to walk_component(),
 * or a negative error code.
 */
static int link_path_walk(const char *name, struct nameidata *nd)
{
int depth = 0; // depth <= nd->depth
int err;
/* Until a component is parsed, the "last" component is the root; LOOKUP_PARENT is cleared again at the OK label. */
nd->last_type = LAST_ROOT;
nd->flags |= LOOKUP_PARENT;
if (IS_ERR(name))
return PTR_ERR(name);
while (*name=='/')// skip any run of leading slashes
name++;
/* Nothing left after the leading slashes: there is no component to walk. */
if (!*name) {
nd->dir_mode = 0; // short-circuit the 'hardening' idiocy
return 0;
}
/* One iteration per path component. */
for(;;) {
struct user_namespace *mnt_userns;
const char *link;
u64 hash_len;
int type;
/* may_lookup() has to succeed for the current position before the component is examined. */
mnt_userns = mnt_user_ns(nd->path.mnt);
err = may_lookup(mnt_userns, nd);
if (err)
return err;
/* hash_len packs the component's hash and length; hashlen_len() extracts the length. */
hash_len = hash_name(nd->path.dentry, name);
type = LAST_NORM;
/* Classify "." (length 1) and ".." (length 2, second char '.'); anything else stays LAST_NORM. */
if (name[0] == '.') switch (hashlen_len(hash_len)) {
case 2:
if (name[1] == '.') {
type = LAST_DOTDOT;
nd->state |= ND_JUMPED;
}
break;
case 1:
type = LAST_DOT;
}
if (likely(type == LAST_NORM)) {
struct dentry *parent = nd->path.dentry;
nd->state &= ~ND_JUMPED;
/* A parent with DCACHE_OP_HASH supplies its own d_hash(); take the hash and name it produces. */
if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
struct qstr this = { { .hash_len = hash_len }, .name = name };
err = parent->d_op->d_hash(parent, &this);
if (err < 0)
return err;
hash_len = this.hash_len;
name = this.name;
}
}
/* Record this component as the current "last" one. */
nd->last.hash_len = hash_len;
nd->last.name = name;
nd->last_type = type;
/* Step past the component; a NUL here means it was the final component of this name. */
name += hashlen_len(hash_len);
if (!*name)
goto OK;
/* Otherwise the character is '/': skip it and any further slashes. */
do {
name++;
} while (unlikely(*name == '/'));
/* Only trailing slashes followed: this also counts as the end of the name. */
if (unlikely(!*name)) {
OK:
/* depth == 0: no saved names are pending, so the walk is done; the last component is left to the caller. */
if (!depth) {
nd->dir_uid = i_uid_into_mnt(mnt_userns, nd->inode);
nd->dir_mode = nd->inode->i_mode;
nd->flags &= ~LOOKUP_PARENT;
return 0;
}
/* End of a nested name: pop the name saved earlier, then walk this component. */
name = nd->stack[--depth].name;
link = walk_component(nd, 0);
} else {
/* More components follow this one. */
link = walk_component(nd, WALK_MORE);
}
/* A non-NULL result is either an error or a new string to walk (presumably a symlink body; confirm). */
if (unlikely(link)) {
if (IS_ERR(link))
return PTR_ERR(link);
/* Save the remainder of the current name and continue with the new string. */
nd->stack[depth++].name = name;
name = link;
continue;
}
/* The new position must allow lookups; otherwise fail with -ENOTDIR, or -ECHILD if try_to_unlazy() fails in RCU mode. */
if (unlikely(!d_can_lookup(nd->path.dentry))) {
if (nd->flags & LOOKUP_RCU) {
if (!try_to_unlazy(nd))
return -ECHILD;
}
return -ENOTDIR;
}
}
}
Let's take these apart.
if (name[0] == '.') switch (hashlen_len(hash_len)) {
case 2:
if (name[1] == '.') {
type = LAST_DOTDOT;
nd->state |= ND_JUMPED;
}
break;
case 1:
type = LAST_DOT;
}
This part handles "." and ".." in the path; as we all know, ".." means walking up to the parent directory. So type is changed to LAST_DOTDOT or LAST_DOT, and the component waits for further processing.
if (likely(type == LAST_NORM)) {
struct dentry *parent = nd->path.dentry;
nd->state &= ~ND_JUMPED;
if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
struct qstr this = { { .hash_len = hash_len }, .name = name };
err = parent->d_op->d_hash(parent, &this);
if (err < 0)
return err;
hash_len = this.hash_len;
name = this.name;
}
}
If type == LAST_NORM, which means it is a normal file name, then parent is set to nd's (nameidata's) current dentry, as we said before. If that parent directory has DCACHE_OP_HASH set, its d_hash() is called, and hash_len and name are updated from the result.
LAST_NORM:Last component is normal filename
LAST_ROOT:/
LAST_DOT:Last component is DOT(.)
LAST_DOTDOT:Last component is DOTDOT
LAST_BIND:Last component is BIND to special file system
nd->last.hash_len = hash_len; nd->last.name = name; nd->last_type = type; name += hashlen_len(hash_len); if (!*name) goto OK;
On each step of the walk, nd's last.hash_len, last.name and last_type are updated to describe the component that was just parsed.
Then name += hashlen_len(hash_len) actually advances name to the next slash. If there is no more name to process, goto OK.
/*
 * If it wasn't NUL, we know it was '/'. Skip that
 * slash, and continue until no more slashes.
 */
do {
name++;
} while (unlikely(*name == '/'));
In this part we skip the slash; further consecutive slashes, though unlikely to occur, are skipped as well.
if (unlikely(!*name)) {
OK:
if (!depth) {
nd->dir_uid = i_uid_into_mnt(mnt_userns, nd->inode);
nd->dir_mode = nd->inode->i_mode;
nd->flags &= ~LOOKUP_PARENT;
return 0;
}
name = nd->stack[--depth].name;
link = walk_component(nd, 0);
} else {
link = walk_component(nd, WALK_MORE);
}
Once the name is finished, everything finally goes to the OK part.



