[Linux 高并发服务器]文件IO

此博客是根据牛客的项目课写的，使用了pdf里的资料
各位可以去牛客官网学习一下

另外注意，本博客和牛客的教程以32位系统为例，如果要记忆一些默认数值，请明确前提！

[Linux 高并发服务器]文件IO

[Linux 高并发服务器]文件IO
- 标准C库IO函数与Linux系统IO函数
- 标准C库函数
  - FILE结构体
- 补充知识
  - 虚拟地址空间
  - 文件描述符
- Linux系统IO函数
  - open
  - read & write
  - lseek
  - stat & lstat
- 文件属性操作函数
  - access
  - chmod
  - chown
  - truncate
- 目录操作函数
  - mkdir
  - rmdir
  - rename
  - chdir & getcwd
- 目录遍历函数
  - 统计目录下文件数量
  - struct dirent 结构体
- 文件描述符相关函数
  - dup
  - dup2
  - fcntl

标准C库IO函数与Linux系统IO函数

标准C库IO函数相较于Linux系统IO函数，具有跨平台的优势，它可以针对不同的系统调用相应的API来实现同样的操作。（JAVA通过虚拟机实现跨平台）
标准C库IO函数与Linux系统IO函数是调用与被调用的关系
标准C库IO函数比Linux系统IO函数效率高（缓冲区的功劳）
一般网络通信用Linux系统IO函数，对磁盘读写的时候用标准C库的IO函数

标准C库IO函数的核心在于缓冲区。如果直接用Linux系统的read和write函数，每次读写都要进行一次系统调用（用户态与内核态之间的切换），并且可能需要访问磁盘，这些都要花费很多时间。标准C库的IO缓冲区把多次小的读写合并成一次，很大程度上减少了系统调用和访问磁盘的次数，提高了读写效率

标准C库函数

FILE结构体

文件指针FILE结构体代码如下

/*
 * Layout of struct _IO_FILE, the structure behind the C library's FILE.
 *
 * NOTE(review): this listing is truncated -- the `#ifdef _IO_USE_OLD_IO_FILE`
 * just before the closing brace has no matching `#endif` here, so the excerpt
 * is for reading only and will not compile as shown.
 */
struct _IO_FILE
{
  int _flags;		

  /* Presumably the read, write and buffer areas of the stream's I/O buffer
     (named after their base / current pointer / end) -- confirm upstream.  */
  char *_IO_read_ptr;	
  char *_IO_read_end;	
  char *_IO_read_base;	
  char *_IO_write_base;	
  char *_IO_write_ptr;	
  char *_IO_write_end;	
  char *_IO_buf_base;	
  char *_IO_buf_end;	

  /* Presumably bookkeeping for a saved/backup area -- confirm upstream.  */
  char *_IO_save_base; 
  char *_IO_backup_base;  
  char *_IO_save_end; 

  struct _IO_marker *_markers;

  /* Pointer to another _IO_FILE; presumably links open streams together.  */
  struct _IO_FILE *_chain;

  int _fileno;// file descriptor
  int _flags2;
  __off_t _old_offset; 

  
  unsigned short _cur_column;
  signed char _vtable_offset;
  char _shortbuf[1];

  _IO_lock_t *_lock;
#ifdef _IO_USE_OLD_IO_FILE
};

补充知识虚拟地址空间

虚拟地址空间是不存在的，但是可以用来解释一些程序加载时的内存问题，以及程序当中堆和栈的一些概念
对于上面的情景，我们有4G内存和三个程序，三个程序分别需要内存1G,2G,2G。我们先把1G和2G的文件加载进去，第三个程序就加载不进来了。但是如果1G程序结束了，第三个文件能否加载呢？虚拟地址空间可以用来解释这些问题。

文件描述符

首先区分一下程序和进程
程序占用磁盘空间，不占用内存空间；而进程是运行起来的程序，要占用内存空间。
程序运行起来的时候操作系统会为其分配资源，创建一个进程

Linux哲学：一切皆文件
那么如何定位到文件呢，文件描述符可以帮助我们
文件描述符表位于虚拟地址空间内核区的PCB（进程控制块）中
文件描述符表就是一个数组，存储了很多文件描述符，这样进程可以同时打开多个文件。这个数组的大小默认为1024,所以最多同时打开文件个数为1024。（32位系统，4G虚拟地址空间情况下）

不同的文件描述符可以对应同一个文件

Linux系统IO函数

这些函数用的时候现查man手册就可以了。我发现自己的archlinux没有把man装完整，于是先装了最基础的man-db，又装了man-pages相关的包。
不同page存储不同手册，下面从这个博客里找了一张截图

open

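open有两种调用形式：int open(const char *pathname, int flags); 和 int open(const char *pathname, int flags, mode_t mode);。C语言没有函数重载，这是通过可变参数实现的，mode_t mode是一个可选参数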
前者主要用于打开已有文件
后者主要用于创建一个新的文件，因为是创建新文件，所以我们还要设置各个用户组对这个文件的权限

如果发生错误，我们可以使用perror函数打印发生的错误

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Demo: open an existing file read-only with the two-argument form of
 * open(), report a failure with perror(), then close the descriptor.
 *
 * Returns 0 on success, -1 if "a.txt" cannot be opened.
 */
int main(void) {

    // open() returns -1 and sets errno on failure.
    int fd = open("a.txt", O_RDONLY);
    if (fd == -1) {
        perror("open");
        // Stop here: there is no valid descriptor to use or to close.
        return -1;
    }

    // Read/write operations would go here.

    // Release the descriptor.
    close(fd);

    return 0;
}

对比前者，这个open多了个mode_t mode参数用来设置文件权限
我们采用3位8进制来表示不同用户/用户组对这个文件的权限

我们使用ll查看目录下方文件信息的时候，前面有一串字母，我们以截图中第一行为例
lrwxrwxrwx中第一个字母表示文件类型，后面每三个一组表示不同用户/用户组对该文件的权限。
依次为：文件所有者（当前用户），文件所属用户组，其他用户
r读权限
w写权限
x执行权限
对于单个用户组，我们可以用三位二进制表示权限
例如：rwx对应二进制111,我们可以采用8进制表示，也就是7
那么对于三个用户组:rwxrwxrwx,对应的8进制数就是777

但是我们直接设置权限可能发生不合理的情况，因此在传入mode参数后，还需要进一步处理
最终的值为mode & ~umask
umask用于抹去一些不合理的权限设置，我们可以直接输入umask查看当前用户的umask

具体例子可以看下面的代码

另外对于flag参数，多了一个可选选项用于处理没有文件的情况

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

/*
 * Demo: create a file with the three-argument form of open().
 *
 * O_CREAT creates "create.txt" if it does not exist; the requested mode
 * 0777 is filtered by the process umask (effective mode = mode & ~umask).
 *
 * Returns 0 on success, -1 if the file cannot be opened or created.
 */
int main(void) {

    // Flags are combined with bitwise OR; the mode is only used on creation.
    int fd = open("create.txt", O_RDWR | O_CREAT, 0777);
    if (fd == -1) {
        perror("open");
        // Stop here: there is no valid descriptor to close.
        return -1;
    }

    // Release the descriptor.
    close(fd);

    return 0;
}

通过上方代码，我们可以发现flag参数使用按位或同时设置多个标记，其原理如下。flag参数是一个32位整数，每一位是一个标记位。
我们原来有一个读写的标记，现在加入一个创建的标记，只需要按位或过去，那么最终flag中的创建标记位就变成1了

read & write

使用read和write函数实现文件拷贝

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/*
 * Demo: copy english.txt to cpy.txt using only open/read/write/close.
 *
 * Returns 0 when the whole file was copied, -1 on any open, read or
 * write failure. Both descriptors are closed on every path.
 */
int main(void) {

    int status = -1;

    // 1. Open the source file.
    int srcfd = open("english.txt", O_RDONLY);
    if (srcfd == -1) {
        perror("open");
        return -1;
    }

    // 2. Create the destination. O_TRUNC discards any previous content so a
    //    longer, pre-existing cpy.txt does not keep a stale tail.
    int destfd = open("cpy.txt", O_WRONLY | O_CREAT | O_TRUNC, 0664);
    if (destfd == -1) {
        perror("open");
        close(srcfd);
        return -1;
    }

    // 3. Copy chunk by chunk. read() returns 0 at end of file, -1 on error.
    char buf[1024];
    ssize_t len;
    while ((len = read(srcfd, buf, sizeof(buf))) > 0) {
        // write() may accept fewer bytes than requested; loop until the
        // whole chunk has been written.
        ssize_t done = 0;
        while (done < len) {
            ssize_t n = write(destfd, buf + done, (size_t)(len - done));
            if (n == -1) {
                perror("write");
                goto out;
            }
            done += n;
        }
    }
    if (len == -1) {
        perror("read");
        goto out;
    }

    status = 0;

out:
    // 4. Close both files.
    close(destfd);
    close(srcfd);

    return status;
}

lseek

控制文件指针，实现众多功能

移动文件指针到文件头
lseek(fd, 0, SEEK_SET);
获取当前文件指针的位置
lseek(fd, 0, SEEK_CUR);
获取文件长度
lseek(fd, 0, SEEK_END);
拓展文件的长度：例如当前文件为10字节，扩展后变为110字节，增加了100个字节
lseek(fd, 100, SEEK_END)
注意：lseek之后需要再写入一次数据，文件的长度才会真正被扩展

下面的代码是以拓展文件的长度为例子。


#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

/*
 * Demo: extend hello.txt by seeking 100 bytes past its end and writing
 * one byte there.
 *
 * Returns 0 on success, -1 if open, lseek or write fails.
 */
int main(void) {

    int fd = open("hello.txt", O_RDWR);
    if (fd == -1) {
        perror("open");
        return -1;
    }

    // Move the file offset 100 bytes beyond the current end of file.
    // lseek() returns an off_t; keep that type so a large offset is not
    // truncated.
    off_t pos = lseek(fd, 100, SEEK_END);
    if (pos == (off_t)-1) {
        perror("lseek");
        close(fd);
        return -1;
    }

    // Seeking alone does not change the file size; the file only grows
    // once something is written at the new offset.
    if (write(fd, " ", 1) == -1) {
        perror("write");
        close(fd);
        return -1;
    }

    close(fd);

    return 0;
}

那么扩展文件长度有什么用呢
假设我们需要下载5G的资料，但是同时我们还要使用磁盘，这有可能造成下到一半磁盘不够了。这时候我们可以lseek实现扩展出5G的文件，然后往扩展出来的文件当中写入数据

stat & lstat

这两个函数用于返回文件的一些相关信息。实际上Linux自己也有stat命令，案例如下。


#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdio.h>

/*
 * Demo: query file metadata with stat() and print the size of a.txt.
 *
 * Returns 0 on success, -1 if stat() fails.
 */
int main(void) {

    struct stat statbuf;

    int ret = stat("a.txt", &statbuf);
    if (ret == -1) {
        perror("stat");
        return -1;
    }

    // st_size is an off_t whose width depends on the platform; cast it so
    // the argument always matches the %ld conversion.
    printf("size: %ld\n", (long)statbuf.st_size);

    return 0;
}

查看stat结构体代码（建议直接看后面的图片）


#if !defined _SYS_STAT_H && !defined _FCNTL_H
# error "Never include  directly; use  instead."
#endif

#ifndef _BITS_STRUCT_STAT_H
#define _BITS_STRUCT_STAT_H	1

/*
 * struct stat: the record filled in by stat()/lstat().
 *
 * The member order and padding depend on __x86_64__ and on
 * __USE_FILE_OFFSET64, so the layout must not be edited by hand.
 * Field descriptions follow the POSIX <sys/stat.h> definition.
 */
struct stat
  {
    __dev_t st_dev;		/* Device containing the file.  */
#ifndef __x86_64__
    unsigned short int __pad1;
#endif
#if defined __x86_64__ || !defined __USE_FILE_OFFSET64
    __ino_t st_ino;		/* File serial (inode) number.  */
#else
    __ino_t __st_ino;			/* Narrow inode slot; the 64-bit st_ino is at the end of the struct.  */
#endif
#ifndef __x86_64__
    __mode_t st_mode;			/* File type and permission bits.  */
    __nlink_t st_nlink;			/* Number of hard links.  */
#else
    __nlink_t st_nlink;		/* Number of hard links.  */
    __mode_t st_mode;		/* File type and permission bits.  */
#endif
    __uid_t st_uid;		/* User ID of the owner.  */
    __gid_t st_gid;		/* Group ID of the owning group.  */
#ifdef __x86_64__
    int __pad0;
#endif
    __dev_t st_rdev;		/* Device number, if the file is a device.  */
#ifndef __x86_64__
    unsigned short int __pad2;
#endif
#if defined __x86_64__ || !defined __USE_FILE_OFFSET64
    __off_t st_size;			/* Size of the file in bytes.  */
#else
    __off64_t st_size;			/* Size of the file in bytes (64-bit).  */
#endif
    __blksize_t st_blksize;	/* Preferred block size for I/O.  */
#if defined __x86_64__  || !defined __USE_FILE_OFFSET64
    __blkcnt_t st_blocks;		/* Number of blocks allocated.  */
#else
    __blkcnt64_t st_blocks;		/* Number of blocks allocated (64-bit).  */
#endif
#ifdef __USE_XOPEN2K8
    /* Timestamps as struct timespec; the st_*time macros below keep the
       older seconds-only member names working.  */
    struct timespec st_atim;		/* Time of last access.  */
    struct timespec st_mtim;		/* Time of last modification.  */
    struct timespec st_ctim;		/* Time of last status change.  */
# define st_atime st_atim.tv_sec	
# define st_mtime st_mtim.tv_sec
# define st_ctime st_ctim.tv_sec
#else
    __time_t st_atime;			/* Time of last access.  */
    __syscall_ulong_t st_atimensec;	/* Presumably the nanosecond part of the access time.  */
    __time_t st_mtime;			/* Time of last modification.  */
    __syscall_ulong_t st_mtimensec;	/* Presumably the nanosecond part of the modification time.  */
    __time_t st_ctime;			/* Time of last status change.  */
    __syscall_ulong_t st_ctimensec;	/* Presumably the nanosecond part of the status-change time.  */
#endif
#ifdef __x86_64__
    __syscall_slong_t __glibc_reserved[3];
#else
# ifndef __USE_FILE_OFFSET64
    unsigned long int __glibc_reserved4;
    unsigned long int __glibc_reserved5;
# else
    __ino64_t st_ino;			/* File serial (inode) number, 64-bit.  */
# endif
#endif
  };

#ifdef __USE_LARGEFILE64
/*
 * struct stat64: variant of struct stat, only declared when
 * __USE_LARGEFILE64 is defined. It uses 64-bit inode and block-count
 * types on every target and a 64-bit size on non-x86_64 targets.
 * Field descriptions follow the POSIX <sys/stat.h> definition.
 */
struct stat64
  {
    __dev_t st_dev;		/* Device containing the file.  */
# ifdef __x86_64__
    __ino64_t st_ino;		/* File serial (inode) number.  */
    __nlink_t st_nlink;		/* Number of hard links.  */
    __mode_t st_mode;		/* File type and permission bits.  */
# else
    unsigned int __pad1;
    __ino_t __st_ino;			/* Narrow inode slot; the 64-bit st_ino is at the end of the struct.  */
    __mode_t st_mode;			/* File type and permission bits.  */
    __nlink_t st_nlink;			/* Number of hard links.  */
# endif
    __uid_t st_uid;		/* User ID of the owner.  */
    __gid_t st_gid;		/* Group ID of the owning group.  */
# ifdef __x86_64__
    int __pad0;
    __dev_t st_rdev;		/* Device number, if the file is a device.  */
    __off_t st_size;		/* Size of the file in bytes.  */
# else
    __dev_t st_rdev;			/* Device number, if the file is a device.  */
    unsigned int __pad2;
    __off64_t st_size;			/* Size of the file in bytes (64-bit).  */
# endif
    __blksize_t st_blksize;	/* Preferred block size for I/O.  */
    __blkcnt64_t st_blocks;	/* Number of blocks allocated (64-bit).  */
# ifdef __USE_XOPEN2K8
    /* Timestamps as struct timespec.  */
    struct timespec st_atim;		/* Time of last access.  */
    struct timespec st_mtim;		/* Time of last modification.  */
    struct timespec st_ctim;		/* Time of last status change.  */
# else
    __time_t st_atime;			/* Time of last access.  */
    __syscall_ulong_t st_atimensec;	/* Presumably the nanosecond part of the access time.  */
    __time_t st_mtime;			/* Time of last modification.  */
    __syscall_ulong_t st_mtimensec;	/* Presumably the nanosecond part of the modification time.  */
    __time_t st_ctime;			/* Time of last status change.  */
    __syscall_ulong_t st_ctimensec;	/* Presumably the nanosecond part of the status-change time.  */
# endif
# ifdef __x86_64__
    __syscall_slong_t __glibc_reserved[3];
# else
    __ino64_t st_ino;			/* File serial (inode) number, 64-bit.  */
# endif
  };
#endif


#define	_STATBUF_ST_BLKSIZE
#define _STATBUF_ST_RDEV

#define _STATBUF_ST_NSEC

#endif

其中st_mode的信息存储方式如下
如果要判断某个权限，我们使用按位与的操作看01即可
如果要判断某个文件类型，我们需要先和掩码按位与

(st_mode & S_IFMT)==S_IFREG

stat和lstat的区别是，如果b.txt链接到a.txt那么stat获取的是a.txt的信息，而lstat获取的是b.txt的信息，也就是链接的信息

在这个博客中使用stat模拟实现了linux的ls -l命令，感兴趣可以看看

文件属性操作函数 access


#include <unistd.h>
#include <stdio.h>

/*
 * Demo: test whether a.txt exists using access() with F_OK.
 *
 * Returns 0 and prints a confirmation if the file exists, otherwise
 * reports the error and returns -1.
 */
int main(void) {

    int ret = access("a.txt", F_OK);
    if (ret == -1) {
        perror("access");
        // Return here so the "file exists" message is not printed for a
        // file that could not be found.
        return -1;
    }

    printf("文件存在！！!\n");

    return 0;
}

chmod

#include <sys/stat.h>
#include <stdio.h>

/*
 * Demo: change the permission bits of a.txt to 0777 with chmod().
 *
 * Returns 0 on success, -1 if chmod() fails.
 */
int main(void) {

    // The leading 0 makes the literal octal: rwx for owner, group and others.
    int ret = chmod("a.txt", 0777);

    if (ret == -1) {
        perror("chmod");
        return -1;
    }

    return 0;
}

chown

使用下面两个命令可以查询用户和组的id

vim /etc/passwd
vim /etc/group

chown也就是change owner改变拥有者和用户组

/* Change the owner (user ID `owner`) and group (group ID `group`) of the
   file named by `pathname`. */
int chown(const char *pathname, uid_t owner, gid_t group);

truncate


#include <unistd.h>
#include <sys/types.h>
#include <stdio.h>

/*
 * Demo: set the length of b.txt to exactly 5 bytes with truncate().
 *
 * Returns 0 on success, -1 if truncate() fails.
 */
int main(void) {

    // The second argument is the new file length in bytes.
    int ret = truncate("b.txt", 5);

    if (ret == -1) {
        perror("truncate");
        return -1;
    }

    return 0;
}

目录操作函数 mkdir


#include <sys/stat.h>
#include <sys/types.h>
#include <stdio.h>

/*
 * Demo: create the directory "aaa" with mkdir().
 *
 * Returns 0 on success, -1 if mkdir() fails (for example when the
 * directory already exists).
 */
int main(void) {

    // The leading 0 makes the literal octal; a plain 777 would be read as
    // a decimal number and produce unintended permission bits.
    int ret = mkdir("aaa", 0777);

    if (ret == -1) {
        perror("mkdir");
        return -1;
    }

    return 0;
}

权限0777,不要漏掉0,不然默认10进制
一个目录一定要有可执行权限才能进入目录
最终权限为mode & ~umask

rmdir

删除空目录

/* Remove the directory named by `pathname`; the directory must be empty. */
int rmdir(const char *pathname);

rename

更改目录名

#include <stdio.h>

/*
 * Demo: rename the directory "aaa" to "bbb" with rename().
 *
 * Returns 0 on success, -1 if rename() fails.
 */
int main(void) {

    int ret = rename("aaa", "bbb");

    if (ret == -1) {
        perror("rename");
        return -1;
    }

    return 0;
}

chdir & getcwd

chdir修改进程的工作目录，类似shell当中的cd
getcwd:类似shell中的pwd，获取当前的工作目录

#include <unistd.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>

/*
 * Demo: print the working directory, change it with chdir(), create a
 * file relative to the new directory, then print the working directory
 * again.
 *
 * Returns 0 on success, -1 if getcwd, chdir or open fails.
 */
int main(void) {

    // Current working directory before the change. getcwd() returns NULL
    // on failure (for example when the path does not fit in the buffer),
    // in which case the buffer content must not be printed.
    char buf[128];
    if (getcwd(buf, sizeof(buf)) == NULL) {
        perror("getcwd");
        return -1;
    }
    printf("当前的工作目录是：%s\n", buf);

    // Change the working directory of this process.
    int ret = chdir("/home/nowcoder/Linux/lesson13");
    if (ret == -1) {
        perror("chdir");
        return -1;
    }

    // A relative path is now resolved against the new working directory.
    int fd = open("chdir.txt", O_CREAT | O_RDWR, 0664);
    if (fd == -1) {
        perror("open");
        return -1;
    }

    close(fd);

    // Current working directory after the change.
    char buf1[128];
    if (getcwd(buf1, sizeof(buf1)) == NULL) {
        perror("getcwd");
        return -1;
    }
    printf("当前的工作目录是：%s\n", buf1);

    return 0;
}

目录遍历函数

万物皆文件，目录也可以看作是一个文件
对于一个目录我们也有打开，读取，关闭的相关函数，他们分别为opendir、readdir和closedir

统计目录下文件数量

首先，我们需要判断输入参数是否合法，如果合法进入功能函数。
因为一个目录下还有目录，所以我们需要有个递归。
然后我们要一个个统计文件，所以我们函数里要有循环，那么这个函数的基本结构就是下面这样的

首先打开目录

进入循环，循环出去条件为读取到末尾
循环内部一个判断分支
如果是目录，就递归这个函数
如果是文件，计数器加1

最后关闭目录

返回计数器

#include <sys/types.h>
#include <dirent.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int getFileNum(const char * path);

/*
 * Entry point: print the number of regular files found under the
 * directory given as the first command-line argument.
 *
 * Returns 0 on success, -1 (after printing a usage line) when no
 * directory argument was supplied.
 */
int main(int argc, char * argv[]) {

    if (argc < 2) {
        // Usage: <program> path
        printf("%s path\n", argv[0]);
        return -1;
    }

    int num = getFileNum(argv[1]);

    printf("普通文件的个数为：%d\n", num);

    return 0;
}

/*
 * Recursively count the regular files beneath `path`.
 *
 * path: directory to scan.
 *
 * Returns the number of directory entries of type DT_REG found in `path`
 * and all of its subdirectories. "." and ".." are skipped. If a directory
 * cannot be opened the error is reported and the process exits with a
 * failure status. A subdirectory whose full path does not fit in the
 * local buffer is reported and skipped.
 *
 * NOTE(review): classification relies on dirent.d_type; on file systems
 * that report DT_UNKNOWN such entries are not counted -- confirm whether a
 * stat() fallback is wanted.
 */
int getFileNum(const char * path) {

    // 1. Open the directory.
    DIR *dir = opendir(path);
    if (dir == NULL) {
        perror("opendir");
        // Exit with a failure status so callers can tell the scan failed.
        exit(EXIT_FAILURE);
    }

    // Number of regular files seen so far.
    int total = 0;
    struct dirent *ptr;

    // 2. readdir() returns NULL once every entry has been read.
    while ((ptr = readdir(dir)) != NULL) {

        const char *dname = ptr->d_name;

        // Skip "." and ".." so the recursion does not loop forever.
        if (strcmp(dname, ".") == 0 || strcmp(dname, "..") == 0) {
            continue;
        }

        if (ptr->d_type == DT_DIR) {
            // Subdirectory: build "<path>/<name>" and descend into it.
            // snprintf() bounds the write and reports truncation.
            char newpath[4096];
            int n = snprintf(newpath, sizeof(newpath), "%s/%s", path, dname);
            if (n < 0 || (size_t)n >= sizeof(newpath)) {
                fprintf(stderr, "path too long, skipped: %s/%s\n", path, dname);
                continue;
            }
            total += getFileNum(newpath);
        } else if (ptr->d_type == DT_REG) {
            // Regular file.
            total++;
        }
    }

    // 3. Close the directory.
    closedir(dir);

    return total;
}

struct dirent 结构体

文件描述符相关函数 dup

复制文件描述符


#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>

/*
 * Demo: duplicate a file descriptor with dup() and show that the copy
 * stays usable after the original descriptor has been closed.
 *
 * Returns 0 on success, -1 if open, dup or write fails.
 */
int main(void) {

    int fd = open("a.txt", O_RDWR | O_CREAT, 0664);
    if (fd == -1) {
        perror("open");
        return -1;
    }

    // dup() returns a new descriptor that refers to the same open file.
    int fd1 = dup(fd);
    if (fd1 == -1) {
        perror("dup");
        close(fd);
        return -1;
    }

    printf("fd : %d , fd1 : %d\n", fd, fd1);

    // Closing the original does not affect the duplicate.
    close(fd);

    const char *str = "hello,world";
    ssize_t ret = write(fd1, str, strlen(str));
    if (ret == -1) {
        perror("write");
        close(fd1);
        return -1;
    }

    close(fd1);

    return 0;
}

dup2

重定向文件描述符
两个文件a,b,两个文件描述符fd,fd1
原来fd指向a,fd1指向b
使用dup2之后可以把fd1指向a，以后对fd1的读写操作就是对a文件的操作

#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>

/*
 * Demo: redirect a file descriptor with dup2().
 *
 * fd refers to 1.txt and fd1 to 2.txt. After dup2(fd, fd1), fd1 refers to
 * 1.txt as well, so data written through fd1 ends up in 1.txt.
 *
 * Returns 0 on success, -1 if open, dup2 or write fails.
 */
int main(void) {

    int fd = open("1.txt", O_RDWR | O_CREAT, 0664);
    if (fd == -1) {
        perror("open");
        return -1;
    }

    int fd1 = open("2.txt", O_RDWR | O_CREAT, 0664);
    if (fd1 == -1) {
        perror("open");
        close(fd);
        return -1;
    }

    printf("fd : %d, fd1 : %d\n", fd, fd1);

    // Make fd1 refer to the same open file as fd; on success the return
    // value is the new descriptor, i.e. fd1.
    int fd2 = dup2(fd, fd1);
    if (fd2 == -1) {
        perror("dup2");
        close(fd);
        close(fd1);
        return -1;
    }

    // Writing through fd1 now modifies 1.txt, not 2.txt.
    const char *str = "hello, dup2";
    ssize_t len = write(fd1, str, strlen(str));
    if (len == -1) {
        perror("write");
        close(fd);
        close(fd1);
        return -1;
    }

    printf("fd : %d, fd1 : %d, fd2 : %d\n", fd, fd1, fd2);

    close(fd);
    close(fd1);

    return 0;
}

fcntl

复制文件描述符
设置/获取文件的状态


#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

/*
 * Demo: use fcntl() to read and modify the status flags of an open file.
 *
 * The O_APPEND flag is added to the descriptor of 1.txt, so the following
 * write is appended to the end of the file.
 *
 * fcntl() can also duplicate a descriptor: fcntl(fd, F_DUPFD, 0) returns
 * a new descriptor for the same open file. That use is not exercised here.
 *
 * Returns 0 on success, -1 if open, fcntl or write fails.
 */
int main(void) {

    int fd = open("1.txt", O_RDWR);
    if (fd == -1) {
        perror("open");
        return -1;
    }

    // Read the current status flags first: F_SETFL replaces the flag set,
    // so the new flag has to be OR-ed into the existing value.
    int flag = fcntl(fd, F_GETFL);
    if (flag == -1) {
        perror("fcntl");
        close(fd);
        return -1;
    }
    flag |= O_APPEND;

    // Install the updated flags.
    int ret = fcntl(fd, F_SETFL, flag);
    if (ret == -1) {
        perror("fcntl");
        close(fd);
        return -1;
    }

    const char *str = "nihao";
    if (write(fd, str, strlen(str)) == -1) {
        perror("write");
        close(fd);
        return -1;
    }

    close(fd);

    return 0;
}

阻塞和非阻塞：描述的是函数调用的行为。

阻塞：阻塞调用是指调用结果返回之前，当前线程会被挂起。函数只有在得到结果之后才会返回（干不完不准回来）。

非阻塞：非阻塞和阻塞的概念相对应，指在不能立刻得到结果之前，该函数不会阻塞当前线程，而会立刻返回。

[Linux 高并发服务器]文件IO

Linux相关栏目本月热门文章