新年快乐。欢迎来到本书的最后一个实验。
《深入理解计算机系统》实验八Proxy Lab下载和官方文档机翻请看:
https://blog.csdn.net/weixin_43362650/article/details/122770330
我觉得这个文档对整个实验很有帮助。
实验任务编写一个简单的HTTP代理服务器
代理服务器英文全称是Proxy Server,其功能就是代理网络用户去取得网络信息。形象的说:它是网络信息的中转站。在一般情况下,我们使用网络浏览器直接去连接其他Internet站点取得网络信息时,须送出Request信号来得到回答,然后对方再把信息以bit方式传送回来。代理服务器是介于浏览器和Web服务器之间的一台服务器,有了它之后,浏览器不是直接到Web服务器去取回网页而是向代理服务器发出请求,Request信号会先送到代理服务器,由代理服务器来取回浏览器所需要的信息并传送给你的浏览器。–百度百科
原本客户端与服务器的关系。
加上代理后,就变成了"客户端 ⇄ 代理服务器 ⇄ 服务器"的关系。
代理服务器要做的就是接收客户端发送的请求,经过自己的处理后发送请求到服务端,服务端响应的数据到代理服务器后转发回给客户端。
本实验的代理服务器分为3个阶段
- 第一部分:实现顺序web代理
- 第二部分:处理多个并发请求
- 第三部分:缓存web对象
代码如下(详情看注释)
#include#include "csapp.h" #define MAX_CACHE_SIZE 1049000 #define MAX_OBJECT_SIZE 102400 static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3rn"; void doit(int clientfd); int parse_uri(char *uri,char *hostname,char *path,char *port,char *request_head); void read_requesthdrs(rio_t *rp,int fd); void return_content(int serverfd, int clientfd); int main(int argc,char **argv) { int listenfd,connfd; char hostname[MAXLINE],port[MAXLINE]; socklen_t clientlen; struct sockaddr_storage clientaddr; if(argc != 2){ fprintf(stderr, "usage: %s n", argv[0]); exit(1); } listenfd = Open_listenfd(argv[1]); while(1){ clientlen = sizeof(clientaddr); connfd = Accept(listenfd,(SA *)&clientaddr,&clientlen); Getnameinfo((SA *)&clientaddr,clientlen,hostname,MAXLINE,port,MAXLINE,0); printf("Accepted connection from (%s, %s)n",hostname,port); doit(connfd); Close(connfd); } } void doit(int clientfd){ char buf[MAXLINE],method[MAXLINE],uri[MAXLINE],version[MAXLINE]; char hostname[MAXLINE],path[MAXLINE],port[MAXLINE],request_head[MAXLINE]; int serverfd; rio_t rio; Rio_readinitb(&rio,clientfd); Rio_readlineb(&rio,buf,MAXLINE); sscanf(buf,"%s %s %s",method,uri,version); if(strcasecmp(method,"GET")){ printf("Not implemented"); return; } parse_uri(uri,hostname,path,port,request_head); serverfd = Open_clientfd(hostname,port); Rio_writen(serverfd,request_head,strlen(request_head)); read_requesthdrs(&rio,serverfd); return_content(serverfd,clientfd); } int parse_uri(char *uri,char *hostname,char *path,char *port,char *request_head){ sprintf(port,"80"); //默认值 char *end,*bp; char *tail = uri+strlen(uri); //uri的最后一个字符,不是' '。 char *bg = strstr(uri,"//"); bg = (bg!=NULL ? 
bg+2 : uri); //取hostname的开头。 end = bg; //取hostname的结尾。 while(*end != '/' && *end != ':') end++; strncpy(hostname,bg,end-bg); bp = end + 1; //取port的开头 if(*end == ':'){ //==':'说明uri中有port end++; bp = strstr(bg,"/"); //取port的结尾 strncpy(port,end,bp-end); end = bp; //取uri的开头 } strncpy(path,end,(int)(tail-end)+1); sprintf(request_head,"GET %s HTTP/1.0rnHost: %srn",path,hostname); return 1; } void read_requesthdrs(rio_t *rp,int fd){ char buf[MAXLINE]; sprintf(buf, "%s", user_agent_hdr); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Connection: closern"); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Proxy-Connection: closern"); Rio_writen(fd, buf, strlen(buf)); for(Rio_readlineb(rp,buf,MAXLINE);strcmp(buf,"rn");Rio_readlineb(rp,buf,MAXLINE)){ if(strncmp("Host",buf,4) == 0 || strncmp("User-Agent",buf,10) == 0 || strncmp("Connection",buf,10) == 0 || strncmp("Proxy-Connection",buf,16) == 0) continue; printf("%s",buf); Rio_writen(fd,buf,strlen(buf)); } Rio_writen(fd,buf,strlen(buf)); return; } void return_content(int serverfd, int clientfd){ size_t n; char buf[MAXLINE]; rio_t srio; Rio_readinitb(&srio,serverfd); while((n = Rio_readlineb(&srio,buf,MAXLINE)) != 0){ Rio_writen(clientfd,buf,n); } }
编译
可能有些环境没安装好,我第一次就是没有“curl”。
linux> apt-get install curl
运行
可以看到第一部分已经完成。
参考《CS:APP3e》12.3.8节"基于线程的并发服务器",修改第一部分的代码(修改了main函数,并添加了thread函数)。
#include#include "csapp.h" #define MAX_CACHE_SIZE 1049000 #define MAX_OBJECT_SIZE 102400 static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3rn"; void doit(int clientfd); int parse_uri(char *uri,char *hostname,char *path,char *port,char *request_head); void read_requesthdrs(rio_t *rp,int fd); void return_content(int serverfd, int clientfd); void *thread(void *vargp); int main(int argc,char **argv) { int listenfd; int *connfd; char hostname[MAXLINE],port[MAXLINE]; socklen_t clientlen; struct sockaddr_storage clientaddr; pthread_t tid; if(argc != 2){ fprintf(stderr, "usage: %s n", argv[0]); exit(1); } listenfd = Open_listenfd(argv[1]); while(1){ clientlen = sizeof(clientaddr); connfd = Malloc(sizeof(int)); *connfd = Accept(listenfd,(SA *)&clientaddr,&clientlen); Getnameinfo((SA *)&clientaddr,clientlen,hostname,MAXLINE,port,MAXLINE,0); printf("Accepted connection from (%s, %s)n",hostname,port); Pthread_create(&tid,NULL,thread,connfd); } } void *thread(void *vargp){ int connfd = *((int *)vargp); Pthread_detach(pthread_self()); Free(vargp); doit(connfd); Close(connfd); return NULL; } void doit(int clientfd){ char buf[MAXLINE],method[MAXLINE],uri[MAXLINE],version[MAXLINE]; char hostname[MAXLINE],path[MAXLINE],port[MAXLINE],request_head[MAXLINE]; int serverfd; rio_t rio; Rio_readinitb(&rio,clientfd); Rio_readlineb(&rio,buf,MAXLINE); sscanf(buf,"%s %s %s",method,uri,version); if(strcasecmp(method,"GET")){ printf("Not implemented"); return; } parse_uri(uri,hostname,path,port,request_head); serverfd = Open_clientfd(hostname,port); Rio_writen(serverfd,request_head,strlen(request_head)); read_requesthdrs(&rio,serverfd); return_content(serverfd,clientfd); } int parse_uri(char *uri,char *hostname,char *path,char *port,char *request_head){ sprintf(port,"80"); //默认值 char *end,*bp; char *tail = uri+strlen(uri); //uri的最后一个字符,不是' '。 char *bg = strstr(uri,"//"); bg = (bg!=NULL ? 
bg+2 : uri); //取hostname的开头。 end = bg; //取hostname的结尾。 while(*end != '/' && *end != ':') end++; strncpy(hostname,bg,end-bg); bp = end + 1; //取port的开头 if(*end == ':'){ //==':'说明uri中有port end++; bp = strstr(bg,"/"); //取port的结尾 strncpy(port,end,bp-end); end = bp; //取uri的开头 } strncpy(path,end,(int)(tail-end)+1); sprintf(request_head,"GET %s HTTP/1.0rnHost: %srn",path,hostname); return 1; } void read_requesthdrs(rio_t *rp,int fd){ char buf[MAXLINE]; sprintf(buf, "%s", user_agent_hdr); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Connection: closern"); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Proxy-Connection: closern"); Rio_writen(fd, buf, strlen(buf)); for(Rio_readlineb(rp,buf,MAXLINE);strcmp(buf,"rn");Rio_readlineb(rp,buf,MAXLINE)){ if(strncmp("Host",buf,4) == 0 || strncmp("User-Agent",buf,10) == 0 || strncmp("Connection",buf,10) == 0 || strncmp("Proxy-Connection",buf,16) == 0) continue; printf("%s",buf); Rio_writen(fd,buf,strlen(buf)); } Rio_writen(fd,buf,strlen(buf)); return; } void return_content(int serverfd, int clientfd){ size_t n; char buf[MAXLINE]; rio_t srio; Rio_readinitb(&srio,serverfd); while((n = Rio_readlineb(&srio,buf,MAXLINE)) != 0){ Rio_writen(clientfd,buf,n); } }
运行
可以看到第二部分已经完成
采用读者优先,代码如下
#include#include "csapp.h" #define MAX_CACHE_SIZE 1049000 #define MAX_OBJECT_SIZE 102400 #define MAX_CACHE 10 static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3rn"; void doit(int clientfd); int parse_uri(char *uri,char *hostname,char *path,char *port,char *request_head); void read_requesthdrs(rio_t *rp,int fd); void return_content(int serverfd, int clientfd,char *url); void *thread(void *vargp); int maxlrucache(); struct RWLOCK_T{ sem_t lock; //基本锁 sem_t writeLock; //写着锁 int readcnt; //读者个数 }; struct CACHE{ int lruNumber; //引用次数,根据大小排位。大的表示最近引用过 char url[MAXLINE]; //通过url唯一标识对应content char content[MAX_OBJECT_SIZE]; }; struct CACHE cache[MAX_CACHE]; //缓存,最多有MAX_CACHE个 struct RWLOCK_T* rw; //读写者锁指针 void rwlock_init(); //初始化读写者锁指针 char *readcache(char *url); //读缓存 void writecache(char *buf,char *url); //写缓存 int main(int argc,char **argv) { int listenfd; int *connfd; char hostname[MAXLINE],port[MAXLINE]; socklen_t clientlen; struct sockaddr_storage clientaddr; pthread_t tid; if(argc != 2){ fprintf(stderr, "usage: %s n", argv[0]); exit(1); } rw = Malloc(sizeof(struct RWLOCK_T)); rwlock_init(); listenfd = Open_listenfd(argv[1]); while(1){ clientlen = sizeof(clientaddr); connfd = Malloc(sizeof(int)); *connfd = Accept(listenfd,(SA *)&clientaddr,&clientlen); Getnameinfo((SA *)&clientaddr,clientlen,hostname,MAXLINE,port,MAXLINE,0); printf("Accepted connection from (%s, %s)n",hostname,port); Pthread_create(&tid,NULL,thread,connfd); } } void *thread(void *vargp){ int connfd = *((int *)vargp); Pthread_detach(pthread_self()); Free(vargp); doit(connfd); Close(connfd); return NULL; } void doit(int clientfd){ char buf[MAXLINE],method[MAXLINE],uri[MAXLINE],version[MAXLINE]; char hostname[MAXLINE],path[MAXLINE],port[MAXLINE],request_head[MAXLINE]; int serverfd; rio_t rio; Rio_readinitb(&rio,clientfd); Rio_readlineb(&rio,buf,MAXLINE); sscanf(buf,"%s %s %s",method,uri,version); if(strcasecmp(method,"GET")){ 
printf("Not implemented"); return; } char *content = readcache(uri); if(content != NULL){ Rio_writen(clientfd,content,strlen(content)); free(content); }else{ parse_uri(uri,hostname,path,port,request_head); serverfd = Open_clientfd(hostname,port); Rio_writen(serverfd,request_head,strlen(request_head)); read_requesthdrs(&rio,serverfd); return_content(serverfd,clientfd,uri); } } int parse_uri(char *uri,char *hostname,char *path,char *port,char *request_head){ sprintf(port,"80"); //默认值 char *end,*bp; char *tail = uri+strlen(uri); //uri的最后一个字符,不是' '。 char *bg = strstr(uri,"//"); bg = (bg!=NULL ? bg+2 : uri); //取hostname的开头。 end = bg; //取hostname的结尾。 while(*end != '/' && *end != ':') end++; strncpy(hostname,bg,end-bg); bp = end + 1; //取port的开头 if(*end == ':'){ //==':'说明uri中有port end++; bp = strstr(bg,"/"); //取port的结尾 strncpy(port,end,bp-end); end = bp; //取uri的开头 } strncpy(path,end,(int)(tail-end)+1); sprintf(request_head,"GET %s HTTP/1.0rnHost: %srn",path,hostname); return 1; } void read_requesthdrs(rio_t *rp,int fd){ char buf[MAXLINE]; sprintf(buf, "%s", user_agent_hdr); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Connection: closern"); Rio_writen(fd, buf, strlen(buf)); sprintf(buf, "Proxy-Connection: closern"); Rio_writen(fd, buf, strlen(buf)); for(Rio_readlineb(rp,buf,MAXLINE);strcmp(buf,"rn");Rio_readlineb(rp,buf,MAXLINE)){ if(strncmp("Host",buf,4) == 0 || strncmp("User-Agent",buf,10) == 0 || strncmp("Connection",buf,10) == 0 || strncmp("Proxy-Connection",buf,16) == 0) continue; printf("%s",buf); Rio_writen(fd,buf,strlen(buf)); } Rio_writen(fd,buf,strlen(buf)); return; } void return_content(int serverfd, int clientfd,char *uri){ size_t n,size = 0; char buf[MAXLINE],content[MAX_OBJECT_SIZE]; rio_t srio; Rio_readinitb(&srio,serverfd); while((n = Rio_readlineb(&srio,buf,MAXLINE)) != 0){ Rio_writen(clientfd,buf,n); if(n + size <= MAX_OBJECT_SIZE){ sprintf(content + size,"%s",buf); size += n; }else{ size = MAX_OBJECT_SIZE + 1; } } writecache(content,uri); } void 
rwlock_init(){ rw->readcnt = 0; sem_init(&rw->lock,0,1); sem_init(&rw->writeLock,0,1); } void writecache(char *buf,char *url){ sem_wait(&rw->writeLock); //等待获得写者锁 int index; for(index = 0;index < MAX_CACHE;index++){ if(cache[index].lruNumber == 0){ break; } } if(index == MAX_CACHE){ int minlru = cache[0].lruNumber; for(int i = 1;i < MAX_CACHE;i++){ if(cache[i].lruNumber < minlru){ minlru = cache[i].lruNumber; index = i; } } } cache[index].lruNumber = maxlrucache()+1; strcpy(cache[index].url,url); strcpy(cache[index].content,buf); sem_post(&rw->writeLock); //释放锁 return; } char *readcache(char *url){ sem_wait(&rw->lock); //读者等待并获取锁 if(rw->readcnt == 1) sem_wait(&rw->writeLock); //读者在读,不允许有写者 rw->readcnt++; sem_post(&rw->lock); //释放锁 char *content = NULL; for(int i = 0;i < MAX_CACHE;i++){ if(strcmp(url,cache[i].url) == 0){ content = (char *)Malloc(strlen(cache[i].content)); strcpy(content,cache[i].content); int maxlru = maxlrucache(); //获取最大的lru cache[i].lruNumber = maxlru+1; //+1成最大的lru break; } } sem_wait(&rw->lock); //等待并获取锁 rw->readcnt--; if(rw->readcnt == 0) //没有读者了,释放写者锁 sem_post(&rw->writeLock); sem_post(&rw->lock); //释放锁 return content; } int maxlrucache(){ int i; int max=0; for(i = 0;i max){ max = cache[i].lruNumber; } } return max; }
运行结果
可以看到第三部分已经完成
本实验的检测分数不是太严格,实现基本功能就可以满分了。比如我把锁部分的代码删除一样也是满分,所以上面的代码不一定是严格正确的。



