- 一.SpringBoot+Netty实现简单聊天室
- 1.项目创建
- 1.WebChatApplication
- 2.SessionGroup
- 3.SocketSession
- 4.User
- 5.WebSocketServer
- 6.WebSocketTextHandler
- 7.index.html
- 2.结果
- 二.动态网页的信息爬取
- 1.自动填充百度网页的查询关键字,完成自动搜索
- 2.爬取动态网页数据
- 3.requests+Selenium爬取京东图书
- 三.总结
- 四.参考文献
package com.example.webchat;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.ConfigurableApplicationContext;
import org.springframework.core.env.Environment;
import java.net.InetAddress;
import java.net.UnknownHostException;
/**
 * Entry point of the chat room demo.
 *
 * <p>Boots the Spring application context, prints the address of the web page and then
 * starts the Netty WebSocket server. {@code WebSocketServer.run} waits on the server
 * channel's close future, so {@code main} does not return while the server is up.
 */
@SpringBootApplication
public class WebChatApplication {

    /** Port the Netty WebSocket endpoint is bound to. */
    private static final int WEB_SOCKET_PORT = 53134;

    /** Port shown in the banner when {@code server.port} is not configured. */
    private static final String DEFAULT_HTTP_PORT = "8080";

    /**
     * Starts Spring Boot, prints the start-up banner and launches the WebSocket server.
     *
     * @param args command line arguments forwarded to Spring Boot
     * @throws UnknownHostException if the local host address cannot be resolved
     */
    public static void main(String[] args) throws UnknownHostException {
        ConfigurableApplicationContext application = SpringApplication.run(WebChatApplication.class, args);
        Environment env = application.getEnvironment();
        String host = InetAddress.getLocalHost().getHostAddress();
        // server.port is optional; without this fallback the banner would print "null".
        String port = env.getProperty("server.port");
        if (port == null) {
            port = DEFAULT_HTTP_PORT;
        }
        System.out.println("[----------------------------------------------------------]");
        System.out.println("聊天室启动成功!点击进入:\t http://" + host + ":" + port);
        System.out.println("[----------------------------------------------------------]");
        WebSocketServer.inst().run(WEB_SOCKET_PORT);
    }
}
2.SessionGroup
package com.example.webchat;
import com.google.gson.Gson;
import io.netty.channel.ChannelFuture;
import io.netty.channel.ChannelFutureListener;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.group.ChannelGroup;
import io.netty.channel.group.ChannelGroupFuture;
import io.netty.channel.group.DefaultChannelGroup;
import io.netty.handler.codec.http.websocketx.TextWebSocketFrame;
import io.netty.util.concurrent.ImmediateEventExecutor;
import org.springframework.util.StringUtils;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
public final class SessionGroup {
private static SessionGroup singleInstance = new SessionGroup();
// 组的映射
private ConcurrentHashMap groupMap = new ConcurrentHashMap<>();
public static SessionGroup inst() {
return singleInstance;
}
public void shutdownGracefully() {
Iterator groupIterator = groupMap.values().iterator();
while (groupIterator.hasNext()) {
ChannelGroup group = groupIterator.next();
group.close();
}
}
public void sendToOthers(Map result, SocketSession s) {
// 获取组
ChannelGroup group = groupMap.get(s.getGroup());
if (null == group) {
return;
}
Gson gson=new Gson();
String json = gson.toJson(result);
// 自己发送的消息不返回给自己
// Channel channel = s.getChannel();
// 从组中移除通道
// group.remove(channel);
ChannelGroupFuture future = group.writeAndFlush(new TextWebSocketframe(json));
future.addListener(f -> {
System.out.println("完成发送:"+json);
// group.add(channel);//发送消息完毕重新添加。
});
}
public void addSession(SocketSession session) {
String groupName = session.getGroup();
if (StringUtils.isEmpty(groupName)) {
// 组为空,直接返回
return;
}
ChannelGroup group = groupMap.get(groupName);
if (null == group) {
group = new DefaultChannelGroup(ImmediateEventExecutor.INSTANCE);
groupMap.put(groupName, group);
}
group.add(session.getChannel());
}
public void closeSession(SocketSession session, String echo) {
ChannelFuture sendFuture = session.getChannel().writeAndFlush(new TextWebSocketframe(echo));
sendFuture.addListener(new ChannelFutureListener() {
public void operationComplete(ChannelFuture future) {
System.out.println("关闭连接:"+echo);
future.channel().close();
}
});
}
public void closeSession(SocketSession session) {
ChannelFuture sendFuture = session.getChannel().close();
sendFuture.addListener(new ChannelFutureListener() {
public void operationComplete(ChannelFuture future) {
System.out.println("发送所有完成:"+session.getUser().getNickname());
}
});
}
public void sendMsg(ChannelHandlerContext ctx, String msg) {
ChannelFuture sendFuture = ctx.writeAndFlush(new TextWebSocketframe(msg));
sendFuture.addListener(f -> {//发送监听
System.out.println("对所有发送完成:"+msg);
});
}
}
3.SocketSession
package com.example.webchat;
import io.netty.channel.Channel;
import io.netty.channel.ChannelHandlerContext;
import io.netty.util.AttributeKey;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
/**
 * Per-connection state of a chat client.
 *
 * <p>The constructor stores the new session on its {@link Channel} under
 * {@link #SESSION_KEY}; the static {@code getSession} methods read it back from a channel
 * or handler context.
 */
public class SocketSession {
    /** Channel attribute under which the session is stored. */
    public static final AttributeKey<SocketSession> SESSION_KEY = AttributeKey.valueOf("SESSION_KEY");

    /** Channel this session belongs to. */
    private final Channel channel;
    /** User bound to this session; {@code null} until {@link #setUser(User)} is called. */
    private User user;
    /** Unique identifier of this session. */
    private final String sessionId;
    /** Name of the group (room) this session joined, or {@code null}. */
    private String group;
    /** Free-form attributes, guarded by this object's monitor. */
    private final Map<String, Object> map = new HashMap<>();

    /**
     * Creates a session for the given channel and attaches it to that channel.
     *
     * @param channel the client's channel (each client connection has its own)
     */
    public SocketSession(Channel channel) {
        this.channel = channel;
        this.sessionId = buildNewSessionId();
        channel.attr(SocketSession.SESSION_KEY).set(this);
    }

    /**
     * Looks up the session attached to the channel of the given handler context.
     *
     * @param ctx handler context of the client connection
     * @return the attribute value stored under {@link #SESSION_KEY}
     */
    public static SocketSession getSession(ChannelHandlerContext ctx) {
        return getSession(ctx.channel());
    }

    /**
     * Looks up the session attached to the given channel.
     *
     * @param channel the client's channel
     * @return the attribute value stored under {@link #SESSION_KEY}
     */
    public static SocketSession getSession(Channel channel) {
        return channel.attr(SocketSession.SESSION_KEY).get();
    }

    /**
     * @return the unique id of this session
     */
    public String getId() {
        return sessionId;
    }

    /** Builds a random UUID string with the dashes removed. */
    private static String buildNewSessionId() {
        return UUID.randomUUID().toString().replace("-", "");
    }

    /**
     * Stores a free-form attribute on this session.
     *
     * @param key   attribute name
     * @param value attribute value
     */
    public synchronized void set(String key, Object value) {
        map.put(key, value);
    }

    /**
     * Reads a free-form attribute, cast to the type the caller expects.
     *
     * @param key attribute name
     * @param <T> type the caller expects; not checked here, so a mismatch surfaces as a
     *            {@link ClassCastException} at the call site
     * @return the stored value, or {@code null} if the key is absent
     */
    @SuppressWarnings("unchecked") // the caller chooses T; the map itself stores plain Objects
    public synchronized <T> T get(String key) {
        return (T) map.get(key);
    }

    /**
     * @return {@code true} once a user has been bound to this session
     */
    public boolean isValid() {
        return getUser() != null;
    }

    public User getUser() {
        return user;
    }

    public void setUser(User user) {
        this.user = user;
    }

    public String getGroup() {
        return group;
    }

    public void setGroup(String group) {
        this.group = group;
    }

    public Channel getChannel() {
        return channel;
    }
}
4.User
package com.example.webchat;
import java.util.Objects;
public class User {
public String id;
public String nickname;
public User(String id, String nickname) {
super();
this.id = id;
this.nickname = nickname;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getNickname() {
return nickname;
}
public void setNickname(String nickname) {
this.nickname = nickname;
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
User user = (User) o;
return id.equals(user.getId());
}
@Override
public int hashCode() {
return Objects.hash(id);
}
public String getUid() {
return id;
}
}
5.WebSocketServer
package com.example.webchat;
import io.netty.bootstrap.ServerBootstrap;
import io.netty.channel.Channel;
import io.netty.channel.ChannelInitializer;
import io.netty.channel.ChannelPipeline;
import io.netty.channel.EventLoopGroup;
import io.netty.channel.nio.NioEventLoopGroup;
import io.netty.channel.socket.SocketChannel;
import io.netty.channel.socket.nio.NioServerSocketChannel;
import io.netty.handler.codec.http.HttpObjectAggregator;
import io.netty.handler.codec.http.HttpServerCodec;
import io.netty.handler.codec.http.websocketx.WebSocketServerProtocolHandler;
import io.netty.handler.codec.http.websocketx.extensions.compression.WebSocketServerCompressionHandler;
import io.netty.handler.stream.ChunkedWriteHandler;
import io.netty.handler.timeout.IdleStateHandler;
import java.util.concurrent.TimeUnit;
/**
 * Netty based WebSocket server of the chat room.
 *
 * <p>{@link #run(int)} binds the server and then waits until the server channel is
 * closed, so it blocks the calling thread.
 */
public class WebSocketServer {
    /** Shared instance returned by {@link #inst()}. */
    private static final WebSocketServer wbss = new WebSocketServer();

    private static final int READ_IDLE_TIME_OUT = 60; // reader idle timeout, in seconds
    private static final int WRITE_IDLE_TIME_OUT = 0; // writer idle timeout, in seconds
    private static final int ALL_IDLE_TIME_OUT = 0;   // all-idle timeout, in seconds

    /**
     * @return the shared server instance (the same object on every call)
     */
    public static WebSocketServer inst() {
        return wbss;
    }

    /**
     * Binds the WebSocket endpoint {@code /ws} on the given port and blocks until the
     * server channel is closed.
     *
     * @param port TCP port to listen on
     */
    public void run(int port) {
        EventLoopGroup bossGroup = new NioEventLoopGroup();
        EventLoopGroup workerGroup = new NioEventLoopGroup();

        // Register the hook before the blocking wait below; registered afterwards it would
        // only be installed once the server had already stopped.
        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            SessionGroup.inst().shutdownGracefully();
            bossGroup.shutdownGracefully();
            workerGroup.shutdownGracefully();
        }));

        try {
            ServerBootstrap b = new ServerBootstrap();
            b.group(bossGroup, workerGroup).channel(NioServerSocketChannel.class)
                    .childHandler(new ChannelInitializer<SocketChannel>() {
                        @Override
                        protected void initChannel(SocketChannel ch) throws Exception {
                            ChannelPipeline pipeline = ch.pipeline();
                            // HTTP request decoding / response encoding.
                            pipeline.addLast(new HttpServerCodec());
                            // Support for writing large data streams in chunks.
                            pipeline.addLast(new ChunkedWriteHandler());
                            // Aggregates the HttpRequest / HttpContent / LastHttpContent parts
                            // produced by the codec into a single full message (max 64 KiB).
                            pipeline.addLast(new HttpObjectAggregator(64 * 1024));
                            // WebSocket compression extension.
                            pipeline.addLast(new WebSocketServerCompressionHandler());
                            // WebSocket endpoint path, no sub-protocols, extensions allowed,
                            // maximum frame size 10 KiB.
                            pipeline.addLast(new WebSocketServerProtocolHandler("/ws", null, true, 10 * 1024));
                            // Fires an IdleStateEvent when nothing was read for READ_IDLE_TIME_OUT
                            // seconds; WebSocketTextHandler.userEventTriggered receives it.
                            pipeline.addLast(new IdleStateHandler(
                                    READ_IDLE_TIME_OUT, WRITE_IDLE_TIME_OUT, ALL_IDLE_TIME_OUT, TimeUnit.SECONDS));
                            // Application logic for text frames.
                            pipeline.addLast(new WebSocketTextHandler());
                        }
                    });
            Channel ch = b.bind(port).syncUninterruptibly().channel();
            ch.closeFuture().syncUninterruptibly();
        } finally {
            // Release the event loop threads when binding fails or the server channel closes.
            bossGroup.shutdownGracefully();
            workerGroup.shutdownGracefully();
        }
    }
}
6.WebSocketTextHandler
package com.example.webchat; import com.google.gson.Gson; import com.google.gson.reflect.TypeToken; import io.netty.channel.ChannelHandlerContext; import io.netty.channel.SimpleChannelInboundHandler; import io.netty.handler.codec.http.websocketx.TextWebSocketframe; import io.netty.handler.codec.http.websocketx.WebSocketServerProtocolHandler; import io.netty.handler.timeout.IdleState; import io.netty.handler.timeout.IdleStateEvent; import java.util.HashMap; import java.util.Map; import static com.fasterxml.jackson.databind.type.LogicalType.Map; public class WebSocketTextHandler extends SimpleChannelInboundHandler7.index.html{ @Override protected void channelRead0(ChannelHandlerContext ctx, TextWebSocketframe msg) throws Exception { SocketSession session = SocketSession.getSession(ctx); TypeToken > typeToken = new TypeToken >() { }; Gson gson=new Gson(); java.util.Map map = gson.fromJson(msg.text(), typeToken.getType()); User user = null; switch (map.get("type")) { case "msg": Map result = new HashMap<>(); user = session.getUser(); result.put("type", "msg"); result.put("msg", map.get("msg")); result.put("sendUser", user.getNickname()); SessionGroup.inst().sendToOthers(result, session); break; case "init": String room = map.get("room"); session.setGroup(room); String nick = map.get("nick"); user = new User(session.getId(), nick); session.setUser(user); SessionGroup.inst().addSession(session); break; } } @Override public void userEventTriggered(ChannelHandlerContext ctx, Object evt) throws Exception { // 是否握手成功,升级为 Websocket 协议 if (evt == WebSocketServerProtocolHandler.ServerHandshakeStateEvent.HANDSHAKE_COMPLETE) { // 握手成功,移除 HttpRequestHandler,因此将不会接收到任何消息 // 并把握手成功的 Channel 加入到 ChannelGroup 中 new SocketSession(ctx.channel()); } else if (evt instanceof IdleStateEvent) { IdleStateEvent stateEvent = (IdleStateEvent) evt; if (stateEvent.state() == IdleState.READER_IDLE) { System.out.println("bb22"); } } else { super.userEventTriggered(ctx, evt); } } }
在桌面新建一个html文件
群聊天室
群名:
昵称:
2.结果
二.动态网页的信息爬取
1.自动填充百度网页的查询关键字,完成自动搜索
打开百度网页
按F12查看百度网页的源码,找到搜索框的id以及搜索按钮的id
代码:
from selenium import webdriver
from selenium.webdriver.common.by import By

# Start Chrome through the local chromedriver binary; adjust the path to wherever the
# driver is stored on your machine.
# NOTE(review): recent Selenium releases no longer accept the driver path as a positional
# argument and expect a Service object instead - confirm against the installed version.
driver = webdriver.Chrome(r"E:\chromedriver.exe")
# Open the Baidu home page
driver.get("https://www.baidu.com/")
# Fill in the search box (element id "kw").
# find_element(By.ID, ...) replaces find_element_by_id, which newer Selenium versions removed.
search = driver.find_element(By.ID, "kw")
search.send_keys("重交")
# Simulate a click on the search button (element id "su")
send_button = driver.find_element(By.ID, "su")
send_button.click()
结果:
代码
from selenium import webdriver
from selenium.webdriver.common.by import By

# Start Chrome through the local chromedriver binary; adjust the path to wherever the
# driver is stored on your machine.
# NOTE(review): recent Selenium releases no longer accept the driver path as a positional
# argument and expect a Service object instead - confirm against the installed version.
driver = webdriver.Chrome(r"E:\chromedriver.exe")
# Header row for the scraped data
csvHeaders = ['作者', '名言']
# All scraped rows, one [author, quote] pair per entry
subjects = []
try:
    # Site with the quotes (rendered by JavaScript)
    driver.get("https://quotes.toscrape.com/js/")
    # Every element with class "quote" holds one quotation
    res_list = driver.find_elements(By.CLASS_NAME, "quote")
    for tmp in res_list:
        # Pull the author and the quote text out of the element
        subject = [
            tmp.find_element(By.CLASS_NAME, "author").text,
            tmp.find_element(By.CLASS_NAME, "text").text,
        ]
        print(subject)
        subjects.append(subject)
finally:
    # Always close the browser so no driver process is left running
    driver.quit()
结果:
代码:
import csv
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# NOTE(review): this starts Edge but points at a chromedriver binary - confirm the
# intended browser/driver pair and adjust the path to your machine.
driver = webdriver.Edge(r"E:\chromedriver.exe")
driver.set_window_size(1920, 1080)

# Number of books to collect
num = 200
# Collected books: title -> price
res_dict = {}

try:
    # JD home page
    driver.get("https://www.jd.com/")
    # Type the search keyword
    driver.find_element(By.ID, "key").send_keys("python编程")
    time.sleep(1)
    # Click the search button
    driver.find_element(By.CLASS_NAME, "button").click()
    time.sleep(1)
    # Switch to the most recently opened window
    windows = driver.window_handles
    driver.switch_to.window(windows[-1])
    time.sleep(1)

    while len(res_dict) < num:
        count_before = len(res_dict)
        # Height of the current page body, rounded down to a multiple of the scroll step
        max_height = driver.execute_script('return document.body.scrollHeight')
        max_height = (int(max_height / 1000)) * 1000
        # Scroll position reached so far on this page
        tmp_height = 1000
        while tmp_height < max_height and len(res_dict) < num:
            # Scroll down one step and read the book list again
            driver.execute_script("window.scrollBy(0,1000)")
            tmp_height += 1000
            J_goodsList = driver.find_element(By.ID, "J_goodsList")
            ul = J_goodsList.find_element(By.TAG_NAME, "ul")
            res_list = ul.find_elements(By.TAG_NAME, "li")
            for res in res_list:
                # Title as key, price as value; two different locator styles on purpose.
                title = res.find_element(By.CLASS_NAME, 'p-name').find_element(By.TAG_NAME, 'em').text
                # The leading "." keeps the XPath inside this <li>; a bare "//" searches the
                # whole document and would return the first price on the page for every book.
                price = res.find_element(By.XPATH, ".//div[@class='p-price']//i").text
                res_dict[title] = price
                if len(res_dict) == num:
                    break
            time.sleep(2)
        if len(res_dict) >= num:
            break
        if len(res_dict) == count_before:
            # This page added nothing new (e.g. the last page was reached): stop instead
            # of paging forever.
            break
        # Parent element of the "next page" button
        J_bottomPage = driver.find_element(By.ID, "J_bottomPage")
        # Click the "next page" button
        J_bottomPage.find_element(By.CLASS_NAME, "pn-next").click()
        # Switch to the newest window again
        windows = driver.window_handles
        driver.switch_to.window(windows[-1])
        time.sleep(3)
finally:
    # Always close the browser so no driver process is left running
    driver.quit()

# Header row
csvHeaders = ['书名', '价格']
# One [title, price] row per book
csvRows = [[title, price] for title, price in res_dict.items()]
# Save the result; utf-8-sig makes the encoding explicit (the platform default may not be
# able to encode every title) and lets spreadsheet tools detect it.
with open('./jd_books.csv', 'w', newline='', encoding='utf-8-sig') as file:
    fileWriter = csv.writer(file)
    fileWriter.writerow(csvHeaders)
    fileWriter.writerows(csvRows)
结果:
爬取成功,且能自动翻滚页面
csv保存成功
先试着把代码跑起来,再进行下一步学习。
四.参考文献
https://blog.csdn.net/m0_51120713/article/details/121855991?spm=1001.2014.3001.5501.
https://blog.csdn.net/xyf_fate/article/details/122000970?spm=1001.2014.3001.5501.



