您可以使用 Selenium 来抓取 Twitter 或 Facebook 之类的无限滚动网站。
步骤1:使用pip安装Selenium
pip install selenium
步骤2:使用下面的代码自动执行无限滚动并提取页面源代码
from selenium import webdriverfrom selenium.webdriver.common.by import Byfrom selenium.webdriver.common.keys import Keysfrom selenium.webdriver.support.ui import Selectfrom selenium.webdriver.support.ui import WebDriverWaitfrom selenium.common.exceptions import TimeoutExceptionfrom selenium.webdriver.support import expected_conditions as ECfrom selenium.common.exceptions import NoSuchElementExceptionfrom selenium.common.exceptions import NoalertPresentExceptionimport sysimport unittest, time, reclass Sel(unittest.TestCase): def setUp(self): self.driver = webdriver.Firefox() self.driver.implicitly_wait(30) self.base_url = "https://twitter.com" self.verificationErrors = [] self.accept_next_alert = True def test_sel(self): driver = self.driver delay = 3 driver.get(self.base_url + "/search?q=stckoverflow&src=typd") driver.find_element_by_link_text("All").click() for i in range(1,100): self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") time.sleep(4) html_source = driver.page_source data = html_source.enpre('utf-8')if __name__ == "__main__": unittest.main()步骤3:根据需要打印数据。



