| |
main.py
import requests

# Fetch the Bing home page. The timeout is set explicitly because requests
# applies no timeout by default and would otherwise wait indefinitely on a
# server that never answers.
response = requests.get("https://www.bing.com/", timeout=10)
main.py
import requests
from bs4 import BeautifulSoup

# Only links that start with this prefix are followed by the crawler.
START_URL = "https://www.xbiqugew.com"

# URLs that have been discovered but not fetched yet, seeded with the
# first page to visit.
urls = [START_URL]
# Every URL that has ever been queued. Without this record, pages that link
# to each other would be re-queued and re-fetched forever.
seen = set(urls)

# Keep going until there is nothing left to visit.
while urls:
    # Take the next page to visit (most recently discovered first).
    current_url = urls.pop()

    # Download and parse the page. The explicit timeout keeps one
    # unresponsive server from stalling the whole crawl.
    response = requests.get(current_url, timeout=10)
    soup = BeautifulSoup(response.content, "html.parser")

    # Queue every on-site link that has not been queued before.
    for link_element in soup.select("a[href]"):
        url = link_element["href"]
        # A prefix test (rather than a substring test) avoids following
        # off-site links that merely embed the site address, e.g. in a
        # query string.
        if url.startswith(START_URL) and url not in seen:
            seen.add(url)
            urls.append(url)

    # NOTE(review): the original indentation was lost, so the intended
    # placement of this print is uncertain. It is kept inside the loop to
    # show the pending URLs after each page; placed after the loop it could
    # only ever print an empty list.
    print(urls)
系列课程并未全部上架，目前处于先行测试阶段。