I want to take a website URL and the maximum number of pages to crawl from user input and then crawl that website, but I can't find a working solution. Here's my code:
import requests
from bs4 import *
from urllib import request
# Address prefix to crawl; web_crawler appends the page number directly to it.
url1 = input("Enter url you want to crawl:")
# Highest page number to fetch; int() raises ValueError if the entry is not a whole number.
max_pages1 = int(input("Enter no. of pages you want to crawl:"))
def web_crawler(max_pages, url, timeout=10):
    """Print the ``href`` of every bookmark link on a run of numbered pages.

    Page addresses are built by appending the page number (1, 2, ...,
    ``max_pages``) directly to ``url``, so ``url`` must already end at the
    point where the number belongs.

    Only ``<a rel="bookmark">`` anchors are reported; a page that does not
    mark its links this way produces no output.

    Args:
        max_pages: Highest page number to fetch. Values below 1 fetch nothing.
        url: Address prefix the page number is appended to.
        timeout: Seconds to wait for each response; without a limit a stalled
            server would block the crawl indefinitely.
    """
    for page in range(1, max_pages + 1):
        page_url = str(url) + str(page)
        response = requests.get(page_url, timeout=timeout)
        soup = BeautifulSoup(response.text, "html.parser")
        for anchor in soup.find_all("a", {"rel": "bookmark"}):
            href = anchor.get("href")
            # Anchors with a missing or empty href have no address to report.
            if href:
                print(href)
def info_about_web_pages(url, timeout=10):
    """Print the set of distinct link targets found on the page at ``url``.

    Every ``<a>`` element that carries an ``href`` attribute contributes its
    ``href`` value; anchors without one are ignored so the set never
    contains ``None``.

    Args:
        url: Address of the page to download and scan.
        timeout: Seconds to wait for the response; without a limit a stalled
            server would block the call indefinitely.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")
    # href=True restricts the match to anchors that have an href attribute.
    links = {anchor["href"] for anchor in soup.find_all("a", href=True)}
    print(links)
# Start the crawl using the page limit and address prefix read from the prompts.
web_crawler(max_pages1,url1)
It shows nothing in the output.