I'm currently trying to read a webpage and then process its content, but I also want to implement exception handling for when the client is offline. This is the code I have so far:
from urllib.request import Request, urlopen
from lxml import html
def get_webSite(link):
    """Download *link* and return it parsed as an lxml HTML tree.

    The request is sent with a browser-like ``User-Agent`` header. The
    result is whatever ``html.fromstring`` produces for the response body,
    so it can be queried with XPath.

    Args:
        link: URL of the page to fetch.

    Returns:
        The parsed document returned by ``html.fromstring``.

    Raises:
        urllib.error.URLError: If the connection cannot be established
            (for example, when the host name cannot be resolved).
    """
    req = Request(link, headers={'User-Agent': 'Mozilla/5.0'})
    # Use the response as a context manager so the connection is closed
    # as soon as the body has been read, instead of being left open.
    with urlopen(req) as response:
        page_bytes = response.read()
    return html.fromstring(page_bytes)
# Script entry: fetch the page and print the parsed tree; the handler is
# meant to print a friendly message when the request fails with URLError.
try:
# NOTE(review): this string has no scheme ("http://" / "https://"), while the
# traceback was produced with a full https:// URL -- confirm which is intended.
htmlPage = get_webSite("www.example.com")
print(htmlPage)
# NOTE(review): the visible imports only bring `Request` and `urlopen` into
# scope (from urllib.request); the bare name `urllib` is never bound here, so
# evaluating this `except` clause raises NameError (see the final traceback)
# instead of catching URLError.
except urllib.error.URLError as e:
print("You're offline, try again later")
The problem is that Python does not seem to recognize "urllib.error.URLError", which gets thrown if there is no internet connection. IntelliJ always tells me "Unresolved reference urllib".
This is the console output when there is no internet connection:
"C:\Program Files\Python39\python.exe" C:/test/test.py
Traceback (most recent call last):
File "C:\Program Files\Python39\lib\urllib\request.py", line 1342, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "C:\Program Files\Python39\lib\http\client.py", line 1255, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Program Files\Python39\lib\http\client.py", line 1301, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "C:\Program Files\Python39\lib\http\client.py", line 1250, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "C:\Program Files\Python39\lib\http\client.py", line 1010, in _send_output
self.send(msg)
File "C:\Program Files\Python39\lib\http\client.py", line 950, in send
self.connect()
File "C:\Program Files\Python39\lib\http\client.py", line 1417, in connect
super().connect()
File "C:\Program Files\Python39\lib\http\client.py", line 921, in connect
self.sock = self._create_connection(
File "C:\Program Files\Python39\lib\socket.py", line 822, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
File "C:\Program Files\Python39\lib\socket.py", line 953, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 11001] getaddrinfo failed
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/test/test.py", line 17, in <module>
htmlPage = get_webSite("https://www.google.com")
File "C:/test/test.py", line 10, in get_webSite
return html.fromstring(urlopen(req).read())
File "C:\Program Files\Python39\lib\urllib\request.py", line 214, in urlopen
return opener.open(url, data, timeout)
File "C:\Program Files\Python39\lib\urllib\request.py", line 517, in open
response = self._open(req, data)
File "C:\Program Files\Python39\lib\urllib\request.py", line 534, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "C:\Program Files\Python39\lib\urllib\request.py", line 494, in _call_chain
result = func(*args)
File "C:\Program Files\Python39\lib\urllib\request.py", line 1385, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "C:\Program Files\Python39\lib\urllib\request.py", line 1345, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 11001] getaddrinfo failed>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/test/test.py", line 19, in <module>
except urllib.error.URLError as e:
NameError: name 'urllib' is not defined
Process finished with exit code 1
I'm only able to catch this error with "except:" but not with "except urllib.error.URLError:". How can I solve that?