Here is a sample script showing how to read websocket traffic with Selenium and Python, using Chrome and its performance-logging settings (see this post and this one).
Be aware that Chrome's log buffer is emptied every time you read the logs, so you must accumulate entries yourself if you read more than once.
Also: you need to define the helper function log_entries_to_dict() before you use it.
[I simply placed the definition of the function below the example code to make the example more readable.]
# Imports needed to run this snippet stand-alone
from selenium import webdriver

import copy  # used by log_entries_to_dict()
import json  # used by log_entries_to_dict()

# list accumulating the log entries across reads
# (the driver's own log buffer is emptied on every get_log() call)
log_entries = list()

# Initialize Chrome WebDriver with performance logging enabled
chrome_options = webdriver.ChromeOptions()
# enable logging for websocket capture (in performance)
chrome_options.add_argument('--enable-logging')
chrome_options.add_argument('--log-level=0')
chrome_options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})
driver = webdriver.Chrome(options=chrome_options)

# Navigate to the target website
# (fixed: no stray quotes inside the URL string)
driver.get("https://dashboard.mywebsite.com/login")

# do something with the website like fill input fields and push buttons

# Read the captured network log entries (they are captured in the background)
# the driver-log is emptied on each get_log() call, so extend our own list!
log_entries.extend(driver.get_log("performance"))

# simplify the log entries (helper defined below)
log_entries_deserialized = log_entries_to_dict(log_entries)

# limit the log entries to traffic connected to websockets
network_str = "network.websocket"  # lower-cased prefix of the CDP method names
websocket_traffic = [entry for entry in log_entries_deserialized
                     if entry['method'].lower().startswith(network_str)]
# analyse the websocket_traffic

# then again: do something with the website

# Now read the newly captured network log entries and extend the list
# containing the old log entries
log_entries.extend(driver.get_log("performance"))
log_entries_deserialized = log_entries_to_dict(log_entries)
websocket_traffic = [entry for entry in log_entries_deserialized
                     if entry['method'].lower().startswith(network_str)]
# again: analyse the websocket traffic
# repeat as often as needed
def log_entries_to_dict(inp_list: list, optimize_urls: bool = True) -> list:
    """Convert a list of Selenium performance-log entries into python dicts.

    Each input entry is a dict as returned by ``driver.get_log("performance")``;
    its ``message`` value is a JSON string that is deserialized here, and the
    CDP ``params`` are flattened into the resulting dict.

    :param inp_list: raw entries from ``driver.get_log("performance")``
    :param optimize_urls: if True, propagate the ``documentURL`` (which is only
        transferred at the start of a connection) to every later entry with the
        same ``requestId`` so a human can follow a datastream more easily.
        This is purely cosmetic and never drops entries; set it to False to
        skip the extra work for performance reasons.
    :return: one dict per input entry, in input order
    """
    list_out = []
    # documentURL remembered per requestId (only used when optimize_urls)
    url_by_request_id = {}
    for list_entry in inp_list:
        # the interesting payload is a JSON string under the 'message' key
        obj = json.loads(list_entry.get("message"))
        message = obj.get("message")
        params = message.get("params", {})
        tmp_dict = {
            'method': message.get("method"),
            'level': list_entry['level'],
            'timestamp': list_entry['timestamp'],
            'webview': obj['webview'],
            # stays None unless params carries a 'requestId' (see update below)
            'requestId': None,
            # None when 'documentURL' is absent (it is only sent on
            # connection start)
            'url': params.get("documentURL"),
        }
        # flatten all CDP params into the dict (may set 'requestId' etc.)
        tmp_dict.update(params)
        if optimize_urls:
            request_id = tmp_dict['requestId']
            if request_id is not None:
                if tmp_dict['url'] is not None:
                    # first sighting of this stream: remember its URL
                    url_by_request_id.setdefault(request_id, tmp_dict['url'])
                elif request_id in url_by_request_id:
                    # later packet of a known stream: assign the stored URL
                    tmp_dict['url'] = url_by_request_id[request_id]
        # NOTE: every entry is appended — unlike the buggy original, which
        # silently dropped entries when optimize_urls was True
        list_out.append(tmp_dict)
    return list_out