I am new to Python and have written the threaded script below, which takes each line of a file and passes it to the get_result function. get_result should output the URL and its status code if the code is a 200 or 301.
The code is as follows:
import requests
import Queue
import threading
import re
import time
start_time = int(time.time())
regex_to_use = re.compile(r"^")
def get_result(q, partial_url):
    partial_url = regex_to_use.sub("%s" % "http://www.domain.com/", partial_url)
    r = requests.get(partial_url)
    status = r.status_code
    #result = "nothing"
    if status == 200 or status == 301:
        result = str(status) + " " + partial_url
        print(result)
# build the list of partial URLs from the file
file_list = [line.strip() for line in open('/home/shares/inbound/seo/feb-404s/list.csv', 'r')]
q = Queue.Queue()
for url in file_list:
    # for each partial URL, send it to the processing function get_result
    t = threading.Thread(target=get_result, args=(q, url))
    t.start()
end_time = int(time.time())
exec_time = end_time - start_time
print("execution time was " + str(exec_time))
I used Queue and threading, but the "execution time was x" line is printed before the threads have finished outputting their data. Typical output is:
200 www.domain.com/ok-url
200 www.domain.com/ok-url-1
200 www.domain.com/ok-url-2
execution time was 3
200 www.domain.com/ok-url-4
200 www.domain.com/ok-ur-5
200 www.domain.com/ok-url-6
Why is this happening, and how can I make the execution time print at the end of the script, i.e. only once all URLs have been processed and output?
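For context, here's a minimal sketch (the worker function and its sleep are just stand-ins for the requests.get call) illustrating the cause: start() only kicks the thread off and returns immediately, so the main thread races ahead unless it join()s the workers.

import threading
import time

def worker():
    time.sleep(1)                        # stand-in for a slow requests.get()
    print("worker finished")

t = threading.Thread(target=worker)
t.start()                                # returns immediately; worker runs in the background
print("main thread got here first")      # printed before "worker finished"
t.join()                                 # blocks until the worker is done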
Thanks to the answer given below by utdemir, here's the updated code using join():
import requests
import Queue
import threading
import re
import time
start_time = int(time.time())
regex_to_use = re.compile(r"^")
def get_result(q, partial_url):
    partial_url = regex_to_use.sub("%s" % "http://www.domain.com/", partial_url)
    r = requests.get(partial_url)
    status = r.status_code
    #result = "nothing"
    if status == 200 or status == 301:
        result = str(status) + " " + partial_url
        print(result)
# build the list of partial URLs from the file
file_list = [line.strip() for line in open('/home/shares/inbound/seo/feb-404s/list.csv', 'r')]
q = Queue.Queue()
threads_list = []
for url in file_list:
    # for each partial URL, send it to the processing function get_result
    t = threading.Thread(target=get_result, args=(q, url))
    threads_list.append(t)
    t.start()

# wait for every thread to finish before stopping the clock
for thread in threads_list:
    thread.join()
end_time = int(time.time())
exec_time = end_time - start_time
print("execution time was " + str(exec_time))