I have been exploring multiprocessing in Python and how it differs from multithreading, and I have a question about writing to a file. With the following code, nothing gets written to the file, which I assume is because an open file handle is not shared across processes.
import multiprocessing
import sys
import datetime
import time

def worker(fd, index):
    fd.write("worker %d %s\n" % (index, datetime.datetime.now()))
    time.sleep(5 - index)
    fd.write("worker %d again %s\n" % (index, datetime.datetime.now()))

if __name__ == '__main__':
    fd = open(sys.argv[1], "w")
    threads = list()
    for i in xrange(5):
        th = multiprocessing.Process(target=worker, args=(fd, i,))
        threads.append(th)
        th.start()
    for each in threads:
        each.join()
    fd.close()
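From what I can tell, on Unix the children actually inherit the same underlying file descriptor via fork, so my guess is that the writes are lost because each child buffers them and exits without flushing. If that is right, then flushing inside the worker should make the output appear; here is a sketch of the same worker with explicit flushes (an untested assumption on my part, and Unix-specific):

import datetime
import time

def worker(fd, index):
    # Flush after each write so the data reaches the shared file
    # descriptor before the child process exits without flushing
    # its buffers.
    fd.write("worker %d %s\n" % (index, datetime.datetime.now()))
    fd.flush()
    time.sleep(5 - index)
    fd.write("worker %d again %s\n" % (index, datetime.datetime.now()))
    fd.flush()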
The following code, on the other hand, works fine, because threads share memory and therefore share the same open file object.
import threading
import sys
import datetime
import time

def worker(fd, index):
    fd.write("worker %d %s\n" % (index, datetime.datetime.now()))
    time.sleep(5 - index)
    fd.write("worker %d again %s\n" % (index, datetime.datetime.now()))

if __name__ == '__main__':
    fd = open(sys.argv[1], "w")
    threads = list()
    for i in xrange(5):
        th = threading.Thread(target=worker, args=(fd, i,))
        threads.append(th)
        th.start()
    for each in threads:
        each.join()
    fd.close()
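If I wanted to guarantee that lines from different threads never interleave mid-write, I assume I would wrap each write in a threading.Lock, something like this (my own sketch, not something the docs prescribe):

import threading
import datetime

write_lock = threading.Lock()

def worker(fd, index):
    # Hold the lock while writing so output from different threads
    # cannot interleave within a single line.
    with write_lock:
        fd.write("worker %d %s\n" % (index, datetime.datetime.now()))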
I wanted to use multiple processes instead of threads to write to the same file, so I implemented the following. I assumed I would have to use locks to restrict access to the file by the different processes; however, it seems to work fine without them.
import multiprocessing
import sys
import datetime
import time

def write_to_file(text, file_name):
    # Open, append, and close for every single write.
    fd = open(file_name, "a")
    fd.write(text)
    fd.close()

def worker(file_name, index):
    write_to_file("worker %d %s\n" % (index, datetime.datetime.now()), file_name)
    time.sleep(5 - index)
    write_to_file("worker %d %s again\n" % (index, datetime.datetime.now()), file_name)

if __name__ == '__main__':
    file_name = sys.argv[1]
    fd = open(file_name, 'w')
    fd.write("test input\n")
    fd.close()
    jobs = []
    for i in xrange(5):
        process = multiprocessing.Process(target=worker, args=(file_name, i,))
        process.start()
        jobs.append(process)
    for j in jobs:
        j.join()
My question is this: is the "open" call here blocking and already protected by the OS, or do I need to implement locking around the call to "write_to_file"? In essence, will an "open" call block in one process while the file is being written to by another process?
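If locking does turn out to be necessary, I assume it would look something like this, with a multiprocessing.Lock created in the parent and passed to each worker (my own sketch, not tested as the definitive fix):

import multiprocessing
import sys
import datetime
import time

def write_to_file(text, file_name, lock):
    # Serialize the entire open/write/close sequence across processes.
    with lock:
        fd = open(file_name, "a")
        fd.write(text)
        fd.close()

def worker(file_name, index, lock):
    write_to_file("worker %d %s\n" % (index, datetime.datetime.now()), file_name, lock)
    time.sleep(5 - index)
    write_to_file("worker %d %s again\n" % (index, datetime.datetime.now()), file_name, lock)

if __name__ == '__main__':
    file_name = sys.argv[1]
    lock = multiprocessing.Lock()  # shared with the children through args
    jobs = []
    for i in xrange(5):
        process = multiprocessing.Process(target=worker, args=(file_name, i, lock))
        process.start()
        jobs.append(process)
    for j in jobs:
        j.join()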