I've been struggling with this and have read numerous threads, but I can't get it working: I need to write a UTF-8 encoded CSV file from Python 2.
Firstly, here's my super-simple approach:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import csv
import sys
import codecs
# Open the output file through codecs with the "utf-8-sig" codec (UTF-8 plus a
# leading BOM) and wrap it in a csv writer.
# NOTE(review): the handle is never closed in this snippet.
f = codecs.open("output.csv", "w", "utf-8-sig")
writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
# NOTE(review): this is the line the traceback points at. Under Python 2 these
# plain "..." literals are already byte strings, so .encode("utf-8") first has
# to decode them implicitly with the default ASCII codec. That decode fails on
# the first non-ASCII byte (0xc7 at position 1 of the second literal), hence a
# UnicodeDecodeError raised from an encode call. Unicode literals (u"...")
# would avoid the implicit decode.
# NOTE(review): presumably, even with that fixed, handing UTF-8 byte strings to
# a codecs-opened file triggers the same implicit ASCII decode when the row is
# written -- verify; a file opened in binary mode may be needed here.
cells = ["hello".encode("utf-8"), "nǐ hǎo".encode("utf-8"), "你好".encode("utf-8")]
writer.writerow(cells)
That results in an error:
Traceback (most recent call last):
File "./makesimplecsv.py", line 10, in <module>
cells = ["hello".encode("utf-8"), "nǐ hǎo".encode("utf-8"), "你好".encode("utf-8")]
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc7 in position 1: ordinal not in range(128)
I've also tried using the UnicodeWriter class that's listed in the Python docs (https://docs.python.org/2/library/csv.html#examples):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import csv
import sys
import codecs
import cStringIO
class UnicodeWriter:
"""
A CSV writer which will write rows to CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue
self.queue = cStringIO.StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
def writerow(self, row):
self.writer.writerow([s.encode("utf-8") for s in row])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and reencode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
def writerows(self, rows):
for row in rows:
self.writerow(row)
# Open the output file through codecs with the "utf-8-sig" codec (UTF-8 plus a
# leading BOM) and hand it to UnicodeWriter.
# NOTE(review): UnicodeWriter.writerow writes data it has already encoded
# itself, so a codecs-opened file presumably ends up encoding twice -- verify;
# a plain file opened in binary mode looks like the intended target.
# NOTE(review): the handle is never closed in this snippet.
f = codecs.open("output.csv", "w", "utf-8-sig")
writer = UnicodeWriter(f)
# NOTE(review): this is the line the traceback points at; the writer is never
# reached. Under Python 2 these plain "..." literals are already byte strings,
# so .encode("utf-8") first has to decode them implicitly with the default
# ASCII codec, which fails on the first non-ASCII byte (0xc7 at position 1 of
# the second literal). UnicodeWriter.writerow also calls .encode("utf-8") on
# each cell itself, so the cells should be unicode objects (u"...") rather
# than pre-encoded byte strings.
cells = ["hello".encode("utf-8"), "nǐ hǎo".encode("utf-8"), "你好".encode("utf-8")]
writer.writerow(cells)
That results in the same error:
Traceback (most recent call last):
File "./makesimplecsvwithunicodewriter.sh", line 40, in <module>
cells = ["hello".encode("utf-8"), "nǐ hǎo".encode("utf-8"), "你好".encode("utf-8")]
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc7 in position 1: ordinal not in range(128)
I thought I'd gone through the checklist of things I've found in other similar questions:
- My file has an encoding statement.
- I'm opening the file for writing with UTF-8.
- I'm encoding the individual strings in UTF-8 before I pass them to the CSV writer.
- I've tried with and without adding a UTF-8 BOM; it makes no difference, and from what I've read it isn't critical anyway.
Any ideas on what I'm doing wrong?