hago's comment already mentioned filtering out Unicode characters that are not part of GBK, but I would like to give a full example using MySQL Connector/Python.
# -*- coding: utf-8 -*-
import mysql.connector
# Connect using the GBK character set. use_unicode=False asks the connector
# to return strings as raw bytes instead of decoding them to unicode.
cnx = mysql.connector.connect(
    database='test', charset='gbk', use_unicode=False
)
try:
    cur = cnx.cursor()
    try:
        # Start from a clean table so the example can be re-run.
        cur.execute("DROP TABLE IF EXISTS gbktest")
        table = (
            "CREATE TABLE gbktest ("
            "id INT AUTO_INCREMENT KEY, "
            "c1 VARCHAR(40)"
            ") CHARACTER SET 'gbk'"
        )
        cur.execute(table)

        # Encode on the client side; the 'ignore' error handler silently
        # drops every character that has no GBK representation (presumably
        # the leading U+2022 bullet in this sample), so only GBK-safe bytes
        # are sent to the server.
        data = {
            'c1': u'\u2022国家标准'.encode('gbk', 'ignore')
        }
        cur.execute("INSERT INTO gbktest (c1) VALUES (%(c1)s)", data)
        cnx.commit()

        # Fetch everything before the cursor is closed so the result can be
        # printed after cleanup.
        cur.execute("SELECT id, c1 FROM gbktest")
        rows = cur.fetchall()
    finally:
        # Release the cursor even when one of the statements above fails.
        cur.close()
finally:
    # Always give the connection back to the server.
    cnx.close()

# Terminal using UTF-8 encoding:
#print(rows[0][1].decode('gbk'))
# Terminal using GBK encoding:
print(rows[0][1])
Of the two print alternatives at the end, keep the one that matches your terminal's encoding (UTF-8 or GBK) uncommented and comment out the other.