My Python code pulls locally stored XML file and saves it in a SQL Server database. However, I see that file stored in database is missing the initial XML version statement:
<?xml version="1.0" encoding="utf-8"?>
In order to retain the initial XML version statement, I came across lxml documentation which states to add parameter xml_declaration=True. Using this parameter, I can see the XML version printed in python console. But, when I try to use the same parameter to store it in the database, I get an error:
Type: bytes cannot be serialized exception
Can anyone please help me resolve this exception?
Python code:
print("Connecting..")
# Establish a connection between Python and SQL Server
conn = pyodbc.connect('Driver={SQL Server};'
'Server=TestServer;'
'Database=test;'
'Trusted_Connection=yes;')
print("DB Connected..")
# Open the workbook and define the worksheet
path = 'C:\\Arelle-master\\arelle\\plugin\\TestPlugin\\TestExcel.xlsx'
book = xlrd.open_workbook(path)
print("Excel Loaded into xlrd..")
# Get XML File
XMLFilePath = open('C:\\TestPlugin\\HelloWorld.xml')
x = etree.parse(XMLFilePath)
print(etree.tostring(x, pretty_print=True, xml_declaration=True)) # Shows correct version of XML file in console.
CreateTable = """
create table test.dbo.StoreInfo
(
col1 varchar(100),
col2 varchar(100),
col3 varchar(100),
col4 varchar(100),
MyXML XML
)
"""
# execute create table
cursor = conn.cursor()
try:
cursor.execute(CreateTable)
conn.commit()
except pyodbc.ProgrammingError:
pass
print("Table Created..")
InsertQuery = """
INSERT INTO test.dbo.StoreInfo (
col1,
col2,
col3,
col4,
XBRLFile
) VALUES (?, ?, ?, ?, ?)"""
# Grab existing row count in the database for validation later
# cursor.execute("SELECT count(*) FROM test.dbo.StoreInfo")
# before_import = cursor.fetchone()
for r in range(1, sheet.nrows):
col1 = sheet.cell(r, 0).value
col2 = sheet.cell(r, 1).value
col3 = sheet.cell(r, 2).value
col4 = sheet.cell(r, 3).value
col5 = etree.tostring(etree.tostring(x, xml_declaration=True)) # Code throws exception at this line.
# Assign values from each row
values = (col1,col2,col3,col4,col5)
# Execute SQL Insert Query
cursor.execute(InsertQuery, values)
Exception:
col5 = etree.tostring(etree.tostring(x, xml_declaration=True))
File "src\lxml\etree.pyx", line 3391, in lxml.etree.tostring
TypeError: Type 'bytes' cannot be serialized.
Any idea how to resolve this error?