EDIT: If I use the f.write() operator on my code:
from bs4 import BeautifulSoup
import glob
import os
import contextlib
@contextlib.contextmanager
def trade_spider():
os.chdir(r"C:\Users\6930p\FLO'S DATEIEN\Master FAU\Sommersemester 2016\02_Masterarbeit\04_Testumgebung\01_Probedateien für Analyseaspekt\Independent Auditors Report")
for file in glob.iglob('**/*.html', recursive=True):
with open(file, encoding="utf8") as f:
contents = f.read()
soup = BeautifulSoup(contents, "html.parser")
for item in soup.findAll("ix:nonfraction"):
if item['name'].endswith("AuditFeesExpenses"):
print(file.split(os.path.sep)[-1], end="| ")
print(item['name'], end="| ")
print(item.get_text())
f.write(item)
break
trade_spider()
I get the output of the first HTML file that has been parsed, but afterwards I get these error messages:
Prod224_0010_00178176_20131231.html| ns19:AuditFeesExpenses| 3,420
Traceback (most recent call last):
File "C:/Users/6930p/PycharmProjects/untitled/Versuch/SomeTesting.py", line 23, in <module>
trade_spider()
File "C:\Users\6930p\Anaconda3\lib\contextlib.py", line 133, in helper
return _GeneratorContextManager(func, args, kwds)
File "C:\Users\6930p\Anaconda3\lib\contextlib.py", line 38, in __init__
self.gen = func(*args, **kwds)
File "C:/Users/6930p/PycharmProjects/untitled/Versuch/SomeTesting.py", line 21, in trade_spider
f.write(item)
TypeError: write() argument must be str, not Tag
What am I doing wrong? Seems like he doesn't get into the 'for' operation correctly?