I'm trying to merge 2 lists output into one list from a for loop. Tried append() and extend() to no avail.
Here's my code:
c_files = ['file1', 'file2']
doc_count = 0
comb_cran = []
for fname in c_files:
with open(fname,'r') as cr:
cran = cr.read()
doc_count = doc_count + 1
docID = os.path.basename(fname)
#TOKENIZING :
# remove SGML Tags
removedsgml_cran = BeautifulSoup(cran, "html.parser")
clean_cran = removedsgml_cran.get_text()
# remove non-alphanumeric
non_alpha = re.compile('([^\s\w]|_)+')
alpha = non_alpha.sub(' ', clean_cran)
alpha_lower = alpha.lower()
# word Tokenizing using nltk
tokenized = word_tokenize(alpha_lower)
# combine the list
#comb_cran.extend(tokenized)
cword_removed = [(w, docID, 1) for w in tokenized if not w in c_words]
print(cword_removed)
and the results:
[('1', 'cranfield0001', 1), ('experimental', 'cranfield0001', 1), ('investigation', 'cranfield0001', 1), ('aerodynamics', 'cranfield0001', 1), ('slipstream', 'cranfield0001', 1), ('brenckman', 'cranfield0001', 1), ('ae', 'cranfield0001', 1), ('scs', 'cranfield0001', 1), ('25', 'cranfield0001', 1), ('1958', 'cranfield0001', 1), ('324', 'cranfield0001', 1), ('experimental', 'cranfield0001', 1), ('study', 'cranfield0001', 1), ('propeller', 'cranfield0001', 1), ('slipstream', 'cranfield0001', 1), ('made', 'cranfield0001', 1), ('order', 'cranfield0001', 1), ('determine', 'cranfield0001', 1), ('spanwise', 'cranfield0001', 1), ('distribution', 'cranfield0001', 1), ('lift', 'cranfield0001', 1), ('increase', 'cranfield0001', 1), ('due', 'cranfield0001', 1), ('slipstream', 'cranfield0001', 1), ('angles', 'cranfield0001', 1), ('attack', 'cranfield0001', 1), ('free', 'cranfield0001', 1), ('stream', 'cranfield0001', 1), ('slipstream', 'cranfield0001', 1), ('velocity', 'cranfield0001', 1), ('ratios', 'cranfield0001', 1), ('results', 'cranfield0001', 1), ('intended', 'cranfield0001', 1), ('evaluation', 'cranfield0001', 1), ('basis', 'cranfield0001', 1), ('theoretical', 'cranfield0001', 1), ('treatments', 'cranfield0001', 1), ('problem', 'cranfield0001', 1), ('comparative', 'cranfield0001', 1), ('span', 'cranfield0001', 1), ('loading', 'cranfield0001', 1), ('curves', 'cranfield0001', 1), ('supporting', 'cranfield0001', 1), ('evidence', 'cranfield0001', 1), ('showed', 'cranfield0001', 1), ('substantial', 'cranfield0001', 1), ('lift', 'cranfield0001', 1), ('increment', 'cranfield0001', 1), ('produced', 'cranfield0001', 1), ('slipstream', 'cranfield0001', 1), ('due', 'cranfield0001', 1), ('destalling', 'cranfield0001', 1), ('boundary', 'cranfield0001', 1), ('layer', 'cranfield0001', 1), ('control', 'cranfield0001', 1), ('effect', 'cranfield0001', 1), ('integrated', 'cranfield0001', 1), ('remaining', 'cranfield0001', 1), ('lift', 'cranfield0001', 1), ('increment', 'cranfield0001', 1), ('subtracting', 'cranfield0001', 1), ('destalling', 'cranfield0001', 1), ('lift', 'cranfield0001', 1), ('found', 'cranfield0001', 1), ('agree', 'cranfield0001', 1), ('potential', 'cranfield0001', 1), ('flow', 'cranfield0001', 1), ('theory', 'cranfield0001', 1), ('empirical', 'cranfield0001', 1), ('evaluation', 'cranfield0001', 1), ('destalling', 'cranfield0001', 1), ('effects', 'cranfield0001', 1), ('made', 'cranfield0001', 1), ('specific', 'cranfield0001', 1), ('configuration', 'cranfield0001', 1), ('experiment', 'cranfield0001', 1)]
[('2', 'cranfield0002', 1), ('simple', 'cranfield0002', 1), ('shear', 'cranfield0002', 1), ('flow', 'cranfield0002', 1), ('past', 'cranfield0002', 1), ('flat', 'cranfield0002', 1), ('plate', 'cranfield0002', 1), ('incompressible', 'cranfield0002', 1), ('fluid', 'cranfield0002', 1), ('small', 'cranfield0002', 1), ('viscosity', 'cranfield0002', 1), ('yili', 'cranfield0002', 1), ('department', 'cranfield0002', 1), ('aeronautical', 'cranfield0002', 1), ('engineering', 'cranfield0002', 1), ('rensselaer', 'cranfield0002', 1), ('polytechnic', 'cranfield0002', 1), ('institute', 'cranfield0002', 1), ('troy', 'cranfield0002', 1), ('study', 'cranfield0002', 1), ('high', 'cranfield0002', 1), ('speed', 'cranfield0002', 1), ('viscous', 'cranfield0002', 1), ('flow', 'cranfield0002', 1), ('past', 'cranfield0002', 1), ('dimensional', 'cranfield0002', 1), ('curved', 'cranfield0002', 1), ('shock', 'cranfield0002', 1), ('wave', 'cranfield0002', 1), ('emitting', 'cranfield0002', 1), ('nose', 'cranfield0002', 1), ('leading', 'cranfield0002', 1), ('edge', 'cranfield0002', 1), ('exists', 'cranfield0002', 1), ('inviscid', 'cranfield0002', 1), ('rotational', 'cranfield0002', 1), ('flow', 'cranfield0002', 1), ('region', 'cranfield0002', 1), ('shock', 'cranfield0002', 1), ('wave', 'cranfield0002', 1), ('boundary', 'cranfield0002', 1), ('layer', 'cranfield0002', 1), ('situation', 'cranfield0002', 1), ('arises', 'cranfield0002', 1), ('instance', 'cranfield0002', 1), ('study', 'cranfield0002', 1), ('hypersonic', 'cranfield0002', 1), ('viscous', 'cranfield0002', 1), ('flow', 'cranfield0002', 1), ('past', 'cranfield0002', 1), ('flat', 'cranfield0002', 1), ('plate', 'cranfield0002', 1), ('situation', 'cranfield0002', 1), ('prandtl', 'cranfield0002', 1), ('classical', 'cranfield0002', 1), ('boundary', 'cranfield0002', 1), ('layer', 'cranfield0002', 1), ('problem', 'cranfield0002', 1), ('prandtl', 'cranfield0002', 1), ('original', 'cranfield0002', 1), ('problem', 'cranfield0002', 1), ('inviscid', 'cranfield0002', 1), ('free', 'cranfield0002', 1), ('stream', 'cranfield0002', 1), ('boundary', 'cranfield0002', 1), ('layer', 'cranfield0002', 1), ('irrotational', 'cranfield0002', 1), ('hypersonic', 'cranfield0002', 1), ('boundary', 'cranfield0002', 1), ('layer', 'cranfield0002', 1), ('problem', 'cranfield0002', 1), ('inviscid', 'cranfield0002', 1), ('free', 'cranfield0002', 1), ('stream', 'cranfield0002', 1), ('considered', 'cranfield0002', 1), ('rotational', 'cranfield0002', 1), ('effects', 'cranfield0002', 1), ('vorticity', 'cranfield0002', 1), ('recently', 'cranfield0002', 1), ('discussed', 'cranfield0002', 1), ('ferri', 'cranfield0002', 1), ('libby', 'cranfield0002', 1), ('present', 'cranfield0002', 1), ('paper', 'cranfield0002', 1), ('simple', 'cranfield0002', 1), ('shear', 'cranfield0002', 1), ('flow', 'cranfield0002', 1), ('past', 'cranfield0002', 1), ('flat', 'cranfield0002', 1), ('plate', 'cranfield0002', 1), ('fluid', 'cranfield0002', 1), ('small', 'cranfield0002', 1), ('viscosity', 'cranfield0002', 1), ('investigated', 'cranfield0002', 1), ('shown', 'cranfield0002', 1), ('problem', 'cranfield0002', 1), ('treated', 'cranfield0002', 1), ('boundary', 'cranfield0002', 1), ('layer', 'cranfield0002', 1), ('approximation', 'cranfield0002', 1), ('feature', 'cranfield0002', 1), ('free', 'cranfield0002', 1), ('stream', 'cranfield0002', 1), ('constant', 'cranfield0002', 1), ('vorticity', 'cranfield0002', 1), ('discussion', 'cranfield0002', 1), ('restricted', 'cranfield0002', 1), ('dimensional', 'cranfield0002', 1), ('incompressible', 'cranfield0002', 1), ('steady', 'cranfield0002', 1), ('flow', 'cranfield0002', 1)]
The results are correct, however they are separate lists printed from each loop. I need them to be in a single list.
I tried putting the print outside of the loop, then it prints only the second list.
list1 + list2?