I have a wordcloud generator that I have working in a Jupyter notebook. I would like to build a front end for it so you can paste the text in a text box, click submit and it displays the wordcloud. Basically what this chap has done here.
I am looking for some help in amending my code so that, instead of displaying the wordcloud in the Jupyter notebook, it renders the image of the wordcloud on an HTML page. I am using Django to build the front end.
This is the code I have which generates an image of my wordcloud in my Jupyter notebook.
from wordcloud import WordCloud
from PIL import Image
import matplotlib.pyplot as plt
import nltk
# run only once -> nltk.download('punkt')
#nltk.download('wordnet') -> only do this once
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer
# Shared NLTK helpers, created once at module level and used by stem() below:
# ps supplies ps.stem(word), wnl supplies wnl.lemmatize(word).
ps = PorterStemmer()
wnl = WordNetLemmatizer()
# Hand-picked replacements that take priority over lemmatizing and stemming.
_STEM_OVERRIDES = {
    'printing': 'print',
    'e-mailing': 'email',
    'e-mails': 'email',
    'e-mail': 'email',
    'installation': 'install',
}

# If the lemmatized word ends in one of these, the lemma is kept instead of
# the Porter stem (for 'e': the stemmer would cut the trailing 'e').
_KEEP_LEMMA_SUFFIXES = ('e', 'y', 'er', 'ing')


def stem(string):
    """Return *string* with every token reduced to a normalised form.

    The text is tokenised with ``nltk.word_tokenize`` and each token is
    mapped by the first rule that applies:

    1. tokens found in ``dontstem`` are kept unchanged;
    2. tokens listed in ``_STEM_OVERRIDES`` are replaced by the fixed value;
    3. tokens whose lemma ends in 'e', 'y', 'er' or 'ing' become that lemma;
    4. everything else is Porter-stemmed.

    NOTE(review): ``dontstem`` is not defined in the code shown here; it is
    expected to be a module-level collection of words -- confirm it exists.

    Args:
        string: The text to normalise.

    Returns:
        The normalised tokens, each followed by a single space (so a
        non-empty result always ends with a trailing space).
    """
    pieces = []
    for word in nltk.word_tokenize(string):
        if word in dontstem:
            pieces.append(word)
            continue
        if word in _STEM_OVERRIDES:
            pieces.append(_STEM_OVERRIDES[word])
            continue
        # Lemmatize once and reuse the result for both the test and the value.
        lemma = wnl.lemmatize(word)
        if lemma.endswith(_KEEP_LEMMA_SUFFIXES):
            pieces.append(lemma)
        else:
            pieces.append(ps.stem(word))
    # Every token keeps its trailing space, matching the historical output.
    return ''.join(piece + ' ' for piece in pieces)
# We use str.split() so that only whole words are counted; a word that merely
# contains sub_string (e.g. 'emails' for 'email') must not be counted.
def count_substring(string, sub_string):
    """Return how many whitespace-separated words in *string* equal *sub_string*.

    Args:
        string: The text to search.
        sub_string: The exact word to look for.

    Returns:
        The number of whole-word matches (0 when there are none).
    """
    return string.split().count(sub_string)
# A phrase can be made up of several words, so it is counted with a plain
# substring scan rather than word by word; it is unlikely that a whole phrase
# is contained inside another word.
def count_substring_phrases(string, sub_string):
    """Return the number of occurrences of *sub_string* inside *string*.

    Occurrences may overlap ("aa" is found twice in "aaa"). An empty
    *sub_string* yields 0; without that guard every position in the text
    would count as a match.

    Args:
        string: The text to search.
        sub_string: The phrase to look for.

    Returns:
        The number of (possibly overlapping) matches.
    """
    if not sub_string:
        return 0
    count = 0
    position = string.find(sub_string)
    while position != -1:
        count += 1
        # Resume one character later so overlapping matches are still found.
        position = string.find(sub_string, position + 1)
    return count
# The function for counting all the words and phrases.
def countWords(string, phrases, stopWords, dostem):
    """Build a ``{term: count}`` table of the phrases and words in *string*.

    Phrases are handled first: each phrase that occurs is recorded with its
    count and then removed from the text so its words are not counted again.
    The remaining text is optionally passed through ``stem`` and then counted
    word by word.

    Args:
        string: The text to analyse.
        phrases: Iterable of phrases to count as single units.
        stopWords: Container of words to leave out (tested with ``in``).
        dostem: When truthy, run ``stem`` on the text before counting words.

    Returns:
        A dict mapping each counted phrase or word to its number of occurrences.
    """
    from collections import Counter

    newList = {}
    for phrase in phrases:
        if not phrase:
            # An empty phrase carries no information; skip it.
            continue
        occurrences = count_substring_phrases(string, phrase)
        if occurrences > 0:
            newList[phrase] = occurrences
            string = string.replace(phrase, '')
    if dostem:
        string = stem(string)
    # One pass over the words instead of rescanning the text for every word.
    for word, occurrences in Counter(string.split()).items():
        # Skip stop words and anything shorter than 2 characters.
        if word in stopWords or len(word) < 2:
            continue
        newList[word] = occurrences
    return newList
# Build the term -> count table with stemming switched on (last argument True).
# text, phrases and stopWords are not defined in the code shown here and must
# already exist in the notebook.
MyData= dict(countWords(text, phrases, stopWords, True))
# Generate the word cloud from the frequency table.
wc = WordCloud(scale=10, max_words=100).generate_from_frequencies(MyData)
# Open a new pyplot figure, draw the cloud into it, then display it.
plt.figure(figsize=(32,18))
plt.imshow(wc, interpolation="bilinear", aspect='auto')
plt.show()
Here is my views.py file. As you can see, I can get the value from the form field and send it back to the page. What I need to do now is get the value from the form field, run it through the wordcloud function, generate the image of the wordcloud, and then send that back to the page so I can display it.
from django.shortcuts import render
from wordcloudgen.forms import CharForm
from wordcloudgen.wordcloud import *
def cloud_gen(request):
    """Render the word-cloud page and echo the submitted text back to it.

    * Non-POST requests get an empty ``CharForm``.
    * A POST with invalid data re-renders the bound form so its validation
      errors can be shown, so the view never falls through without a response.
    * A valid POST renders the page with the bound form plus the submitted
      text under the ``'text'`` context key.

    Args:
        request: The incoming Django ``HttpRequest``.

    Returns:
        The response produced by ``render`` for the cloud_gen template.
    """
    template = 'wordcloudgen/cloud_gen.html'

    if request.method != 'POST':
        return render(request, template, {'form': CharForm()})

    form = CharForm(request.POST)
    if not form.is_valid():
        return render(request, template, {'form': form})

    text = form.cleaned_data['post']
    # TODO: phrases and stop words are not collected by the form yet; add
    # them here once the form exposes inputs for them.
    return render(request, template, {'form': form, 'text': text})
I would think that I need to change something in the wordcloud code around here:
# Build the term -> count table with stemming switched on (last argument True).
MyData= dict(countWords(text, phrases, stopWords, True))
# Generate the word cloud from the frequency table.
wc = WordCloud(scale=10, max_words=100).generate_from_frequencies(MyData)
# Open a new pyplot figure, draw the cloud into it, then display it.
plt.figure(figsize=(32,18))
plt.imshow(wc, interpolation="bilinear", aspect='auto')
plt.show()
and then add something to the view to call the wordcloud function, save the image it outputs somehow and then pass it to my args variable so I can call it on the HTML template with something like {% image %}.
Notes: For now, some of the arguments to the countWords function are hardcoded to empty strings. Right now there is only one input field in the form, which is for the text. When I have everything working, I will go and add inputs for all the other arguments and options, the size of the graph to output, etc.
Thanks