3
import tkinter as tk
from tkinter import messagebox
import re,sys
from urllib.parse import urlparse
import requests,time
from bs4 import BeautifulSoup
class App(tk.Tk):
    """Small Tk window that scrapes quotes.toscrape.com on a button click.

    NOTE: ``start_collecting`` is wired directly as the button command, so the
    whole scraping loop (including its ``time.sleep`` pauses) runs inside the
    Tk event loop. While it runs, timers registered with ``after`` cannot
    fire; the label only changes when ``update_status`` is called explicitly
    from the loop, i.e. once per scraped page.
    """

    def __init__(self):
        """Build the start screen: a single "Collect Links" button."""
        super().__init__()
        self.title("Quotes Scraper v1")
        self.geometry("400x250")
        button = tk.Button(self, text="Collect Links", bd=3,
            activebackground="grey90", command=self.start_collecting)
        button.place(x=130, y=120)

    def start_collecting(self):
        """Scrape every page of the site, then close the window.

        Exits the process with the message 'Check Internet' when the first
        request does not return HTTP 200.
        """
        url = "http://quotes.toscrape.com/"
        res = requests.get(url)
        if res.status_code != 200:
            sys.exit('Check Internet')
        self.clean_screen_plus()
        quotes = []
        while True:
            soup = BeautifulSoup(res.text, 'lxml')
            self.update_status()
            quotes += [i.span.text.strip() for i in soup.findAll('div', {'class': 'quote'})]
            try:
                # On the last page there is no <li class="next">, so find()
                # returns None and the attribute access raises AttributeError.
                next_page = 'http://quotes.toscrape.com/' + soup.find('li', {'class': 'next'}).a['href']
                time.sleep(5)
                res = requests.get(next_page)
            except AttributeError:
                break
        self.destroy()

    def clean_screen_plus(self):
        """Replace the start screen with a small fixed-size status label."""
        for widget in self.winfo_children():
            widget.destroy()
        self.geometry("300x100")
        self.resizable(False, False)
        self.status = tk.Label(self, text="Collecting")
        self.status.grid()
        # Force a redraw now; the event loop is busy while scraping runs.
        self.update_idletasks()

    def update_status(self):
        """Advance the label through "Collecting", ".", "..", "..." and re-arm."""
        current_status = self.status["text"]
        if current_status.endswith("..."):
            current_status = "Collecting"
        else:
            current_status += "."

        # Update the message
        self.status["text"] = current_status
        self.update_idletasks()
        # The callback must be the bound method; a bare `update_status` is an
        # undefined name at this point.
        self.after(1000, self.update_status)  # updates every 1 sec
        print(current_status)
# Create the application window and hand control to the Tk event loop.
app = App()
app.mainloop()

Hello, I have this code. How can I keep updating the tkinter Label while the while loop is running? I.e., I want the status to keep changing every second, regardless of where I am in the while loop.

The expected output in the tkinter window is Collecting.\n {len(quotes)}, then Collecting.. {len(quotes)}, and so on until the while loop finishes; then just self.destroy().

2
  • res = sess.get(next_page) — what's this? Should it be res = res.get(next_page)? Commented Apr 4, 2020 at 15:15
  • @jizhihaoSAMA Yes, I fixed it. I have a session in my main code, but the code for building it was too long to include. Commented Apr 4, 2020 at 15:17

1 Answer 1

2

You should use a thread to scrape the pages; otherwise the scraping loop will block the GUI.

This code worked for me (although it takes some time to run; I have added annotations in my code):

import tkinter as tk
from tkinter import messagebox
import re,sys
from urllib.parse import urlparse
import requests,time
from bs4 import BeautifulSoup
import threading

class App(tk.Tk):
    """Tk window that scrapes quotes.toscrape.com in a background thread.

    Threading model: Tkinter widgets may only be touched from the thread that
    runs ``mainloop``. The worker thread (``start_collecting``) therefore does
    network and parsing work only and publishes its progress through plain
    attributes; the main thread polls them once per second via ``after`` in
    ``update_status`` and owns every widget call, including ``destroy``.

    Attributes:
        quotes: Quote texts collected so far (appended to by the worker).
    """

    BASE_URL = "http://quotes.toscrape.com/"
    PAGE_DELAY_SECONDS = 5     # polite pause between page requests
    STATUS_INTERVAL_MS = 1000  # how often the status label is refreshed

    def __init__(self):
        """Build the start screen: a single "Collect Links" button."""
        super().__init__()
        self.title("Quotes Scraper v1")
        self.geometry("400x250")
        self.quotes = []
        self._error = None               # exit message set by the worker on failure
        self._dots = 0                   # number of trailing dots currently shown
        self._done = threading.Event()   # set by the worker when it has finished
        button = tk.Button(
            self, text="Collect Links", bd=3,
            activebackground="grey90", command=self.start_thread,
        )
        button.place(x=130, y=120)

    def start_thread(self):
        """Switch to the status screen and start scraping in the background."""
        # Widgets are rebuilt here, on the main thread, before the worker
        # starts. This also removes the button, so a second worker cannot be
        # launched by another click.
        self.clean_screen_plus()
        # daemon=True: closing the window must not leave the process waiting
        # on a sleeping worker.
        threading.Thread(target=self.start_collecting, daemon=True).start()
        self.update_status()  # starts the single periodic ticker

    def start_collecting(self):
        """Worker: fetch every page and append quote texts to ``self.quotes``.

        Makes no Tk calls. Any request failure or a non-200 first response is
        recorded in ``self._error``; ``self._done`` is always set on exit so
        the main thread can close the window.
        """
        try:
            res = requests.get(self.BASE_URL)
            if res.status_code != 200:
                self._error = 'Check Internet'
                return
            while True:
                soup = BeautifulSoup(res.text, 'lxml')
                page_quotes = [
                    div.span.text.strip()
                    for div in soup.find_all('div', {'class': 'quote'})
                ]
                self.quotes.extend(page_quotes)
                # The last page has no <li class="next"> element.
                next_item = soup.find('li', {'class': 'next'})
                if next_item is None or next_item.a is None:
                    break
                time.sleep(self.PAGE_DELAY_SECONDS)
                res = requests.get(self.BASE_URL + next_item.a['href'])
        except requests.RequestException:
            self._error = 'Check Internet'
        finally:
            self._done.set()

    def clean_screen_plus(self):
        """Replace the start screen with a small fixed-size status label."""
        for widget in self.winfo_children():
            widget.destroy()
        self.geometry("300x100")
        self.resizable(False, False)
        self.status = tk.Label(self, text="Collecting")
        self.status.grid()

    def update_status(self):
        """Refresh the label once and re-arm; close the window when done.

        Runs on the main thread only. The label cycles through "Collecting.",
        "Collecting..", "Collecting...", "Collecting" and shows the number of
        quotes collected so far on a second line.
        """
        if self._done.is_set():
            self.destroy()
            if self._error is not None:
                # Raised on the main thread, so it really ends the process;
                # SystemExit raised inside a worker thread only ends that thread.
                sys.exit(self._error)
            return

        self._dots = (self._dots + 1) % 4
        current_status = "Collecting" + "." * self._dots

        # Update the message
        self.status["text"] = f"{current_status}\n{len(self.quotes)}"
        print(current_status)
        # Exactly one pending timer at any time: re-armed only from here.
        self.after(self.STATUS_INTERVAL_MS, self.update_status)
# Create the application window and hand control to the Tk event loop.
app = App()
app.mainloop()
Sign up to request clarification or add additional context in comments.

4 Comments

Exactly what I was looking for. Can you explain why this works? I am not sure why one thread makes it work :)
@User1984 Because Tk().mainloop() behaves like a while loop: the code after it only runs once the loop ends (for example, after destroy()). The while loop in your code works the same way. You use it to send GET requests, and nothing else can run until it breaks. My thread, however, is non-blocking, so it won't block the rest of your code. It is a way to achieve parallelism. You can also read the official documentation about threads.
By this means, any function inside the while loop will be run in parallel, is that right?
@User1984 Yes, but you should use a non-blocking thread and start it first.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.