Fixed the program stop bug. Adapted webpic script for multi-tasking + added comments

This commit is contained in:
Jérémi N ‘EndMove’ 2022-09-04 15:28:28 +02:00
parent 7002439532
commit 03a6d9b54f
Signed by: EndMove
GPG Key ID: 65C4A02E1F5371A4
6 changed files with 267 additions and 172 deletions

View File

@ -17,14 +17,15 @@ class HomeController:
__main_controller = None __main_controller = None
__view = None __view = None
__webpic = None __webpic = None
__download_task = None
# Constructor # Constructor
def __init__(self, controller: MainController, webpic) -> None: def __init__(self, controller: MainController, webpic) -> None:
""" """
Constructor Constructor
:controller: -> The main application controller. * :controller: -> The main application controller.
:webpic: -> The webpicdownloader instance. * :webpic: -> The webpicdownloader instance.
""" """
# Setup variables # Setup variables
self.__main_controller = controller self.__main_controller = controller
@ -39,7 +40,7 @@ class HomeController:
[function for view] [function for view]
=> Define the view of this controller. => Define the view of this controller.
:view: -> The view that this controller manage. * :view: -> The view that this controller manage.
""" """
self.__view = view self.__view = view
self.__webpic.set_messenger_callback(view.add_log) self.__webpic.set_messenger_callback(view.add_log)
@ -51,7 +52,7 @@ class HomeController:
[event function for view] [event function for view]
=> Call this event method when the user requests to change the window. => Call this event method when the user requests to change the window.
:frame: -> The frame we want to launch. * :frame: -> The frame we want to launch.
""" """
self.__main_controller.change_frame(frame) self.__main_controller.change_frame(frame)
@ -60,20 +61,16 @@ class HomeController:
[event function for view] [event function for view]
=> Call this event method when the user requests to download => Call this event method when the user requests to download
:url: -> The url of the website to use for pic-download.\n * :url: -> The url of the website to use for pic-download.
:name: -> The name of the folder in which put pictures. * :name: -> The name of the folder in which put pictures.
""" """
# Define the download task function (to call in a AsyncTask) if url.strip() and name.strip():
def download_task(): self.__download_task = AsyncTask(
self.__view.clear_logs() rcallback=self.__async_task_start,
if self.__webpic.download(url, name): rargs=(url, name),
self.__view.show_success_message("The download has been successfully completed.") qcallback=self.__async_task_stop
else: )
self.__view.show_error_message("A critical error preventing the download occurred, check the logs.") self.__download_task.start()
# Verify variable and start AsyncTask
if url.strip() and name.strip() :
AsyncTask(download_task)
else: else:
self.__view.show_error_message("Opss, the url or folder name are not valid!") self.__view.show_error_message("Opss, the url or folder name are not valid!")
# END View events # END View events
@ -84,5 +81,35 @@ class HomeController:
[event function for controller] [event function for controller]
=> Call this event when a request to exit is thrown. => Call this event when a request to exit is thrown.
""" """
print("Quit... homecontroller") self.__download_task.stop()
print("Quit... homecontroller END")
# END Controller methods # END Controller methods
# START Task methods
def __async_task_start(self, url, name) -> None:
    """
    [CallBack start function]

    => Run callback handed to the AsyncTask. As a bound method it keeps working on this
       controller (its view and its webpic instance) when the task invokes it.

    * :url: -> Url for webpic.
    * :name: -> Working dir name for webpic.
    """
    print("start callback called")
    # Start from an empty log panel, then report the outcome of the download to the user.
    self.__view.clear_logs()
    succeeded = self.__webpic.download(url, name)
    if not succeeded:
        self.__view.show_error_message("A critical error preventing the download occurred, check the logs.")
        return
    self.__view.show_success_message("The download has been successfully completed.")
def __async_task_stop(self) -> None:
    """
    [CallBack stop function]

    => Quit callback handed to the AsyncTask. As a bound method it keeps working on this
       controller when the task invokes it; it simply asks webpic to stop.
    """
    print("stop callback called")
    downloader = self.__webpic
    downloader.stop()
# END Task methods

View File

@ -46,11 +46,12 @@ class MainController:
def on_quite(self) -> None: def on_quite(self) -> None:
""" """
[event function for view] [event function for view]
=> Event launch when you ask to quit the program. => Event launch when you ask to quit the program. This event is propagated
to the subscribers, they can eventually cancel the event
""" """
for callback in self.__quite_event_subscribers: for callback in self.__quite_event_subscribers:
callback() if callback():
print("on_quite") # TODO remove return
self.__view.close_window() # End the program self.__view.close_window() # End the program
def on_check_for_update(self) -> None: def on_check_for_update(self) -> None:

View File

@ -3,7 +3,7 @@ from controller.HomeController import HomeController
from controller.InfoController import InfoController from controller.InfoController import InfoController
from controller.MainController import MainController from controller.MainController import MainController
from controller.Frames import Frames from controller.Frames import Frames
from model.webpic import WebPicDownloader from model.WebPicDownloader import WebPicDownloader
from view.HomeView import HomeView from view.HomeView import HomeView
from view.InfoView import InfoView from view.InfoView import InfoView
from view.MainWindow import MainWindow from view.MainWindow import MainWindow

191
model/WebPicDownloader.py Normal file
View File

@ -0,0 +1,191 @@
import os
from urllib import request
from urllib.error import HTTPError, URLError
from bs4 import BeautifulSoup, Tag, ResultSet
class WebPicDownloader():
    """
    WebPicDownloader

    webpicdownloader is a simple tool able to find and download all pictures on a webpage.
    This tool is customizable and allows to define the working folder, the headers present
    in the http requests which will be emitted and a callback function named messenger which
    will be called at each event. A running download can be interrupted from another thread
    with a simple call to the stop function.

    @author EndMove <contact@endmove.eu>
    @version 1.1.1
    """
    # Variables
    path: str = None            # Main working folder directory
    headers: dict = None        # Header parameters
    messenger = None            # Event callback function
    thread_run: bool = None     # Indicate if the task can still run (for use in a thread)

    # Constructor
    def __init__(self, path: str = None, headers: dict = None, messenger = None) -> None:
        """
        Constructor
        => Configure the downloader. Every parameter is optional and falls back to a default.

        * :path: -> Folder where the tool will download the images (default: current working directory).
        * :headers: -> Dictionary allowing to define the different parameters present in the header of the requests sent by WebPic (default: a browser-like User-Agent).
        * :messenger: -> Callback receiving every event message as a string (default: print).
        """
        self.path = path if path else os.getcwd()
        self.headers = headers if headers else {
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
        }
        self.messenger = messenger if messenger else lambda msg: print(msg)
        self.thread_run = True

    # Internal functions
    def __get_html(self, url: str) -> str:
        """
        Internal Function #do-not-use#
        => Allow to retrieve the HTML content of a website.

        * :url: -> The url of the site for which we want to get the content of the HTML page.
        * RETURN -> Web page content.
        """
        req = request.Request(url, headers=self.headers)
        # The context manager closes the connection even when reading fails.
        with request.urlopen(req) as response:
            # Honour the charset announced by the server; undecodable bytes are
            # replaced instead of aborting the whole download.
            charset = response.headers.get_content_charset() or 'utf-8'
            return response.read().decode(charset, errors='replace')

    def __find_all_img(self, html: str) -> "ResultSet":
        """
        Internal Function #do-not-use#
        => Allow to retrieve all images of an html page.

        * :html: -> Html code in which to search for image tags.
        * RETURN -> Iterable with all image tags.
        """
        soup = BeautifulSoup(html, 'html.parser')
        return soup.find_all('img')

    def __find_img_link(self, img: "Tag") -> str:
        """
        Internal Function #do-not-use#
        => Allow to retrieve the link of a picture.

        * :img: -> Image tag {@code bs4.Tag} for which to search the link of an image.
        * RETURN -> Image link.
        * RAISE -> ValueError if no usable link is found on the tag.
        """
        # Attributes are probed in priority order, the first non-empty one wins.
        for attribute in ('src', 'data-src', 'data-srcset', 'data-fallback-src'):
            link = img.get(attribute)
            if link:
                break
        else:
            raise ValueError("Unable to find image url")
        if attribute == 'data-srcset':
            # A srcset lists several "url descriptor" candidates separated by
            # commas; keep only the url of the first candidate.
            candidates = link.split()
            link = candidates[0].rstrip(',') if candidates else ''
        if 'http' not in link:
            raise ValueError("Bad image url")
        return link

    def __find_image_type(self, img_link: str) -> str:
        """
        Internal Function #do-not-use#
        => Allow to retrieve the right image type (png, jpeg...)

        * :img_link: -> Link of the image for which to find the right type.
        * RETURN -> Type of image (extension without the dot), 'jpg' when the link has none.
        """
        from urllib.parse import urlsplit
        # Only the path component is inspected, so dots located in the host name,
        # the query string or the fragment can no longer be taken for an extension.
        extension = os.path.splitext(urlsplit(img_link).path)[1].lstrip('.')
        return extension if extension else 'jpg'

    def __download_img(self, url: str, filename: str) -> None:
        """
        Internal Function #do-not-use#
        => Allow to download a picture from internet

        * :url: -> Image url on the web.
        * :filename: -> Full path with name of the future image.
        """
        req = request.Request(url, headers=self.headers)
        with request.urlopen(req) as response:
            raw_img = response.read()
        with open(filename, 'wb') as img:
            img.write(raw_img)

    def __initialize_folder(self, folder_path: str) -> None:
        """
        Internal Function #do-not-use#
        => Create the folder in which to place the downloaded images. An exception is
           raised if this folder already exists.

        * :folder_path: -> Full path to the working folder (for the download task).
        * RAISE -> ValueError if the folder already exists.
        """
        # Create first and react to the failure: no window between check and creation.
        try:
            os.mkdir(folder_path)
        except FileExistsError:
            raise ValueError("The folder already exists, it may already contain images") from None

    # Public functions
    def set_messenger_callback(self, callback) -> None:
        """
        Setter to define the callback function called when new messages arrive.

        * :callback: -> the callback function to call when a message event is emitted.
        """
        self.messenger = callback

    def stop(self) -> None:
        """
        Stop the downloading and processing of images (method for use in a thread).
        """
        self.thread_run = False

    def download(self, url: str, folder_name: str) -> bool:
        """
        Start downloading all pictures of a website

        * :url: -> The url of the website to analyse.
        * :folder_name: -> The name of the folder in which to download the photos.
        * RETURN -> True if success, False else.
        * RAISE -> SystemExit if stop() was called while images were being processed.
        """
        # Re-arm the run flag before any work starts, so that a stop() issued while
        # the page is still being fetched is not overwritten afterwards.
        self.thread_run = True
        try:
            count = 0                                       # number of images saved
            folder_path = f"{self.path}/{folder_name}/"     # format path
            html = self.__get_html(url)                     # website html
            images = self.__find_all_img(html)              # find all img tags in html
            self.__initialize_folder(folder_path)           # initialize formatted path
            self.messenger(f"WebPicDownloader found {len(images)} images on the website.")
            for i, img in enumerate(images):
                if not self.thread_run:
                    # Same effect as the former exit() call, without relying on the
                    # helper injected by the site module (absent in some runtimes).
                    raise SystemExit
                try:
                    self.messenger(f"Start downloading image {i}.")
                    img_link = self.__find_img_link(img)
                    self.__download_img(img_link, f"{folder_path}image-{i}.{self.__find_image_type(img_link)}")
                    self.messenger(f"Download of image {i}, done!")
                    count += 1
                except Exception as err:
                    # Best effort: one broken image must not abort the whole run.
                    self.messenger(f"ERROR: Unable to process image {i} -> err[{err}].")
            self.messenger(f"WebPicDownloader has processed {count} images out of {len(images)}.")
            return True
        except HTTPError as err:
            self.messenger(f"ERROR: An http error occured -> err[{err}].")
        except (ValueError, URLError) as err:
            self.messenger(f"ERROR: An error occured with the url -> err[{err}].")
        except Exception as err:
            self.messenger(f"ERROR: An unknown error occured -> err[{err}]")
        return False
if __name__ == "__main__":
    # Interactive command-line mode: keep asking for a website and a target folder
    # until the user answers "n" to the continuation prompt.
    wpd = WebPicDownloader()
    wpd.set_messenger_callback(lambda msg: print(f"--> {msg}"))
    keep_going = True
    while keep_going:
        url = input("Website URL ? ")
        name = input("Folder name ? ")
        wpd.download(url, name)
        answer = input("Do you want to continue [Y/n] ? ")
        keep_going = answer.lower() != "n"
    print("Good bye !")

View File

@ -1,132 +0,0 @@
import os
from urllib import request
from urllib.error import HTTPError, URLError
from bs4 import BeautifulSoup, Tag, ResultSet
class WebPicDownloader():
    """
    WebPicDownloader

    webpicdownloader is a simple tool able to
    find and download all pictures on a webpage.

    @author EndMove <contact@endmove.eu>
    @version 1.1.0
    """
    # Variables
    path: str = None        # Main working folder directory
    messenger = None        # Event callback function
    headers: dict = None    # Header parameters sent with every request

    # Constructor
    def __init__(self, path: str = os.getcwd()) -> None:
        """
        Constructor

        :path: -> Folder where the tool will download the images.
        """
        self.path = path
        self.messenger = lambda message: print(message)
        self.headers = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"}

    # Internal functions
    def __get_html(self, url: str) -> str:
        """
        Allow to retrieve the HTML content of a website

        :url: -> The url of the page to fetch.
        """
        req = request.Request(url, headers=self.headers)
        # The context manager closes the connection even when reading fails.
        with request.urlopen(req) as response:
            # Honour the charset announced by the server; undecodable bytes are
            # replaced instead of aborting the whole download.
            charset = response.headers.get_content_charset() or 'utf-8'
            return response.read().decode(charset, errors='replace')

    def __find_all_img(self, html: str) -> "ResultSet":
        """
        Allow to retrieve all images of an html page

        :html: -> Html code in which to search for image tags.
        """
        soup = BeautifulSoup(html, 'html.parser')
        return soup.find_all('img')

    def __find_img_link(self, img: "Tag") -> str:
        """
        Allow to retrieve the link of a picture

        :img: -> Image tag for which to search the link of an image.
        Raises ValueError if no usable link is found on the tag.
        """
        # Attributes are probed in priority order, the first non-empty one wins.
        for attribute in ('src', 'data-src', 'data-srcset', 'data-fallback-src'):
            link = img.get(attribute)
            if link:
                break
        else:
            raise ValueError("Unable to find image url")
        if attribute == 'data-srcset':
            # A srcset lists several "url descriptor" candidates separated by
            # commas; keep only the url of the first candidate.
            candidates = link.split()
            link = candidates[0].rstrip(',') if candidates else ''
        if 'http' not in link:
            raise ValueError("Bad image url")
        return link

    def __find_image_type(self, img_link: str) -> str:
        """
        Allow to retrieve the right image type

        :img_link: -> Link of the image for which to find the type.
        Returns the extension without the dot, 'jpg' when the link has none.
        """
        from urllib.parse import urlsplit
        # Only the path component is inspected, so dots located in the host name,
        # the query string or the fragment can no longer be taken for an extension.
        extension = os.path.splitext(urlsplit(img_link).path)[1].lstrip('.')
        return extension if extension else 'jpg'

    def __download_img(self, url: str, filename: str) -> None:
        """
        Allow to download a picture from internet

        :url: -> Image url on the web.
        :filename: -> Full path with name of the future image.
        """
        req = request.Request(url, headers=self.headers)
        with request.urlopen(req) as response:
            raw_img = response.read()
        with open(filename, 'wb') as img:
            img.write(raw_img)

    def __initialize_folder(self, folder_path: str) -> None:
        """
        Init the folder on which put downloaded images

        :folder_path: -> Full path to the folder to create.
        Raises ValueError if the folder already exists.
        """
        # Create first and react to the failure: no window between check and creation.
        try:
            os.mkdir(folder_path)
        except FileExistsError:
            raise ValueError("the folder already exists, it may already contain images") from None

    # Public functions
    def set_messenger_callback(self, callback) -> None:
        """
        Setter to define the callback function called when new messages arrive.

        :callback: -> the callback function to call when a message event is emitted.
        """
        self.messenger = callback

    def download(self, url: str, folder_name: str) -> bool:
        """
        Start downloading all pictures of a website

        :url: -> The url of the website to analyse.
        :folder_name: -> The name of the folder in which to download the photos.
        Returns True if the page was processed, False if a blocking error occurred.
        """
        try:
            count = 0
            folder_path = f"{self.path}/{folder_name}/"
            html = self.__get_html(url)
            images = self.__find_all_img(html)
            self.__initialize_folder(folder_path)
            self.messenger(f"WebPicDownloader found {len(images)} images on the website.")
            for i, img in enumerate(images):
                try:
                    self.messenger(f"Start downloading image {i}.")
                    img_link = self.__find_img_link(img)
                    self.__download_img(img_link, f"{folder_path}image-{i}.{self.__find_image_type(img_link)}")
                    self.messenger(f"Download of image {i}, done!")
                    count += 1
                except Exception as err:
                    # Best effort: one broken image must not abort the whole run.
                    self.messenger(f"ERROR: Unable to process image {i} -> err[{err}].")
            self.messenger(f"WebPicDownloader has processed {count} images out of {len(images)}.")
            return True
        except HTTPError as err:
            self.messenger(f"ERROR: An http error occured -> err[{err}].")
        except (ValueError, URLError) as err:
            self.messenger(f"ERROR: An error occured with the url -> err[{err}].")
        except Exception as err:
            self.messenger(f"ERROR: An unknown error occured -> err[{err}]")
        return False
if __name__ == "__main__":
    # Command-line entry point: download from as many websites as the user wants,
    # leaving the loop as soon as the continuation prompt is answered with "n".
    wpd = WebPicDownloader()
    wpd.set_messenger_callback(lambda msg: print(f"--> {msg}"))
    finished = False
    while not finished:
        url = input("Website URL ? ")
        name = input("Folder name ? ")
        wpd.download(url, name)
        finished = input("Do you want to continue [Y/n] ? ").lower() == "n"
    print("Good bye !")

View File

@ -10,37 +10,45 @@ class AsyncTask(threading.Thread):
@author Jérémi Nihart / EndMove @author Jérémi Nihart / EndMove
@link https://git.endmove.eu/EndMove/WebPicDownloader @link https://git.endmove.eu/EndMove/WebPicDownloader
@version 1.0.0 @version 1.0.1
@since 2022-09-01 @since 2022-09-04
""" """
# Variables # Variables
__stop = None __run_callback = None
__callback = None __run_args: list = None
__args: list = None __quite_callback = None
__quite_args = None
# Constructor # Constructor
def __init__(self, callback, args=()) -> None: def __init__(self, rcallback, rargs=None, qcallback=None, qargs=None) -> None:
""" """
Constructor Constructor
=> Specify here the function that should be launched asynchronously. Then use the => Indicate in the constructors, the parameters for launching the process, as
function {AsyncTask.start()} to start the thread and the processing. well as the stop options. Then use the function {AsyncTask.start()} to start
the thread and the processing.
[!]: The function {AsyncTask.run()} is reserved for the thread and should not be run [!]: The function {AsyncTask.run()} is reserved for the thread and should not be run
from outside. from outside.
:callback: -> Is the function to launch asynchronously. * :rcallback: -> Asynchronous start function.
:args: -> Argument to pass to the function when executing it. * :rargs: -> Arguments for the asynchronous startup function.
* :qcallback: -> Stop function to stop asynchronous processing.
* :qargs: -> Argument for the stop function.
""" """
super().__init__() super().__init__()
self.__stop = threading.Event() self.__run_callback = rcallback
self.__callback = callback self.__run_args = rargs if rargs else ()
self.__args = args self.__quite_callback = qcallback if qcallback else lambda: print("exiting thread")
self.__quite_args = qargs if qargs else ()
def run(self) -> None: def run(self) -> None:
""" """
[!] : This function should not be used! Start the task with {AsyncTask.start()}! [Internal function of (threading.Thread)]
[!] : This function must not be used! Start the task with {AsyncTask.start()} !
""" """
self.__callback(*self.__args) self.__run_callback(*self.__run_args)
def stop(self) -> None: def stop(self) -> None:
# TODO """
self.__stop.set() Stop the running task, make sure you have previously defined the stop function.
"""
self.__quite_callback(*self.__quite_args)