diff --git a/controller/HomeController.py b/controller/HomeController.py index 65fca5f..2f0a090 100644 --- a/controller/HomeController.py +++ b/controller/HomeController.py @@ -17,14 +17,15 @@ class HomeController: __main_controller = None __view = None __webpic = None + __download_task = None # Constructor def __init__(self, controller: MainController, webpic) -> None: """ Constructor - :controller: -> The main application cpntroller. - :webpic: -> The webpicdownloader instance. + * :controller: -> The main application cpntroller. + * :webpic: -> The webpicdownloader instance. """ # Setub variables self.__main_controller = controller @@ -39,7 +40,7 @@ class HomeController: [function for view] => Define the view of this controller. - :view: -> The view that this controller manage. + * :view: -> The view that this controller manage. """ self.__view = view self.__webpic.set_messenger_callback(view.add_log) @@ -51,7 +52,7 @@ class HomeController: [event function for view] => Call this event method when the user requests to change the window. - :frame: -> The frame we want to launch. + * :frame: -> The frame we want to launch. """ self.__main_controller.change_frame(frame) @@ -60,20 +61,16 @@ class HomeController: [event function for view] => Call this event method when the user requests to download - :url: -> The url of the website to use for pic-download.\n - :name: -> The name of the folder in which put pictures. + * :url: -> The url of the website to use for pic-download. + * :name: -> The name of the folder in which put pictures. """ - # Define the download task function (to call in a AsyncTask) - def download_task(): - self.__view.clear_logs() - if self.__webpic.download(url, name): - self.__view.show_success_message("The download has been successfully completed.") - else: - self.__view.show_error_message("A critical error preventing the download occurred, check the logs.") - - # Verify variable and start AsyncTask - if url.strip() and name.strip() : - AsyncTask(download_task) + if url.strip() and name.strip(): + self.__download_task = AsyncTask( + rcallback=self.__async_task_start, + rargs=(url, name), + qcallback=self.__async_task_stop + ) + self.__download_task.start() else: self.__view.show_error_message("Opss, the url or folder name are not valid!") # END View events @@ -84,5 +81,35 @@ class HomeController: [event function for controller] => Call this event when a request to exit is thrown. """ - print("Quit... homecontroller") - # END Controller methods \ No newline at end of file + self.__download_task.stop() + print("Quit... homecontroller END") + # END Controller methods + + # START Task methods + def __async_task_start(self, url, name) -> None: + """ + [CallBack start function] + => Start Callback function for asynctask, be careful once executed in asynctask this + function will keep its controller context. In short it's as if the thread was + launched in the controller and the execution never left it. + + * :url: -> Url for webpic. + * :name: -> Working dir name for webpic. + """ + print("start callback called") + self.__view.clear_logs() + if self.__webpic.download(url, name): + self.__view.show_success_message("The download has been successfully completed.") + else: + self.__view.show_error_message("A critical error preventing the download occurred, check the logs.") + + def __async_task_stop(self) -> None: + """ + [CallBack stop function] + => End Callback function for asynctask, be careful once executed in asynctask this + function will keep its controller context. In short it's as if the thread was + launched in the controller and the execution never left it. + """ + print("stop callback called") + self.__webpic.stop() + # END Task methods \ No newline at end of file diff --git a/controller/MainController.py b/controller/MainController.py index 5c8f6b7..7538188 100644 --- a/controller/MainController.py +++ b/controller/MainController.py @@ -46,11 +46,12 @@ class MainController: def on_quite(self) -> None: """ [event function for view] - => Event launch when you ask to quit the program. + => Event launch when you ask to quit the program. This event is propagated + to the subscribers, they can eventually cancel the event """ for callback in self.__quite_event_subscribers: - callback() - print("on_quite") # TODO remove + if callback(): + return self.__view.close_window() # End the program def on_check_for_update(self) -> None: diff --git a/main.py b/main.py index 132e7e6..7e3e4bf 100644 --- a/main.py +++ b/main.py @@ -3,7 +3,7 @@ from controller.HomeController import HomeController from controller.InfoController import InfoController from controller.MainController import MainController from controller.Frames import Frames -from model.webpic import WebPicDownloader +from model.WebPicDownloader import WebPicDownloader from view.HomeView import HomeView from view.InfoView import InfoView from view.MainWindow import MainWindow diff --git a/model/WebPicDownloader.py b/model/WebPicDownloader.py new file mode 100644 index 0000000..b7efd98 --- /dev/null +++ b/model/WebPicDownloader.py @@ -0,0 +1,191 @@ +import os +from urllib import request +from urllib.error import HTTPError, URLError +from bs4 import BeautifulSoup, Tag, ResultSet + + +class WebPicDownloader(): + """ + WebPicDownloader + + webpicdownloader is a simple tool able to find and download all pictures on a webpage. + This tool is customizable and allows to define the working folder, the headers present + in the http requests which will be emitted, a call back function named messenger which + will be called at each event, and in addition the script is thread safe and allows to + be stopped in the middle of treatment with a simple call to the stop function. + + @author EndMove + @version 1.1.1 + """ + # Variables + path: str = None # Main working folder directory + headers: dict = None # Header parameters + messenger = None # Event callback function + thread_run: bool = None # Idicate if the task can still run (for use in a thread) + + # Constructor + def __init__(self, path: str = None, headers: dict = None, messenger = None) -> None: + """ + Constructor + => TODO + + * :path: -> Folder where the tool will download the images. + * :headers: -> Dictionary allowing to define the different parameters present in the header of the requests sent by WebPic. + """ + self.path = path if path else os.getcwd() + self.headers = headers if headers else { + 'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36" + } + self.messenger = messenger if messenger else lambda msg: print(msg) + self.thread_run = True + + # Internal functions + def __get_html(self, url: str) -> str: + """ + Internal Function #do-not-use# + => Allow to retrieve the HTML content of a website. + + * :url: -> The url of the site for which we want to get the content of the HTML page. + * RETURN -> Web page content. + """ + req = request.Request(url, headers=self.headers) + response = request.urlopen(req) + return response.read().decode('utf-8') + + def __find_all_img(self, html: str) -> ResultSet: + """ + Internal Function #do-not-use# + => Allow to retrieve all images of an html page. + + * :html: -> Html code in which to search for image balises. + * RETURN -> Iterable with all image balises. + """ + soup = BeautifulSoup(html, 'html.parser') + return soup.find_all('img') + + def __find_img_link(self, img: Tag) -> str: + """ + Internal Function #do-not-use# + => Allow to retrieve the link of a picture. + + * :img: -> Image tag {@code bs4.Tag} for which to search the link of an image. + * RETURN -> Image link. + """ + if img.get('src'): + link = img.get('src') + elif img.get('data-src'): + link = img.get('data-src') + elif img.get('data-srcset'): + link = img.get('data-srcset') + elif img.get('data-fallback-src'): + link = img.get('data-fallback-src') + else: + raise ValueError("Unable to find image url") + if not 'http' in link: + raise ValueError("Bad image url") + return link + + def __find_image_type(self, img_link: str) -> str: + """ + Internal Function #do-not-use# + => Allow to retrieve the right image type (png, jpeg...) + + * :img_link: -> Lien de l'image pourllaquel trouver le bon type. + * RETURN -> Type of image. + """ + type = img_link.split('.')[-1] + if '?' in type: + type = type.split('?')[0] + return type + + def __download_img(self, url: str, filename: str) -> None: + """ + Internal Function #do-not-use# + => Allow to download a picture from internet + + * :url: -> Image url on the web. + * :filename: -> Full path with name of the future image. + """ + req = request.Request(url, headers=self.headers) + raw_img = request.urlopen(req).read() + with open(filename, 'wb') as img: + img.write(raw_img) + + def __initialize_folder(self, folder_path: str) -> None: + """ + Internal Function #do-not-use# + => Checks if the folder in which to place the images to be uploaded exists and if + not chalk it up. An exception is raised if this folder already exists. + + * :folder_path: -> Full path to the working folder (for the download task). + """ + if not os.path.exists(folder_path): + os.mkdir(folder_path) + else: + raise ValueError("The folder already exists, it may already contain images") + + # Public functions + def set_messenger_callback(self, callback) -> None: + """ + Setter to define the callback function called when new messages arrive. + + * :callback: -> the callback function to call when a message event is emited. + """ + self.messenger = callback + + def stop(self) -> None: + """ + Stop the downloading and processing of images (method for use in a thread). + """ + self.thread_run = False + + def download(self, url: str, folder_name: str) -> bool: + """ + Start downloading all pictures of a website + + * :url: -> The url of the website to annalyse. + * :folder_name: -> The name of the folder in which to upload the photos. + * RETURN -> True if success, False else. + """ + try: + count = 0 # count to 0 + folder_path = f"{self.path}/{folder_name}/" # format path + html = self.__get_html(url) # website html + images = self.__find_all_img(html) # find all img balises ing html + + self.thread_run = True # set thread_run to true + self.__initialize_folder(folder_path) # initialize formatted path + self.messenger(f"WebPicDownloader found {len(images)} images on the website.") + + for i, img in enumerate(images): + if not self.thread_run: + exit() + try: + self.messenger(f"Start downloading image {i}.") + img_link = self.__find_img_link(img) + self.__download_img(img_link, f"{folder_path}image-{i}.{self.__find_image_type(img_link)}") + self.messenger(f"Download of image {i}, done!") + count += 1 + except Exception as err: + self.messenger(f"ERROR: Unable to process image {i} -> err[{err}].") + + self.messenger(f"WebPicDownloader has processed {count} images out of {len(images)}.") + return True + except HTTPError as err: + self.messenger(f"ERROR: An http error occured -> err[{err}].") + except (ValueError, URLError) as err: + self.messenger(f"ERROT: An error occured with the url -> err[{err}].") + except Exception as err: + self.messenger(f"ERROR: An unknown error occured -> err[{err}]") + return False + +if __name__ == "__main__": + wpd = WebPicDownloader() + wpd.set_messenger_callback(lambda msg: print(f"--> {msg}")) + while True: + url = input("Website URL ? ") + name = input("Folder name ? ") + wpd.download(url, name) + if "n" == input("Do you want to continue [Y/n] ? ").lower(): + break + print("Good bye !") \ No newline at end of file diff --git a/model/webpic.py b/model/webpic.py deleted file mode 100644 index 4a4757c..0000000 --- a/model/webpic.py +++ /dev/null @@ -1,132 +0,0 @@ -import os -from urllib import request -from urllib.error import HTTPError, URLError -from bs4 import BeautifulSoup, Tag, ResultSet - - -class WebPicDownloader(): - """ - WebPicDownloader - - webpicdownloader is a simple tool able to - find and download all pictures on a webpage. - - @author EndMove - @version 1.1.0 - """ - # Variables - path: str = None - messenger = None - headers: dict = None - - # Constructor - def __init__(self, path: str = os.getcwd()) -> None: - """Constructor""" - self.path = path - self.messenger = lambda message: print(message) - self.headers = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"} - - # Internal functions - def __get_html(self, url: str) -> str: - """Allow to retrieve the HTML content of a website""" - req = request.Request(url, headers=self.headers) - response = request.urlopen(req) - return response.read().decode('utf-8') - - def __find_all_img(self, html: str) -> ResultSet: - """Allow to retrieve all images of an html page""" - soup = BeautifulSoup(html, 'html.parser') - return soup.find_all('img') - - def __find_img_link(self, img: Tag) -> str: - """Allow to retrieve the link of a picture""" - if img.get('src'): - link = img.get('src') - elif img.get('data-src'): - link = img.get('data-src') - elif img.get('data-srcset'): - link = img.get('data-srcset') - elif img.get('data-fallback-src'): - link = img.get('data-fallback-src') - else: - raise ValueError("Unable to find image url") - if not 'http' in link: - raise ValueError("Bad image url") - return link - - def __find_image_type(self, img_link: str) -> str: - """Allow to retrieve the right image type""" - type = img_link.split('.')[-1] - if '?' in type: - type = type.split('?')[0] - return type - - def __download_img(self, url: str, filename: str) -> None: - """Allow to download a picture from internet""" - req = request.Request(url, headers=self.headers) - raw_img = request.urlopen(req).read() - with open(filename, 'wb') as img: - img.write(raw_img) - - def __initialize_folder(self, folder_path: str) -> None: - """Init the folder on which put downloaded images""" - if not os.path.exists(folder_path): - os.mkdir(folder_path) - else: - raise ValueError("the folder already exists, it may already contain images") - - # Public functions - def set_messenger_callback(self, callback) -> None: - """ - Setter to define the callback function called when new messages arrive. - - :callback: -> the callback function to call when a message event is emited. - """ - self.messenger = callback - - def download(self, url: str, folder_name: str) -> bool: - """ - Start downloading all pictures of a website - - :url: -> The url of the website to annalyse.\n - :folder_name: -> The name of the folder in which to upload the photos. - """ - try: - count = 0 - folder_path = f"{self.path}/{folder_name}/" - html = self.__get_html(url) - images = self.__find_all_img(html) - - self.__initialize_folder(folder_path) - self.messenger(f"WebPicDownloader found {len(images)} images on the website.") - - for i, img in enumerate(images): - try: - self.messenger(f"Start downloading image {i}.") - img_link = self.__find_img_link(img) - self.__download_img(img_link, f"{folder_path}image-{i}.{self.__find_image_type(img_link)}") - self.messenger(f"Download of image {i}, done!") - count += 1 - except Exception as err: - self.messenger(f"ERROR: Unable to process image {i} -> err[{err}].") - - self.messenger(f"WebPicDownloader has processed {count} images out of {len(images)}.") - return True - except HTTPError as err: - self.messenger(f"ERROR: An http error occured -> err[{err}].") - except (ValueError, URLError) as err: - self.messenger(f"ERROT: An error occured with the url -> err[{err}].") - except Exception as err: - self.messenger(f"ERROR: An unknown error occured -> err[{err}]") - return False - -if __name__ == "__main__": - wpd = WebPicDownloader() - wpd.set_messenger_callback(lambda msg: print(f"--> {msg}")) - while True: - url = input("Website URL ? ") - name = input("Folder name ? ") - wpd.download(url, name) - if "n" == input("Do you want to continue [Y/n] ? ").lower(): - break - print("Good bye !") \ No newline at end of file diff --git a/util/AsyncTask.py b/util/AsyncTask.py index 120b50b..05f9021 100644 --- a/util/AsyncTask.py +++ b/util/AsyncTask.py @@ -10,37 +10,45 @@ class AsyncTask(threading.Thread): @author Jérémi Nihart / EndMove @link https://git.endmove.eu/EndMove/WebPicDownloader - @version 1.0.0 - @since 2022-09-01 + @version 1.0.1 + @since 2022-09-04 """ # Variables - __stop = None - __callback = None - __args: list = None + __run_callback = None + __run_args: list = None + __quite_callback = None + __quite_args = None # Constructor - def __init__(self, callback, args=()) -> None: + def __init__(self, rcallback, rargs=None, qcallback=None, qargs=None) -> None: """ Constructor - => Spacify here the function that should be launched asynchronously. Then use the - function {AsyncTask.start()} to start the thread and the processing. + => Indicate in the constructors, the parameters for launching the process, as + well as the stop otpions. Then use the function {AsyncTask.start()} to start + the thread and the processing. [!]: The function {AsyncTask.run()} is reserved for the thread and should not be run from outside. - :callback: -> Is the function to launch asynchronously. - :args: -> Argument to pass to the function when executing it. + * :rcallback: -> Asynchronous start function. + * :rargs: -> Arguments for the asyncrone startup function. + * :qcallback: -> Stop function to stop asynchronous processing. + * :qargs: -> Argument for the stop function. """ super().__init__() - self.__stop = threading.Event() - self.__callback = callback - self.__args = args + self.__run_callback = rcallback + self.__run_args = rargs if rargs else () + self.__quite_callback = qcallback if qcallback else lambda: print("exiting thread") + self.__quite_args = qargs if qargs else () def run(self) -> None: """ - [!] : This function should not be used! Start the task with {AsyncTask.start()}! + [Internal function of (threading.Thread)] + [!] : This function must not be used! Start the task with {AsyncTask.start()} ! """ - self.__callback(*self.__args) + self.__run_callback(*self.__run_args) def stop(self) -> None: - # TODO - self.__stop.set() \ No newline at end of file + """ + Stop the running task, make sure you have previously defined the stop function. + """ + self.__quite_callback(*self.__quite_args) \ No newline at end of file