Major bug fixes, optimization + major advance

2022-09-05 23:13:38 +02:00
parent 539b75cb09
commit f8f7832dd7
11 changed files with 322 additions and 161 deletions
--- a/model/WebPicDownloader.py
+++ b/model/WebPicDownloader.py
@@ -1,10 +1,31 @@
 import os
+from enum import Enum
 from threading import Semaphore, Thread
 from urllib import request
-from urllib.error import HTTPError, URLError
 from bs4 import BeautifulSoup, Tag, ResultSet


+class MessageType(Enum):
+    """
+    MessageType
+
+    Is an enumeration to define the different types of messages sent by the webpic messenger.
+
+    There are 3 types of messages.
+    - log -> log
+    - error -> err
+    - success -> suc
+
+    @author     Jérémi Nihart / EndMove
+    @link       https://git.endmove.eu/EndMove/WebPicDownloader
+    @version    1.0.0
+    @since      2022-09-05
+    """
+    LOG = 'log'
+    ERROR = 'err'
+    SUCCESS = 'suc'
+
+
 class WebPicDownloader(Thread):
    """
    WebPicDownloader
@@ -12,19 +33,18 @@ class WebPicDownloader(Thread):
    Webpicdownloader is a scraping tool that allows you to browse a web page,
    find the images and download them. This tool is easily usable and implementable
    in an application. It has been designed to be executed in an integrated thread
-    in an asynchronous way as well as more classically in a synchronous way. This
-    tool allows to define 3 callback functions, one for events, one in case of
-    success and one in case of failure. It also has an integrated entry point
-    allowing it to be directly executed in terminal mode.
+    in an asynchronous way. This tool allows you to define 3 callback functions, one for
+    events, one in case of success and one in case of failure. It also has an
+    integrated entry point allowing it to be directly executed in terminal mode.

    @author     EndMove <contact@endmove.eu>
-    @version    1.2.0
+    @version    1.2.1
    """
    # Variables
    __callbacks: dict = None    # Callback dictionary
-    __settings: dict = None     #
-    __dl_infos: dict = None     #
-    __sem: Semaphore = None     #
+    __settings: dict = None     # Webpic basic settings
+    __dl_infos: dict = None     # Download information
+    __sem: Semaphore = None     # Semaphore for the webpic worker

    _exit: bool = None          # When set to True quit the thread

@@ -39,7 +59,6 @@ class WebPicDownloader(Thread):
        * :path: -> Folder in which the tool will create the download folders and place the images.
        * :headers: -> Dictionary allowing to define the different parameters present in the header
            of the requests sent by WebPic.
-        * :asynchronous: -> True: launch the download in a thread, False: the opposite.
        * :messenger: -> Callback function messenger (see setter).
        * :success: -> Callback function success (see setter).
        * :failure: -> Callback function failure (see setter).
@@ -60,8 +79,6 @@ class WebPicDownloader(Thread):
            'website_url': 'url',
            'download_name': 'name',
            'download_path': 'full_path',
-            'tot_image_count': 0,
-            'dl_image_count': 0,
            'running': False
        }  
        self.__sem = Semaphore(0)
@@ -69,6 +86,7 @@ class WebPicDownloader(Thread):

        self.start()  # start daemon

+
    # Internal functions
    def __get_html(self, url: str) -> str:
        """
@@ -82,6 +100,7 @@ class WebPicDownloader(Thread):
        response = request.urlopen(req)
        return response.read().decode('utf-8')

+
    def __find_all_img(self, html: str) -> ResultSet:
        """
            Internal Function #do-not-use#
@@ -93,6 +112,7 @@ class WebPicDownloader(Thread):
        soup = BeautifulSoup(html, 'html.parser')
        return soup.find_all('img')

+
    def __find_img_link(self, img: Tag) -> str:
        """
            Internal Function #do-not-use#
@@ -115,6 +135,7 @@ class WebPicDownloader(Thread):
            raise ValueError("Bad image url")
        return link

+
    def __find_image_type(self, img_link: str) -> str:
        """
            Internal Function #do-not-use#
@@ -128,6 +149,7 @@ class WebPicDownloader(Thread):
            type = type.split('?')[0]
        return type

+
    def __download_img(self, url: str, filename: str) -> None:
        """
            Internal Function #do-not-use#
@@ -141,6 +163,7 @@ class WebPicDownloader(Thread):
        with open(filename, 'wb') as img:
            img.write(raw_img)

+
    def __initialize_folder(self, folder_path: str) -> None:
        """
            Internal Function #do-not-use#
@@ -154,12 +177,17 @@ class WebPicDownloader(Thread):
        else:
            raise ValueError("The folder already exists, it may already contain images")

-    def __msg(self, message: str) -> None:
+
+    def __msg(self, message: str, type:MessageType=MessageType.LOG) -> None:
        """
            Internal Function #do-not-use#
        => Use the messenger callback to send a message.
+
+        * :message: -> the message to send through callback
+        * :type: -> message type, a MessageType member (LOG, ERROR or SUCCESS)
        """
-        self.__callbacks.get('messenger')(message)
+        self.__callbacks.get('messenger')(message, type)
+

    # Public functions
    def set_success_callback(self, callback) -> None:
@@ -170,6 +198,7 @@ class WebPicDownloader(Thread):
        """
        self.__callbacks['success'] = callback

+
    def set_failure_callback(self, callback) -> None:
        """
        Setter to define the callback function called when the download fails.
@@ -178,6 +207,7 @@ class WebPicDownloader(Thread):
        """
        self.__callbacks['failure'] = callback

+
    def set_messenger_callback(self, callback) -> None:
        """
        Setter to define the callback function called when new messages arrive.
@@ -186,74 +216,103 @@ class WebPicDownloader(Thread):
        """
        self.__callbacks['messenger'] = callback
    
+
    def start_downloading(self, url: str, name: str) -> None:
        """
-        TODO desc
+        Start downloading all pictures of a website.
+        
+        * :url: -> The url of the website to analyse.
+        * :name: -> The name of the folder in which the downloaded pictures are saved.
        """
-        if self.__dl_infos.get('running'):
-            print("bussy")
+        if not self.is_alive:
+            self.__msg("Opss, the download thread is not running, please restart webpic.", MessageType.ERROR)
+        elif self.__dl_infos.get('running'):
+            self.__msg("Opss, the download thread is busy.", MessageType.ERROR)
        else:
            self.__dl_infos['website_url'] = url
            self.__dl_infos['download_name'] = name
            self.__sem.release()

+
    def stop_downloading(self, block=False) -> None:
        """
-        TODO DESC
+        Stops the download after the current item is processed and exit the downloading thread.
+        
+        <!> Warning: once called, it will no longer be possible to download. <!>
+
+        * :block: -> If True, the function will block until the worker has finished working; if
+        False (default value), the stop request is sent and the program continues.
        """
        self.__exit = True
        self.__sem.release()
        if block:
            self.join()
    
+    
+    def is_download_running(self) -> bool:
+        """
+        Indicates whether a download is currently in progress.
+
+        * RETURN -> True if a download is running, False otherwise.
+        """
+        return self.__dl_infos['running'];
+    
+
    # Thread body function
    def run(self) -> None:
        while True:
-            self.__sem.acquire()
+            self.__sem.acquire()  # wait for the authorization to process

-            if self.__exit:
+            if self.__exit:  # check whether exit was requested
                return

-            self.__dl_infos['running'] = True  # reserv run
+            self.__dl_infos['running'] = True  # indicate that the thread is busy
+
            try:
+                # parse infos from url
                html = self.__get_html(self.__dl_infos.get('website_url'))  # website html
                images = self.__find_all_img(html)  # find all img tags in the html

-                self.__dl_infos['tot_image_count'] = len(images)  # count total image
-                self.__dl_infos['dl_image_count'] = 0  # set download count to 0
+                # set up download information
+                tot_count = len(images)  # count total image
+                dl_count = 0  # set download count to 0
                self.__dl_infos['download_path'] = f"{self.__settings.get('root_path')}/{self.__dl_infos.get('download_name')}/"  # format path

+                # init working directory
                self.__initialize_folder(self.__dl_infos.get('download_path'))  # Init download folder
-                self.__msg(f"WebPicDownloader found {self.__dl_infos.get('tot_image_count')} images on the website.")
+                self.__msg(f"WebPicDownloader found {tot_count} images on the website.")

-                # process pictures
+                # start images processing
                for i, img in enumerate(images):
                    try:
                        self.__msg(f"Start downloading image {i}.")
+                        
                        img_link = self.__find_img_link(img)  # find image link
                        self.__download_img(img_link, f"{self.__dl_infos.get('download_path')}image-{i}.{self.__find_image_type(img_link)}")  # download the image
+                        
                        self.__msg(f"Download of image {i}, done!")
-                        self.__dl_infos['dl_image_count'] += 1  # increment download counter
+                        dl_count += 1  # increment download counter
                    except Exception as err:
                        self.__msg(f"ERROR: Unable to process image {i} -> err[{err}].")
-                self.__msg(f"WebPicDownloader has processed {self.__dl_infos.get('dl_image_count')} images out of {self.__dl_infos.get('tot_image_count')}.")
+                # end images processing
+
+                self.__msg(f"WebPicDownloader has processed {dl_count} images out of {tot_count}.")
                self.__callbacks.get('success')()  # success, launch callback
            except Exception as err:
                self.__msg(f"ERROR: An error occured -> err[{err}]")
                self.__callbacks.get('failure')()  # error, launch callback
-            self.__dl_infos['running'] = False  # free run
+
+            self.__dl_infos['running'] = False  # indicate that the thread is free
+

 if __name__ == "__main__":
    # Internal entry point for testing and console use.
    wpd = WebPicDownloader()
-    def lol(msg):
-        pass
-    wpd.set_messenger_callback(lol)
    while True:
        url = input("Website URL ? ")
        name = input("Folder name ? ")
        wpd.start_downloading(url, name)
        if "n" == input("Do you want to continue [Y/n] ? ").lower():
-            wpd.stop_downloading()
            break
+    wpd.stop_downloading(block=True)
    print("Good bye !")