Major advance + start adapting my webpic script for a graphic version

2022-08-31 21:05:20 +02:00
parent 16794bf488
commit a93a88d344
4 changed files with 94 additions and 72 deletions
--- a/model/webpic.py
+++ b/model/webpic.py
@@ -11,30 +11,33 @@ class WebPicDownloader():
    find and download all pictures on a webpage.

    @author     EndMove <contact@endmove.eu>
-    @version    1.0.0
+    @version    1.1.0
    """
    # Variables
-    path: str
-    headers: dict = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"}
+    path: str = None
+    message_callback: "Callable[[str], None]" = None  # string annotation: `function` is not a defined name and would raise NameError when the class body is evaluated
+    headers: dict = None

    # Constructor
    def __init__(self, path: str = os.getcwd()) -> None:
        """Constructor"""
        self.path = path
+        self.message_callback = lambda message: print(message)
+        self.headers = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"}

    # Internal functions
-    def __getHtml(self, url: str) -> str:
+    def __get_html(self, url: str) -> str:
        """Allow to retrieve the HTML content of a website"""
        req = request.Request(url, headers=self.headers)
        response = request.urlopen(req)
        return response.read().decode('utf-8')

-    def __findAllImg(self, html: str) -> ResultSet:
+    def __find_all_img(self, html: str) -> ResultSet:
        """Allow to retrieve all images of an html page"""
        soup = BeautifulSoup(html, 'html.parser')
        return soup.find_all('img')

-    def __findImgLink(self, img: Tag) -> str:
+    def __find_img_link(self, img: Tag) -> str:
        """Allow to retrieve the link of a picture"""
        if img.get('src'):
            link = img.get('src')
@@ -50,48 +53,56 @@ class WebPicDownloader():
            raise ValueError("Bad image link")
        return link

-    def __findImageType(self, imgLink: str) -> str:
+    def __find_image_type(self, img_link: str) -> str:
        """Allow to retrieve the right image type"""
-        type = imgLink.split('.')[-1]
+        type = img_link.split('.')[-1]
        if '?' in type:
            type = type.split('?')[0]
        return type

-    def __downloadImg(self, url: str, filename: str) -> None:
+    def __download_img(self, url: str, filename: str) -> None:
        """Allow to download a picture from internet"""
        req = request.Request(url, headers=self.headers)
-        rawImg = request.urlopen(req).read()
+        raw_img = request.urlopen(req).read()
        with open(filename, 'wb') as img:
-            img.write(rawImg)
+            img.write(raw_img)

-    def __initializeFolder(self, folderPath: str) -> None:
+    def __initialize_folder(self, folder_path: str) -> None:
        """Init the folder on which put downloaded images"""
-        if not os.path.exists(folderPath):
-            os.mkdir(folderPath)
+        if not os.path.exists(folder_path):
+            os.mkdir(folder_path)
        else:
            raise ValueError("the folder already exists, it may already contain images")

    # Public functions
-    def download(self, url: str, folderName: str) -> None:
+    def set_message_callback(self, callback) -> None:
+        """
+        Setter to define the callback function in case of new messages.
+
+        :callback: -> the callback function to call when a message event is emitted.
+        """
+        self.message_callback = callback
+
+    def download(self, url: str, folder_name: str) -> None:
        """
        Start downloading all pictures of a website
        
        :url: -> The url of the website to annalyse.\n
-        :folderName: -> The name of the folder in which to upload the photos.
+        :folder_name: -> The name of the folder in which to save the downloaded photos.
        """
        try:
            count = 0
-            folderPath = f"{self.path}/{folderName}/"
-            html = self.__getHtml(url)
-            images = self.__findAllImg(html)
+            folder_path = f"{self.path}/{folder_name}/"
+            html = self.__get_html(url)
+            images = self.__find_all_img(html)

-            self.__initializeFolder(folderPath)
+            self.__initialize_folder(folder_path)
            print(f"\nWebPicDownload found {len(images)} images on the website.")

            for i, img in enumerate(images):
                try:
-                    imgLink = self.__findImgLink(img)
-                    self.__downloadImg(imgLink, f"{folderPath}image-{i}.{self.__findImageType(imgLink)}")
+                    img_link = self.__find_img_link(img)
+                    self.__download_img(img_link, f"{folder_path}image-{i}.{self.__find_image_type(img_link)}")
                    print(f"SUCCESS: File n°{i} successfuly downloaded.")
                    count += 1
                except ValueError as err: