WebPicDownloader/model/webpic.py

import os
from urllib import request
from urllib.error import HTTPError, URLError
from urllib.parse import urljoin, urlsplit

from bs4 import BeautifulSoup, Tag, ResultSet

class WebPicDownloader():
    """
    WebPicDownloader

    webpicdownloader is a simple tool able to
    find and download all pictures on a webpage.

    Progress and errors are reported on standard output with print();
    download() catches the errors it encounters instead of raising them.

    @author     EndMove <contact@endmove.eu>
    @version    1.0.0
    """
    # Variables
    # Base directory in which the download folders are created.
    path: str
    # HTTP headers sent with every request (page and pictures).
    headers: dict = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"}

    # <img> attributes inspected for a picture url, in order of preference.
    _LINK_ATTRIBUTES = ('src', 'data-src', 'data-srcset', 'data-fallback-src')
    # Extension used when the url does not expose a usable one.
    _DEFAULT_EXTENSION = 'jpg'

    # Constructor
    def __init__(self, path: str = os.getcwd()) -> None:
        """
        Constructor

        :path: -> Base directory for the download folders.\n
        NOTE: the default is evaluated once, when the class is defined,
        so it is the working directory at import time.
        """
        self.path = path

    # Internal functions
    def __getHtml(self, url: str) -> str:
        """
        Allow to retrieve the HTML content of a website

        The body is decoded with the charset announced in the
        Content-Type header, falling back to UTF-8. Undecodable bytes
        are replaced rather than raising, so an encoding problem is not
        reported as a url error by download().
        """
        req = request.Request(url, headers=self.headers)
        # The context manager closes the connection even if read() fails.
        with request.urlopen(req) as response:
            raw = response.read()
            charset = response.headers.get_content_charset() or 'utf-8'
        try:
            return raw.decode(charset, errors='replace')
        except LookupError:
            # The server announced a charset python does not know.
            return raw.decode('utf-8', errors='replace')

    def __findAllImg(self, html: str) -> "ResultSet":
        """Allow to retrieve all images (<img> tags) of an html page"""
        soup = BeautifulSoup(html, 'html.parser')
        return soup.find_all('img')

    def __findImgLink(self, img: "Tag", baseUrl: str = "") -> str:
        """
        Allow to retrieve the link of a picture

        :img: -> The <img> tag (any object exposing get(name)).\n
        :baseUrl: -> Url of the page, used to resolve relative links.\n
        Returns the first candidate that resolves to an http(s) url.
        Raises ValueError when no candidate attribute is set
        ("Unable to find image url") or none is usable ("Bad image link").
        """
        found = False
        for attribute in self._LINK_ATTRIBUTES:
            raw = img.get(attribute)
            value = raw.strip() if isinstance(raw, str) else ''
            if not value:
                continue
            found = True
            if attribute.endswith('srcset'):
                # A srcset is "url descriptor, url descriptor, ...":
                # keep only the url of the first candidate.
                value = value.split()[0].rstrip(',')
            link = urljoin(baseUrl, value)
            # Checking the scheme (not a substring) rejects data: URIs
            # and links that merely contain "http" somewhere.
            if urlsplit(link).scheme in ('http', 'https'):
                return link
        if not found:
            raise ValueError("Unable to find image url")
        raise ValueError("Bad image link")

    def __findImageType(self, imgLink: str) -> str:
        """
        Allow to retrieve the right image type

        The extension is read from the path part of the url only, so
        dots in the host, query string or fragment are ignored. When
        the path has no clean extension, the default one is returned.
        """
        urlPath = urlsplit(imgLink).path
        extension = os.path.splitext(urlPath)[1].lstrip('.')
        if extension.isalnum():
            return extension
        return self._DEFAULT_EXTENSION

    def __downloadImg(self, url: str, filename: str) -> None:
        """Allow to download a picture from internet into filename"""
        req = request.Request(url, headers=self.headers)
        # Close the connection as soon as the picture has been read.
        with request.urlopen(req) as response:
            rawImg = response.read()
        with open(filename, 'wb') as img:
            img.write(rawImg)

    def __initializeFolder(self, folderPath: str) -> None:
        """
        Init the folder on which put downloaded images

        Missing parent directories are created too. Raises ValueError
        when the folder already exists.
        """
        try:
            # Create-then-catch avoids the race of exists() + mkdir().
            os.makedirs(folderPath)
        except FileExistsError:
            raise ValueError("the folder already exists, it may already contain images") from None

    # Public functions
    def download(self, url: str, folderName: str) -> None:
        """
        Start downloading all pictures of a website

        :url: -> The url of the website to analyse.\n
        :folderName: -> The name of the folder in which to store the photos.
        """
        try:
            count = 0
            folderPath = f"{self.path}/{folderName}/"
            html = self.__getHtml(url)
            images = self.__findAllImg(html)

            self.__initializeFolder(folderPath)
            print(f"\nWebPicDownload found {len(images)} images on the website.")

            for i, img in enumerate(images):
                # A failure on one picture must not stop the others.
                try:
                    # The requested url serves as base for relative links.
                    imgLink = self.__findImgLink(img, url)
                    self.__downloadImg(imgLink, f"{folderPath}image-{i}.{self.__findImageType(imgLink)}")
                    print(f"SUCCESS: File n°{i} successfuly downloaded.")
                    count += 1
                except ValueError as err:
                    print(f"ERROR: Unable to process image n°{i} -> [{err}].")
                except Exception as err:
                    print(f"ERROR: Unable to process image n°{i}, an unknown error occured -> [{err}].")

            print(f"WebPicDownloader has processed {count} images out of {len(images)}.")
        except HTTPError as err:
            # HTTPError is a URLError subclass, so it is handled first.
            print(f"ERROR: An http error occured -> [{err}].")
        except (ValueError, URLError) as err:
            # Also reached when the destination folder already exists.
            print(f"ERROR: An error occured with the url -> [{err}].")
        except Exception as err:
            print(f"ERROR: An unknown error occured -> [{err}]")

if __name__ == "__main__":
    # Interactive command-line loop: ask for a page and a folder name,
    # download the pictures, then ask whether to go again.
    wpd = WebPicDownloader()
    while True:
        # Strip the answers: a pasted url often carries stray whitespace.
        url = input("Website URL ? ").strip()
        name = input("Folder name ? ").strip()
        wpd.download(url, name)
        # "Yes" is the default: only an explicit n/no ends the loop.
        answer = input("Do you want to continue [Y/n] ? ").strip().lower()
        if answer in ("n", "no"):
            break
    print("Good bye !")
Creation Webpic script + start creating graphic interface 2022-08-30 12:28:59 +02:00			`import os`
			`from urllib import request`
			`from urllib.error import HTTPError, URLError`
			`from bs4 import BeautifulSoup, Tag, ResultSet`

			`class WebPicDownloader():`
			`"""`
			`WebPicDownloader`

			`webpicdownloader is a simple tool able to`
			`find and download all pictures on a webpage.`

			`@author EndMove <contact@endmove.eu>`
			`@version 1.0.0`
			`"""`
			`# Variables`
			`path: str`
			`headers: dict = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"}`

			`# Constructor`
			`def __init__(self, path: str = os.getcwd()) -> None:`
			`"""Constructor"""`
			`self.path = path`

			`# Internal functions`
			`def __getHtml(self, url: str) -> str:`
			`"""Allow to retrieve the HTML content of a website"""`
			`req = request.Request(url, headers=self.headers)`
			`response = request.urlopen(req)`
			`return response.read().decode('utf-8')`

			`def __findAllImg(self, html: str) -> ResultSet:`
			`"""Allow to retrieve all images of an html page"""`
			`soup = BeautifulSoup(html, 'html.parser')`
			`return soup.find_all('img')`

			`def __findImgLink(self, img: Tag) -> str:`
			`"""Allow to retrieve the link of a picture"""`
			`if img.get('src'):`
			`link = img.get('src')`
			`elif img.get('data-src'):`
			`link = img.get('data-src')`
			`elif img.get('data-srcset'):`
			`link = img.get('data-srcset')`
			`elif img.get('data-fallback-src'):`
			`link = img.get('data-fallback-src')`
			`else:`
			`raise ValueError("Unable to find image url")`
			`if not 'http' in link:`
			`raise ValueError("Bad image link")`
			`return link`

			`def __findImageType(self, imgLink: str) -> str:`
			`"""Allow to retrieve the right image type"""`
			`type = imgLink.split('.')[-1]`
			`if '?' in type:`
			`type = type.split('?')[0]`
			`return type`

			`def __downloadImg(self, url: str, filename: str) -> None:`
			`"""Allow to download a picture from internet"""`
			`req = request.Request(url, headers=self.headers)`
			`rawImg = request.urlopen(req).read()`
			`with open(filename, 'wb') as img:`
			`img.write(rawImg)`

			`def __initializeFolder(self, folderPath: str) -> None:`
			`"""Init the folder on which put downloaded images"""`
			`if not os.path.exists(folderPath):`
			`os.mkdir(folderPath)`
			`else:`
			`raise ValueError("the folder already exists, it may already contain images")`

			`# Public functions`
			`def download(self, url: str, folderName: str) -> None:`
			`"""`
			`Start downloading all pictures of a website`

			`:url: -> The url of the website to annalyse.\n`
			`:folderName: -> The name of the folder in which to upload the photos.`
			`"""`
			`try:`
			`count = 0`
			`folderPath = f"{self.path}/{folderName}/"`
			`html = self.__getHtml(url)`
			`images = self.__findAllImg(html)`

			`self.__initializeFolder(folderPath)`
			`print(f"\nWebPicDownload found {len(images)} images on the website.")`

			`for i, img in enumerate(images):`
			`try:`
			`imgLink = self.__findImgLink(img)`
			`self.__downloadImg(imgLink, f"{folderPath}image-{i}.{self.__findImageType(imgLink)}")`
			`print(f"SUCCESS: File n°{i} successfuly downloaded.")`
			`count += 1`
			`except ValueError as err:`
			`print(f"ERROR: Unable to process image n°{i} -> [{err}].")`
			`except Exception as err:`
			`print(f"ERROR: Unable to process image n°{i}, an unknown error occured -> [{err}].")`

			`print(f"WebPicDownloader has processed {count} images out of {len(images)}.")`
			`except HTTPError as err:`
			`print(f"ERROR: An http error occured -> [{err}].")`
			`except (ValueError, URLError) as err:`
			`print(f"ERROT: An error occured with the url -> [{err}].")`
			`except Exception as err:`
			`print(f"ERROR: An unknown error occured -> [{err}]")`

			`if __name__ == "__main__":`
			`wpd = WebPicDownloader()`
			`while True:`
			`url = input("Website URL ? ")`
			`name = input("Folder name ? ")`
			`wpd.download(url, name)`
			`if "n" == input("Do you want to continue [Y/n] ? ").lower():`
			`break`
			`print("Good bye !")`