Major progress + start adapting my webpic script for a graphical (GUI) version

This commit is contained in:
2022-08-31 21:05:20 +02:00
parent 16794bf488
commit a93a88d344
4 changed files with 94 additions and 72 deletions

View File

@@ -11,30 +11,33 @@ class WebPicDownloader():
find and download all pictures on a webpage.
@author EndMove <contact@endmove.eu>
@version 1.0.0
@version 1.1.0
"""
# Variables
# Class-level placeholders; the constructor assigns the real values on each instance.
# Base directory under which the download folders are created.
path: str = None
# Callable invoked with each message; the annotation is quoted because the bare
# name `function` is not a built-in and raised NameError when the class body ran.
message_callback: "Callable[[str], None]" = None
# HTTP headers sent with every request.
headers: dict = None
# Constructor
def __init__(self, path: str = None) -> None:
    """
    Constructor
    :path: -> Base directory in which the download folders are created.
    Defaults to the current working directory at the time of the call.
    """
    # Resolve the default at call time: a `path=os.getcwd()` default is evaluated
    # only once, when the function is defined, and ignores any later os.chdir().
    self.path = os.getcwd() if path is None else path
    # Default behaviour: messages are simply printed.
    self.message_callback = lambda message: print(message)
    self.headers = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"}
# Internal functions
def __get_html(self, url: str) -> str:
    """
    Allow to retrieve the HTML content of a website
    :url: -> The url of the page to fetch.
    :return: -> The decoded body of the response.
    """
    req = request.Request(url, headers=self.headers)
    # The context manager closes the connection even if reading or decoding fails.
    with request.urlopen(req) as response:
        # Use the charset announced by the response when there is one;
        # otherwise keep the previous behaviour and assume UTF-8.
        charset = response.headers.get_content_charset() or 'utf-8'
        return response.read().decode(charset)
def __find_all_img(self, html: str) -> ResultSet:
    """
    Collect every image element of an HTML page
    :html: -> The HTML source to parse.
    :return: -> The result of searching the parsed document for all `img` tags.
    """
    return BeautifulSoup(html, 'html.parser').find_all('img')
def __findImgLink(self, img: Tag) -> str:
def __find_img_link(self, img: Tag) -> str:
"""Allow to retrieve the link of a picture"""
if img.get('src'):
link = img.get('src')
@@ -50,48 +53,56 @@ class WebPicDownloader():
raise ValueError("Bad image link")
return link
def __find_image_type(self, img_link: str) -> str:
    """
    Allow to retrieve the right image type
    :img_link: -> The link of the picture.
    :return: -> The extension of the file named by the link, without the dot,
    or an empty string when the last path segment has no extension.
    """
    # Drop the fragment and the query first: they may contain dots of their own
    # (e.g. "pic.jpg?size=1.5") that must not be mistaken for the extension.
    path = img_link.partition('#')[0].partition('?')[0]
    # Only look at the last path segment so that a dot in the host name or in a
    # parent directory is never returned together with slashes.
    filename = path.rpartition('/')[2]
    _, dot, extension = filename.rpartition('.')
    return extension if dot else ''
def __download_img(self, url: str, filename: str) -> None:
    """
    Allow to download a picture from internet
    :url: -> The link of the picture to download.
    :filename: -> The path of the file in which the picture is written.
    """
    req = request.Request(url, headers=self.headers)
    # Close the connection as soon as the payload has been read.
    with request.urlopen(req) as response:
        raw_img = response.read()
    # The file is only opened once the download succeeded, so a failed request
    # does not leave an empty file behind.
    with open(filename, 'wb') as img:
        img.write(raw_img)
def __initialize_folder(self, folder_path: str) -> None:
    """
    Init the folder on which put downloaded images
    :folder_path: -> The path of the folder to create.
    :raises ValueError: -> When something already exists at that path.
    """
    # Create first and react to the failure: checking for existence beforehand
    # leaves a window in which the folder can appear between check and creation.
    try:
        os.mkdir(folder_path)
    except FileExistsError as err:
        raise ValueError("the folder already exists, it may already contain images") from err
# Public functions
def download(self, url: str, folderName: str) -> None:
def set_message_callback(self, callback) -> None:
    """
    Register the function that receives the messages.
    :callback: -> The function stored as message callback, replacing the current one.
    """
    self.message_callback = callback
def download(self, url: str, folder_name: str) -> None:
"""
Start downloading all pictures of a website
:url: -> The url of the website to annalyse.\n
:folderName: -> The name of the folder in which to upload the photos.
:folder_name: -> The name of the folder in which to upload the photos.
"""
try:
count = 0
folderPath = f"{self.path}/{folderName}/"
html = self.__getHtml(url)
images = self.__findAllImg(html)
folder_path = f"{self.path}/{folder_name}/"
html = self.__get_html(url)
images = self.__find_all_img(html)
self.__initializeFolder(folderPath)
self.__initialize_folder(folder_path)
print(f"\nWebPicDownload found {len(images)} images on the website.")
for i, img in enumerate(images):
try:
imgLink = self.__findImgLink(img)
self.__downloadImg(imgLink, f"{folderPath}image-{i}.{self.__findImageType(imgLink)}")
img_link = self.__find_img_link(img)
self.__download_img(img_link, f"{folder_path}image-{i}.{self.__find_image_type(img_link)}")
print(f"SUCCESS: File n°{i} successfuly downloaded.")
count += 1
except ValueError as err: