Major advance + start adapting my webpic script for a graphic version
This commit is contained in:
@@ -11,30 +11,33 @@ class WebPicDownloader():
|
||||
find and download all pictures on a webpage.
|
||||
|
||||
@author EndMove <contact@endmove.eu>
|
||||
@version 1.0.0
|
||||
@version 1.1.0
|
||||
"""
|
||||
# Variables
|
||||
path: str
|
||||
headers: dict = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"}
|
||||
path: str = None
|
||||
message_callback: function = None
|
||||
headers: dict = None
|
||||
|
||||
# Constructor
|
||||
def __init__(self, path: str = os.getcwd()) -> None:
|
||||
"""Constructor"""
|
||||
self.path = path
|
||||
self.message_callback = lambda message: print(message)
|
||||
self.headers = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"}
|
||||
|
||||
# Internal functions
|
||||
def __getHtml(self, url: str) -> str:
|
||||
def __get_html(self, url: str) -> str:
|
||||
"""Allow to retrieve the HTML content of a website"""
|
||||
req = request.Request(url, headers=self.headers)
|
||||
response = request.urlopen(req)
|
||||
return response.read().decode('utf-8')
|
||||
|
||||
def __findAllImg(self, html: str) -> ResultSet:
|
||||
def __find_all_img(self, html: str) -> ResultSet:
|
||||
"""Allow to retrieve all images of an html page"""
|
||||
soup = BeautifulSoup(html, 'html.parser')
|
||||
return soup.find_all('img')
|
||||
|
||||
def __findImgLink(self, img: Tag) -> str:
|
||||
def __find_img_link(self, img: Tag) -> str:
|
||||
"""Allow to retrieve the link of a picture"""
|
||||
if img.get('src'):
|
||||
link = img.get('src')
|
||||
@@ -50,48 +53,56 @@ class WebPicDownloader():
|
||||
raise ValueError("Bad image link")
|
||||
return link
|
||||
|
||||
def __findImageType(self, imgLink: str) -> str:
|
||||
def __find_image_type(self, img_link: str) -> str:
|
||||
"""Allow to retrieve the right image type"""
|
||||
type = imgLink.split('.')[-1]
|
||||
type = img_link.split('.')[-1]
|
||||
if '?' in type:
|
||||
type = type.split('?')[0]
|
||||
return type
|
||||
|
||||
def __downloadImg(self, url: str, filename: str) -> None:
|
||||
def __download_img(self, url: str, filename: str) -> None:
|
||||
"""Allow to download a picture from internet"""
|
||||
req = request.Request(url, headers=self.headers)
|
||||
rawImg = request.urlopen(req).read()
|
||||
raw_img = request.urlopen(req).read()
|
||||
with open(filename, 'wb') as img:
|
||||
img.write(rawImg)
|
||||
img.write(raw_img)
|
||||
|
||||
def __initializeFolder(self, folderPath: str) -> None:
|
||||
def __initialize_folder(self, folder_path: str) -> None:
|
||||
"""Init the folder on which put downloaded images"""
|
||||
if not os.path.exists(folderPath):
|
||||
os.mkdir(folderPath)
|
||||
if not os.path.exists(folder_path):
|
||||
os.mkdir(folder_path)
|
||||
else:
|
||||
raise ValueError("the folder already exists, it may already contain images")
|
||||
|
||||
# Public functions
|
||||
def download(self, url: str, folderName: str) -> None:
|
||||
def set_message_callback(self, callback) -> None:
|
||||
"""
|
||||
Setter to define the callback function in case of new messages.
|
||||
|
||||
:callback: -> the callback function to call when a message event is emitted.
|
||||
"""
|
||||
self.message_callback = callback
|
||||
|
||||
def download(self, url: str, folder_name: str) -> None:
|
||||
"""
|
||||
Start downloading all pictures of a website
|
||||
|
||||
:url: -> The url of the website to analyse.\n
|
||||
:folderName: -> The name of the folder in which to save the downloaded pictures.
|
||||
:folder_name: -> The name of the folder in which to save the downloaded pictures.
|
||||
"""
|
||||
try:
|
||||
count = 0
|
||||
folderPath = f"{self.path}/{folderName}/"
|
||||
html = self.__getHtml(url)
|
||||
images = self.__findAllImg(html)
|
||||
folder_path = f"{self.path}/{folder_name}/"
|
||||
html = self.__get_html(url)
|
||||
images = self.__find_all_img(html)
|
||||
|
||||
self.__initializeFolder(folderPath)
|
||||
self.__initialize_folder(folder_path)
|
||||
print(f"\nWebPicDownload found {len(images)} images on the website.")
|
||||
|
||||
for i, img in enumerate(images):
|
||||
try:
|
||||
imgLink = self.__findImgLink(img)
|
||||
self.__downloadImg(imgLink, f"{folderPath}image-{i}.{self.__findImageType(imgLink)}")
|
||||
img_link = self.__find_img_link(img)
|
||||
self.__download_img(img_link, f"{folder_path}image-{i}.{self.__find_image_type(img_link)}")
|
||||
print(f"SUCCESS: File n°{i} successfuly downloaded.")
|
||||
count += 1
|
||||
except ValueError as err:
|
||||
|
||||
Reference in New Issue
Block a user