Fixed the program stop bug. Adapted webpic script for multi-tasking + added comments
This commit is contained in:
parent
7002439532
commit
03a6d9b54f
@ -17,14 +17,15 @@ class HomeController:
|
||||
__main_controller = None
|
||||
__view = None
|
||||
__webpic = None
|
||||
__download_task = None
|
||||
|
||||
# Constructor
|
||||
def __init__(self, controller: MainController, webpic) -> None:
|
||||
"""
|
||||
Constructor
|
||||
|
||||
:controller: -> The main application cpntroller.
|
||||
:webpic: -> The webpicdownloader instance.
|
||||
* :controller: -> The main application cpntroller.
|
||||
* :webpic: -> The webpicdownloader instance.
|
||||
"""
|
||||
# Setub variables
|
||||
self.__main_controller = controller
|
||||
@ -39,7 +40,7 @@ class HomeController:
|
||||
[function for view]
|
||||
=> Define the view of this controller.
|
||||
|
||||
:view: -> The view that this controller manage.
|
||||
* :view: -> The view that this controller manage.
|
||||
"""
|
||||
self.__view = view
|
||||
self.__webpic.set_messenger_callback(view.add_log)
|
||||
@ -51,7 +52,7 @@ class HomeController:
|
||||
[event function for view]
|
||||
=> Call this event method when the user requests to change the window.
|
||||
|
||||
:frame: -> The frame we want to launch.
|
||||
* :frame: -> The frame we want to launch.
|
||||
"""
|
||||
self.__main_controller.change_frame(frame)
|
||||
|
||||
@ -60,20 +61,16 @@ class HomeController:
|
||||
[event function for view]
|
||||
=> Call this event method when the user requests to download
|
||||
|
||||
:url: -> The url of the website to use for pic-download.\n
|
||||
:name: -> The name of the folder in which put pictures.
|
||||
* :url: -> The url of the website to use for pic-download.
|
||||
* :name: -> The name of the folder in which put pictures.
|
||||
"""
|
||||
# Define the download task function (to call in a AsyncTask)
|
||||
def download_task():
|
||||
self.__view.clear_logs()
|
||||
if self.__webpic.download(url, name):
|
||||
self.__view.show_success_message("The download has been successfully completed.")
|
||||
else:
|
||||
self.__view.show_error_message("A critical error preventing the download occurred, check the logs.")
|
||||
|
||||
# Verify variable and start AsyncTask
|
||||
if url.strip() and name.strip():
|
||||
AsyncTask(download_task)
|
||||
self.__download_task = AsyncTask(
|
||||
rcallback=self.__async_task_start,
|
||||
rargs=(url, name),
|
||||
qcallback=self.__async_task_stop
|
||||
)
|
||||
self.__download_task.start()
|
||||
else:
|
||||
self.__view.show_error_message("Opss, the url or folder name are not valid!")
|
||||
# END View events
|
||||
@ -84,5 +81,35 @@ class HomeController:
|
||||
[event function for controller]
|
||||
=> Call this event when a request to exit is thrown.
|
||||
"""
|
||||
print("Quit... homecontroller")
|
||||
self.__download_task.stop()
|
||||
print("Quit... homecontroller END")
|
||||
# END Controller methods
|
||||
|
||||
# START Task methods
|
||||
def __async_task_start(self, url, name) -> None:
    """
    [CallBack start function]
    => Body of the asynchronous download task. It is handed to AsyncTask as the
    run callback, so it executes in the task's thread while still acting on this
    controller's view and downloader.

    * :url: -> Url of the web page handed to the downloader.
    * :name: -> Name of the working folder handed to the downloader.
    """
    print("start callback called")
    self.__view.clear_logs()
    # download() reports the overall outcome as a boolean
    succeeded = self.__webpic.download(url, name)
    if not succeeded:
        self.__view.show_error_message("A critical error preventing the download occurred, check the logs.")
        return
    self.__view.show_success_message("The download has been successfully completed.")
|
||||
|
||||
def __async_task_stop(self) -> None:
    """
    [CallBack stop function]
    => Stop callback handed to AsyncTask. It asks the downloader to stop its
    current processing; like the start callback, it acts on this controller's
    own downloader instance.
    """
    print("stop callback called")
    downloader = self.__webpic
    downloader.stop()
|
||||
# END Task methods
|
@ -46,11 +46,12 @@ class MainController:
|
||||
def on_quite(self) -> None:
|
||||
"""
|
||||
[event function for view]
|
||||
=> Event launch when you ask to quit the program.
|
||||
=> Event launch when you ask to quit the program. This event is propagated
|
||||
to the subscribers, they can eventually cancel the event
|
||||
"""
|
||||
for callback in self.__quite_event_subscribers:
|
||||
callback()
|
||||
print("on_quite") # TODO remove
|
||||
if callback():
|
||||
return
|
||||
self.__view.close_window() # End the program
|
||||
|
||||
def on_check_for_update(self) -> None:
|
||||
|
2
main.py
2
main.py
@ -3,7 +3,7 @@ from controller.HomeController import HomeController
|
||||
from controller.InfoController import InfoController
|
||||
from controller.MainController import MainController
|
||||
from controller.Frames import Frames
|
||||
from model.webpic import WebPicDownloader
|
||||
from model.WebPicDownloader import WebPicDownloader
|
||||
from view.HomeView import HomeView
|
||||
from view.InfoView import InfoView
|
||||
from view.MainWindow import MainWindow
|
||||
|
191
model/WebPicDownloader.py
Normal file
191
model/WebPicDownloader.py
Normal file
@ -0,0 +1,191 @@
|
||||
import os
|
||||
from urllib import request
|
||||
from urllib.error import HTTPError, URLError
|
||||
from bs4 import BeautifulSoup, Tag, ResultSet
|
||||
|
||||
|
||||
class WebPicDownloader():
    """
    WebPicDownloader

    webpicdownloader is a simple tool able to find and download all pictures on a webpage.
    This tool is customizable and allows to define the working folder, the headers present
    in the http requests which will be emitted and a call back function named messenger
    which will be called at each event. A running download can be interrupted from another
    thread with a simple call to the stop function.

    @author EndMove <contact@endmove.eu>
    @version 1.1.1
    """
    # Variables
    path: str = None            # Main working folder directory
    headers: dict = None        # Header parameters
    messenger = None            # Event callback function
    thread_run: bool = None     # Indicate if the task can still run (for use in a thread)

    # Constructor
    def __init__(self, path: str = None, headers: dict = None, messenger = None) -> None:
        """
        Constructor
        => Every argument is optional; a missing (or empty) value falls back to a default.

        * :path: -> Folder where the tool will download the images (default: current working directory).
        * :headers: -> Dictionary allowing to define the different parameters present in the header of the requests sent by WebPic (default: a browser-like User-Agent).
        * :messenger: -> Callback receiving every event message as a string (default: print).
        """
        self.path = path if path else os.getcwd()
        self.headers = headers if headers else {
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
        }
        self.messenger = messenger if messenger else print
        self.thread_run = True

    # Internal functions
    def __get_html(self, url: str) -> str:
        """
        Internal Function #do-not-use#
        => Allow to retrieve the HTML content of a website.

        * :url: -> The url of the site for which we want to get the content of the HTML page.
        * RETURN -> Web page content.
        """
        req = request.Request(url, headers=self.headers)
        # The context manager closes the connection even if read() fails.
        with request.urlopen(req) as response:
            raw_html = response.read()
            charset = response.headers.get_content_charset() or 'utf-8'
        try:
            # Undecodable bytes are replaced instead of aborting the whole download.
            return raw_html.decode(charset, errors='replace')
        except LookupError:
            # The server announced a charset Python does not know.
            return raw_html.decode('utf-8', errors='replace')

    def __find_all_img(self, html: str) -> "ResultSet":
        """
        Internal Function #do-not-use#
        => Allow to retrieve all images of an html page.

        * :html: -> Html code in which to search for image tags.
        * RETURN -> Iterable with all image tags.
        """
        soup = BeautifulSoup(html, 'html.parser')
        return soup.find_all('img')

    def __find_img_link(self, img: "Tag") -> str:
        """
        Internal Function #do-not-use#
        => Allow to retrieve the link of a picture. The attributes are tried in this
        order: src, data-src, data-srcset, data-fallback-src.

        * :img: -> Image tag {@code bs4.Tag} for which to search the link of an image.
        * RETURN -> Image link.
        * RAISE -> ValueError if no attribute holds a link or if the link is not an http(s) url.
        """
        for attribute in ('src', 'data-src', 'data-srcset', 'data-fallback-src'):
            link = img.get(attribute)
            if link:
                break
        else:
            raise ValueError("Unable to find image url")
        if attribute == 'data-srcset':
            # A srcset is a list of "url descriptor" candidates: keep the first url only.
            candidates = link.split()
            link = candidates[0].rstrip(',') if candidates else ''
        link = link.strip()
        # Require a real http(s) url; "http" appearing somewhere in the text is not enough.
        if not link.startswith(('http://', 'https://')):
            raise ValueError("Bad image url")
        return link

    def __find_image_type(self, img_link: str) -> str:
        """
        Internal Function #do-not-use#
        => Allow to retrieve the right image type (png, jpeg...)

        * :img_link: -> Link of the image for which to find the right type.
        * RETURN -> Type of image (file extension without the dot).
        * RAISE -> ValueError if the last path segment of the link has no extension.
        """
        # Query string and fragment may contain dots, drop them before looking for the extension.
        resource = img_link.split('#', 1)[0].split('?', 1)[0]
        filename = resource.rsplit('/', 1)[-1]
        _, dot, extension = filename.rpartition('.')
        if not dot or not extension:
            raise ValueError("Unable to determine image type")
        return extension

    def __download_img(self, url: str, filename: str) -> None:
        """
        Internal Function #do-not-use#
        => Allow to download a picture from internet

        * :url: -> Image url on the web.
        * :filename: -> Full path with name of the future image.
        """
        req = request.Request(url, headers=self.headers)
        with request.urlopen(req) as response:
            raw_img = response.read()
        with open(filename, 'wb') as img:
            img.write(raw_img)

    def __initialize_folder(self, folder_path: str) -> None:
        """
        Internal Function #do-not-use#
        => Create the folder in which to place the downloaded images.

        * :folder_path: -> Full path to the working folder (for the download task).
        * RAISE -> ValueError if this folder already exists.
        """
        try:
            # Creating directly (instead of checking first) leaves no gap between check and creation.
            os.mkdir(folder_path)
        except FileExistsError as err:
            raise ValueError("The folder already exists, it may already contain images") from err

    def __exit_if_stopped(self) -> None:
        """
        Internal Function #do-not-use#
        => End the current processing when a stop has been requested.

        * RAISE -> SystemExit if {stop()} was called since the download started.
        """
        if not self.thread_run:
            # Raised directly: the exit() helper is injected by the site module and is
            # not guaranteed to exist (python -S, frozen executables).
            raise SystemExit

    # Public functions
    def set_messenger_callback(self, callback) -> None:
        """
        Setter to define the callback function called when new messages arrive.

        * :callback: -> the callback function to call when a message event is emited.
        """
        self.messenger = callback

    def stop(self) -> None:
        """
        Stop the downloading and processing of images (method for use in a thread).
        """
        self.thread_run = False

    def download(self, url: str, folder_name: str) -> bool:
        """
        Start downloading all pictures of a website

        * :url: -> The url of the website to annalyse.
        * :folder_name: -> The name of the folder in which to upload the photos.
        * RETURN -> True if success, False else.
        * RAISE -> SystemExit if {stop()} is called while the download is running.
        """
        # Re-arm the flag before any network access so that a stop() issued while the
        # page is being fetched is not overwritten afterwards.
        self.thread_run = True
        try:
            count = 0                                       # count to 0
            folder_path = f"{self.path}/{folder_name}/"     # format path
            html = self.__get_html(url)                     # website html
            images = self.__find_all_img(html)              # find all img tags in html

            self.__exit_if_stopped()                        # do not create the folder if already stopped
            self.__initialize_folder(folder_path)           # initialize formatted path
            self.messenger(f"WebPicDownloader found {len(images)} images on the website.")

            for i, img in enumerate(images):
                self.__exit_if_stopped()
                try:
                    self.messenger(f"Start downloading image {i}.")
                    img_link = self.__find_img_link(img)
                    self.__download_img(img_link, f"{folder_path}image-{i}.{self.__find_image_type(img_link)}")
                    self.messenger(f"Download of image {i}, done!")
                    count += 1
                except Exception as err:
                    # One faulty image must not abort the remaining ones.
                    self.messenger(f"ERROR: Unable to process image {i} -> err[{err}].")

            self.messenger(f"WebPicDownloader has processed {count} images out of {len(images)}.")
            return True
        except HTTPError as err:
            self.messenger(f"ERROR: An http error occured -> err[{err}].")
        except (ValueError, URLError) as err:
            self.messenger(f"ERROR: An error occured with the url -> err[{err}].")
        except Exception as err:
            self.messenger(f"ERROR: An unknown error occured -> err[{err}]")
        return False
|
||||
|
||||
if __name__ == "__main__":
    # Small interactive console front-end: ask for a page and a folder, download,
    # then repeat until the user answers "n".
    downloader = WebPicDownloader()
    downloader.set_messenger_callback(lambda text: print(f"--> {text}"))
    keep_going = True
    while keep_going:
        site_url = input("Website URL ? ")
        folder = input("Folder name ? ")
        downloader.download(site_url, folder)
        answer = input("Do you want to continue [Y/n] ? ")
        # Only an explicit "n" (any case) ends the session.
        keep_going = answer.lower() != "n"
    print("Good bye !")
|
132
model/webpic.py
132
model/webpic.py
@ -1,132 +0,0 @@
|
||||
import os
|
||||
from urllib import request
|
||||
from urllib.error import HTTPError, URLError
|
||||
from bs4 import BeautifulSoup, Tag, ResultSet
|
||||
|
||||
|
||||
class WebPicDownloader():
    """
    WebPicDownloader

    webpicdownloader is a simple tool able to
    find and download all pictures on a webpage.

    @author EndMove <contact@endmove.eu>
    @version 1.1.0
    """
    # Variables
    path: str = None        # Main working folder directory
    messenger = None        # Event callback function
    headers: dict = None    # Header parameters of the emitted requests

    # Constructor
    def __init__(self, path: str = None) -> None:
        """
        Constructor

        :path: -> Folder where the images are downloaded. When omitted, the current
                  working directory at the time of the call is used.
        """
        # Resolved here and not in the signature: a default written as os.getcwd()
        # would be evaluated once, when the module is imported.
        self.path = path if path is not None else os.getcwd()
        self.messenger = lambda message: print(message)
        self.headers = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"}

    # Internal functions
    def __get_html(self, url: str) -> str:
        """Allow to retrieve the HTML content of a website (decoded as utf-8)"""
        req = request.Request(url, headers=self.headers)
        # The context manager closes the connection even if read() fails.
        with request.urlopen(req) as response:
            return response.read().decode('utf-8')

    def __find_all_img(self, html: str) -> "ResultSet":
        """Allow to retrieve all images of an html page"""
        soup = BeautifulSoup(html, 'html.parser')
        return soup.find_all('img')

    def __find_img_link(self, img: "Tag") -> str:
        """
        Allow to retrieve the link of a picture

        The attributes src, data-src, data-srcset and data-fallback-src are tried in
        that order. A ValueError is raised when none holds a value or when the value
        found does not contain 'http'.
        """
        for attribute in ('src', 'data-src', 'data-srcset', 'data-fallback-src'):
            link = img.get(attribute)
            if link:
                break
        else:
            raise ValueError("Unable to find image url")
        if 'http' not in link:
            raise ValueError("Bad image url")
        return link

    def __find_image_type(self, img_link: str) -> str:
        """Allow to retrieve the right image type (text after the last dot, query string removed)"""
        extension = img_link.split('.')[-1]
        if '?' in extension:
            extension = extension.split('?')[0]
        return extension

    def __download_img(self, url: str, filename: str) -> None:
        """Allow to download a picture from internet and write it to filename"""
        req = request.Request(url, headers=self.headers)
        with request.urlopen(req) as response:
            raw_img = response.read()
        with open(filename, 'wb') as img:
            img.write(raw_img)

    def __initialize_folder(self, folder_path: str) -> None:
        """Init the folder on which put downloaded images, a ValueError is raised if it already exists"""
        if not os.path.exists(folder_path):
            os.mkdir(folder_path)
        else:
            raise ValueError("the folder already exists, it may already contain images")

    # Public functions
    def set_messenger_callback(self, callback) -> None:
        """
        Setter to define the callback function called when new messages arrive.

        :callback: -> the callback function to call when a message event is emited.
        """
        self.messenger = callback

    def download(self, url: str, folder_name: str) -> bool:
        """
        Start downloading all pictures of a website

        :url: -> The url of the website to annalyse.
        :folder_name: -> The name of the folder in which to upload the photos.
        RETURN -> True once every image has been processed, False if the page could
                  not be fetched or the folder could not be prepared.
        """
        try:
            count = 0
            folder_path = f"{self.path}/{folder_name}/"
            html = self.__get_html(url)
            images = self.__find_all_img(html)

            self.__initialize_folder(folder_path)
            self.messenger(f"WebPicDownloader found {len(images)} images on the website.")

            for i, img in enumerate(images):
                try:
                    self.messenger(f"Start downloading image {i}.")
                    img_link = self.__find_img_link(img)
                    self.__download_img(img_link, f"{folder_path}image-{i}.{self.__find_image_type(img_link)}")
                    self.messenger(f"Download of image {i}, done!")
                    count += 1
                except Exception as err:
                    # One faulty image must not abort the remaining ones.
                    self.messenger(f"ERROR: Unable to process image {i} -> err[{err}].")

            self.messenger(f"WebPicDownloader has processed {count} images out of {len(images)}.")
            return True
        except HTTPError as err:
            self.messenger(f"ERROR: An http error occured -> err[{err}].")
        except (ValueError, URLError) as err:
            self.messenger(f"ERROR: An error occured with the url -> err[{err}].")
        except Exception as err:
            self.messenger(f"ERROR: An unknown error occured -> err[{err}]")
        return False
|
||||
|
||||
if __name__ == "__main__":
    # Interactive console loop: one download per round, until the user answers "n".
    tool = WebPicDownloader()
    tool.set_messenger_callback(lambda line: print(f"--> {line}"))
    finished = False
    while not finished:
        page_url = input("Website URL ? ")
        target_folder = input("Folder name ? ")
        tool.download(page_url, target_folder)
        reply = input("Do you want to continue [Y/n] ? ").lower()
        finished = reply == "n"
    print("Good bye !")
|
@ -10,37 +10,45 @@ class AsyncTask(threading.Thread):
|
||||
|
||||
@author Jérémi Nihart / EndMove
|
||||
@link https://git.endmove.eu/EndMove/WebPicDownloader
|
||||
@version 1.0.0
|
||||
@since 2022-09-01
|
||||
@version 1.0.1
|
||||
@since 2022-09-04
|
||||
"""
|
||||
# Variables
|
||||
__stop = None
|
||||
__callback = None
|
||||
__args: list = None
|
||||
__run_callback = None
|
||||
__run_args: list = None
|
||||
__quite_callback = None
|
||||
__quite_args = None
|
||||
|
||||
# Constructor
|
||||
def __init__(self, callback, args=()) -> None:
|
||||
def __init__(self, rcallback, rargs=None, qcallback=None, qargs=None) -> None:
|
||||
"""
|
||||
Constructor
|
||||
=> Spacify here the function that should be launched asynchronously. Then use the
|
||||
function {AsyncTask.start()} to start the thread and the processing.
|
||||
=> Indicate in the constructors, the parameters for launching the process, as
|
||||
well as the stop otpions. Then use the function {AsyncTask.start()} to start
|
||||
the thread and the processing.
|
||||
[!]: The function {AsyncTask.run()} is reserved for the thread and should not be run
|
||||
from outside.
|
||||
|
||||
:callback: -> Is the function to launch asynchronously.
|
||||
:args: -> Argument to pass to the function when executing it.
|
||||
* :rcallback: -> Asynchronous start function.
|
||||
* :rargs: -> Arguments for the asyncrone startup function.
|
||||
* :qcallback: -> Stop function to stop asynchronous processing.
|
||||
* :qargs: -> Argument for the stop function.
|
||||
"""
|
||||
super().__init__()
|
||||
self.__stop = threading.Event()
|
||||
self.__callback = callback
|
||||
self.__args = args
|
||||
self.__run_callback = rcallback
|
||||
self.__run_args = rargs if rargs else ()
|
||||
self.__quite_callback = qcallback if qcallback else lambda: print("exiting thread")
|
||||
self.__quite_args = qargs if qargs else ()
|
||||
|
||||
def run(self) -> None:
|
||||
"""
|
||||
[!] : This function should not be used! Start the task with {AsyncTask.start()}!
|
||||
[Internal function of (threading.Thread)]
|
||||
[!] : This function must not be used! Start the task with {AsyncTask.start()} !
|
||||
"""
|
||||
self.__callback(*self.__args)
|
||||
self.__run_callback(*self.__run_args)
|
||||
|
||||
def stop(self) -> None:
|
||||
# TODO
|
||||
self.__stop.set()
|
||||
"""
|
||||
Stop the running task, make sure you have previously defined the stop function.
|
||||
"""
|
||||
self.__quite_callback(*self.__quite_args)
|
Reference in New Issue
Block a user