From 96c692aef76f8dffe1203548fea7b237c1aeeb44 Mon Sep 17 00:00:00 2001 From: Romain Quinet Date: Fri, 6 Oct 2023 22:43:28 +0200 Subject: [PATCH] Include episode title and number into data --- download_transcripts.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/download_transcripts.py b/download_transcripts.py index 5b9f382..f2a45f0 100644 --- a/download_transcripts.py +++ b/download_transcripts.py @@ -6,8 +6,14 @@ for i in range(1, 139): r = requests.get(url) soup = BeautifulSoup(r.text, 'html.parser') pre_section = soup.find('pre') + title_section = soup.find('h1') if pre_section: text = pre_section.get_text() + title = title_section.get_text() with open(f"data/episode_{i}.txt", "w") as f: - f.write(text) \ No newline at end of file + f.write( + f"Darknet Diaries - {title}\n" + + text + ) + print(title)