diff --git a/download_transcripts.py b/download_transcripts.py index 5b9f382..f2a45f0 100644 --- a/download_transcripts.py +++ b/download_transcripts.py @@ -6,8 +6,14 @@ for i in range(1, 139): r = requests.get(url) soup = BeautifulSoup(r.text, 'html.parser') pre_section = soup.find('pre') + title_section = soup.find('h1') if pre_section: text = pre_section.get_text() + title = title_section.get_text() with open(f"data/episode_{i}.txt", "w") as f: - f.write(text) \ No newline at end of file + f.write( + f"Darknet Diaries - {title}\n" + + text + ) + print(title)