Compare commits
2 Commits
f4a9c9bed7...a77d41c6ec
| Author | SHA1 | Date | |
|---|---|---|---|
| | a77d41c6ec | | |
| | 96c692aef7 | | |
@ -6,8 +6,14 @@ for i in range(1, 139):
|
||||
r = requests.get(url)
|
||||
soup = BeautifulSoup(r.text, 'html.parser')
|
||||
pre_section = soup.find('pre')
|
||||
title_section = soup.find('h1')
|
||||
|
||||
if pre_section:
|
||||
text = pre_section.get_text()
|
||||
title = title_section.get_text()
|
||||
with open(f"data/episode_{i}.txt", "w") as f:
|
||||
f.write(text)
|
||||
f.write(
|
||||
f"Darknet Diaries - {title}\n" +
|
||||
text
|
||||
)
|
||||
print(title)
|
||||
|
10
main.py
10
main.py
@ -11,6 +11,7 @@ service_context = ServiceContext.from_defaults(llm=llm)
|
||||
set_global_service_context(service_context)
|
||||
|
||||
if not os.path.exists("./index/lock"):
|
||||
print("Generating index...")
|
||||
documents = []
|
||||
for filename in os.listdir("./data"):
|
||||
episode_number = re.search(r'\d+', filename).group()
|
||||
@ -31,12 +32,17 @@ if not os.path.exists("./index/lock"):
|
||||
index.storage_context.persist(persist_dir="./index")
|
||||
open("./index/lock", 'a').close()
|
||||
else:
|
||||
print("Loading index...")
|
||||
storage_context = StorageContext.from_defaults(persist_dir="./index")
|
||||
index = load_index_from_storage(storage_context)
|
||||
|
||||
template = (
|
||||
"You are now an expert on the Darknet Diaries podcast. \n"
|
||||
"Please answer this question by referring to the podcast: {query_str}\n"
|
||||
"You have been trained on the Darknet Diaries podcast transcripts with data from october 6 2023."
|
||||
"You are now an expert about it and will answer as such. You know about every episode up to number 138. \n"
|
||||
"----------------\n"
|
||||
"Here is the context: {context_str}"
|
||||
"----------------\n"
|
||||
"Please answer this question by referring to the podcast: {query_str}"
|
||||
)
|
||||
qa_template = PromptTemplate(template)
|
||||
query_engine = index.as_query_engine(text_qa_template=qa_template)
|
||||
|
Loading…
Reference in New Issue
Block a user