Compare commits

..

No commits in common. "a77d41c6ecdd225f66a482649a947aaaa3491f83" and "f4a9c9bed7efcf0ab3c3768756cf361856b0d14e" have entirely different histories.

2 changed files with 3 additions and 15 deletions

View File

@@ -6,14 +6,8 @@ for i in range(1, 139):
r = requests.get(url) r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser') soup = BeautifulSoup(r.text, 'html.parser')
pre_section = soup.find('pre') pre_section = soup.find('pre')
title_section = soup.find('h1')
if pre_section: if pre_section:
text = pre_section.get_text() text = pre_section.get_text()
title = title_section.get_text()
with open(f"data/episode_{i}.txt", "w") as f: with open(f"data/episode_{i}.txt", "w") as f:
f.write( f.write(text)
f"Darknet Diaries - {title}\n" +
text
)
print(title)

10
main.py
View File

@@ -11,7 +11,6 @@ service_context = ServiceContext.from_defaults(llm=llm)
set_global_service_context(service_context) set_global_service_context(service_context)
if not os.path.exists("./index/lock"): if not os.path.exists("./index/lock"):
print("Generating index...")
documents = [] documents = []
for filename in os.listdir("./data"): for filename in os.listdir("./data"):
episode_number = re.search(r'\d+', filename).group() episode_number = re.search(r'\d+', filename).group()
@@ -32,17 +31,12 @@ if not os.path.exists("./index/lock"):
index.storage_context.persist(persist_dir="./index") index.storage_context.persist(persist_dir="./index")
open("./index/lock", 'a').close() open("./index/lock", 'a').close()
else: else:
print("Loading index...")
storage_context = StorageContext.from_defaults(persist_dir="./index") storage_context = StorageContext.from_defaults(persist_dir="./index")
index = load_index_from_storage(storage_context) index = load_index_from_storage(storage_context)
template = ( template = (
"You have been trained on the Darknet Diaries podcast transcripts with data from october 6 2023." "You are now an expert on the Darknet Diaries podcast. \n"
"You are now an expert about it and will answer as such. You know about every episode up to number 138. \n" "Please answer this question by referring to the podcast: {query_str}\n"
"----------------\n"
"Here is the context: {context_str}"
"----------------\n"
"Please answer this question by referring to the podcast: {query_str}"
) )
qa_template = PromptTemplate(template) qa_template = PromptTemplate(template)
query_engine = index.as_query_engine(text_qa_template=qa_template) query_engine = index.as_query_engine(text_qa_template=qa_template)