deps-managment-and-dotenv #1
14
.editorconfig
Normal file
14
.editorconfig
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
root = true
|
||||||
|
|
||||||
|
[*]
|
||||||
|
charset = utf-8
|
||||||
|
end_of_line = lf
|
||||||
|
indent_size = 4
|
||||||
|
indent_style = space
|
||||||
|
insert_final_newline = true
|
||||||
|
trim_trailing_whitespace = true
|
||||||
|
max_line_length = 120
|
||||||
|
|
||||||
|
[*.md]
|
||||||
|
trim_trailing_whitespace = false
|
||||||
|
max_line_length = 0
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,3 +1,5 @@
|
|||||||
|
.env
|
||||||
/transcripts
|
/transcripts
|
||||||
/index
|
/index
|
||||||
/.idea
|
/.idea
|
||||||
|
/venv
|
||||||
|
@ -9,9 +9,15 @@ Well, let's ask our LLM:
|
|||||||
|
|
||||||
## How to run
|
## How to run
|
||||||
### Install dependencies
|
### Install dependencies
|
||||||
I have no idea what the correct way to install dependencies with python is. Somehow install these libraries and their dependencies:
|
It is recommended to use a Python version greater than or equal to ``3.10.0``.
|
||||||
- llama_index
|
It is also recommended to create a venv, or to use an IDE that supports venv creation, so that all dependencies are installed locally to the project rather than globally. Alternatively, you can use https://virtualenv.pypa.io/en/latest/ to create isolated environments.
|
||||||
- beautifulsoup4
|
|
||||||
|
Install the dependencies required to run the project by running the following command at the project root:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
### Execution
|
### Execution
|
||||||
Download transcripts:
|
Download transcripts:
|
||||||
```shell
|
```shell
|
||||||
@ -31,6 +37,7 @@ python3 main.py
|
|||||||
On the first run, it will generate the index. This can take a while, but it will be cached on disk for the next runs.
|
On the first run, it will generate the index. This can take a while, but it will be cached on disk for the next runs.
|
||||||
|
|
||||||
You can then ask it any questions about Darknet Diaries!
|
You can then ask it any questions about Darknet Diaries!
|
||||||
|
|
||||||
## Examples
|
## Examples
|
||||||
|
|
||||||
> What is the intro of the podcast?
|
> What is the intro of the podcast?
|
@ -5,17 +5,20 @@ import json
|
|||||||
|
|
||||||
folder_path = "transcripts"
|
folder_path = "transcripts"
|
||||||
|
|
||||||
if not os.path.exists(folder_path):
|
if __name__ == '__main__':
|
||||||
|
|||||||
|
if not os.path.exists(folder_path):
|
||||||
os.makedirs(folder_path)
|
os.makedirs(folder_path)
|
||||||
|
|
||||||
for i in range(1, 139):
|
for i in range(1, 139):
|
||||||
try:
|
try:
|
||||||
|
# fetch transcript
|
||||||
url = f"https://darknetdiaries.com/transcript/{i}"
|
url = f"https://darknetdiaries.com/transcript/{i}"
|
||||||
r = requests.get(url)
|
r = requests.get(url)
|
||||||
soup = BeautifulSoup(r.text, 'html.parser')
|
soup = BeautifulSoup(r.text, 'html.parser')
|
||||||
|
|
||||||
transcript = soup.find('pre').get_text()
|
transcript = soup.find('pre').get_text()
|
||||||
|
|
||||||
|
# fetch transcript metadata
|
||||||
url = f"https://api.darknetdiaries.com/{i}.json"
|
url = f"https://api.darknetdiaries.com/{i}.json"
|
||||||
r = requests.get(url)
|
r = requests.get(url)
|
||||||
parsed_json = json.loads(r.text)
|
parsed_json = json.loads(r.text)
|
||||||
@ -23,8 +26,9 @@ for i in range(1, 139):
|
|||||||
number = parsed_json["episode_number"]
|
number = parsed_json["episode_number"]
|
||||||
downloads = parsed_json["total_downloads"]
|
downloads = parsed_json["total_downloads"]
|
||||||
|
|
||||||
with open(f"{folder_path}/episode_{number}.txt", "w") as f:
|
# write transcript
|
||||||
|
with open(f"{folder_path}/episode_{number}.txt", "w", encoding='utf-8') as f:
|
||||||
f.write(f"{title}\n{downloads}\n{transcript}")
|
f.write(f"{title}\n{downloads}\n{transcript}")
|
||||||
print(f"{number} {title}")
|
print(f"{number} {title}")
|
||||||
except Exception:
|
except Exception as err:
|
||||||
print(f"Failed scraping episode {i}")
|
print(f"Failed scraping episode {i} : [{err}]")
|
||||||
|
34
main.py
34
main.py
@ -4,18 +4,25 @@ from llama_index.node_parser import SimpleNodeParser
|
|||||||
from llama_index import VectorStoreIndex
|
from llama_index import VectorStoreIndex
|
||||||
from llama_index.llms import OpenAI, ChatMessage, MessageRole
|
from llama_index.llms import OpenAI, ChatMessage, MessageRole
|
||||||
from llama_index.prompts import ChatPromptTemplate
|
from llama_index.prompts import ChatPromptTemplate
|
||||||
from llama_index import set_global_handler
|
# from llama_index import set_global_handler
|
||||||
from llama_index.chat_engine.types import ChatMode
|
from llama_index.chat_engine.types import ChatMode
|
||||||
|
from dotenv import load_dotenv
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
# set_global_handler("simple")
|
# set_global_handler("simple")
|
||||||
|
|
||||||
llm = OpenAI(model="gpt-4", temperature=0, max_tokens=256)
|
# load .env
|
||||||
|
load_dotenv()
|
||||||
|
OPEN_API_KEY = os.getenv('OPEN_API_KEY')
|
||||||
phito
commented
pas nécessaire, llama-index prend déjà la clé API depuis les envvar
EndMove
commented
pour ceux qui ne passent pas par là :p genre moi ^^
|
|||||||
|
|
||||||
|
# config llm context
|
||||||
|
llm = OpenAI(model="gpt-4", temperature=0, max_tokens=256, api_key=OPEN_API_KEY)
|
||||||
service_context = ServiceContext.from_defaults(llm=llm)
|
service_context = ServiceContext.from_defaults(llm=llm)
|
||||||
set_global_service_context(service_context)
|
set_global_service_context(service_context)
|
||||||
|
|
||||||
if not os.path.exists("./index/lock"):
|
if __name__ == '__main__':
|
||||||
|
if not os.path.exists("./index/lock"):
|
||||||
documents = []
|
documents = []
|
||||||
for filename in os.listdir("./transcripts"):
|
for filename in os.listdir("./transcripts"):
|
||||||
episode_number = re.search(r'\d+', filename).group()
|
episode_number = re.search(r'\d+', filename).group()
|
||||||
@ -41,12 +48,12 @@ if not os.path.exists("./index/lock"):
|
|||||||
index = VectorStoreIndex(nodes, show_progress=True)
|
index = VectorStoreIndex(nodes, show_progress=True)
|
||||||
index.storage_context.persist(persist_dir="./index")
|
index.storage_context.persist(persist_dir="./index")
|
||||||
open("./index/lock", 'a').close()
|
open("./index/lock", 'a').close()
|
||||||
else:
|
else:
|
||||||
print("Loading index...")
|
print("Loading index...")
|
||||||
storage_context = StorageContext.from_defaults(persist_dir="./index")
|
storage_context = StorageContext.from_defaults(persist_dir="./index")
|
||||||
index = load_index_from_storage(storage_context)
|
index = load_index_from_storage(storage_context)
|
||||||
|
|
||||||
chat_text_qa_msgs = [
|
chat_text_qa_msgs = [
|
||||||
ChatMessage(
|
ChatMessage(
|
||||||
role=MessageRole.SYSTEM,
|
role=MessageRole.SYSTEM,
|
||||||
content=(
|
content=(
|
||||||
@ -67,10 +74,10 @@ chat_text_qa_msgs = [
|
|||||||
"answer the question: {query_str}\n"
|
"answer the question: {query_str}\n"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
text_qa_template = ChatPromptTemplate(chat_text_qa_msgs)
|
text_qa_template = ChatPromptTemplate(chat_text_qa_msgs)
|
||||||
|
|
||||||
chat_refine_msgs = [
|
chat_refine_msgs = [
|
||||||
ChatMessage(
|
ChatMessage(
|
||||||
role=MessageRole.SYSTEM,
|
role=MessageRole.SYSTEM,
|
||||||
content="Always answer the question, even if the context isn't helpful.",
|
content="Always answer the question, even if the context isn't helpful.",
|
||||||
@ -89,18 +96,17 @@ chat_refine_msgs = [
|
|||||||
"Original Answer: {existing_answer}"
|
"Original Answer: {existing_answer}"
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
refine_template = ChatPromptTemplate(chat_refine_msgs)
|
refine_template = ChatPromptTemplate(chat_refine_msgs)
|
||||||
|
|
||||||
chat_engine = index.as_chat_engine(
|
chat_engine = index.as_chat_engine(
|
||||||
text_qa_template=text_qa_template,
|
text_qa_template=text_qa_template,
|
||||||
refine_template=refine_template,
|
refine_template=refine_template,
|
||||||
chat_mode=ChatMode.OPENAI
|
chat_mode=ChatMode.OPENAI
|
||||||
)
|
)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
chat_engine.chat_repl()
|
chat_engine.chat_repl()
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
16
requirements.txt
Normal file
16
requirements.txt
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
# =====================
|
||||||
|
# Required dependencies
|
||||||
|
# =====================
|
||||||
|
# general deps
|
||||||
|
requests~=2.31.0
|
||||||
|
llama-index~=0.8.40
|
||||||
|
beautifulsoup4~=4.12.2
|
||||||
|
python-dotenv~=1.0.0
|
||||||
|
|
||||||
|
# llama sub deps
|
||||||
|
transformers~=4.34.0
|
||||||
|
torch~=2.1.0
|
||||||
|
|
||||||
|
# =====================
|
||||||
|
# Development dependencies
|
||||||
|
# =====================
|
Loading…
Reference in New Issue
Block a user
c'est vraiment nécessaire d'avoir ça dans un fichier qui n'est pas importé par d'autres fichiers? C'est juste un script pas un module
yup pour spécifier que c'est un script "executable"