[proxy]
github.com
—
← back
|
site home
|
direct (HTTPS) ↗
|
proxy home
|
◑ dark
◐ light
pgvector
/
pgvector-python
Public
Notifications
You must be signed in to change notification settings
Fork
89
Star
1.4k
Files
Expand file tree
master
/
example.py
Copy path
Blame
More file actions
Blame
More file actions
Latest commit
History
History
History
65 lines (52 loc) · 2.01 KB
master
/
example.py
Top
File metadata and controls
Code
Blame
65 lines (52 loc) · 2.01 KB
Raw
Copy raw file
Download raw file
Open symbols panel
Edit and raw actions
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Example: retrieval-augmented generation (RAG) over the pgvector README,
# using Ollama for embeddings + generation and Postgres/pgvector for storage.
#
# Run:
# ollama pull llama3.2
# ollama pull nomic-embed-text
# ollama serve
import numpy as np
import ollama
from pathlib import Path
from pgvector.psycopg import register_vector
import psycopg
import urllib.request

query = 'What index types are supported?'
load_data = True  # set to False to skip re-downloading and re-embedding the document

# autocommit so DDL (CREATE EXTENSION / CREATE TABLE) takes effect immediately
conn = psycopg.connect(dbname='pgvector_example', autocommit=True)
conn.execute('CREATE EXTENSION IF NOT EXISTS vector')
register_vector(conn)  # register the Postgres vector type with this connection

if load_data:
    # get data (cached locally next to this script)
    url = 'https://raw.githubusercontent.com/pgvector/pgvector/refs/heads/master/README.md'
    dest = Path(__file__).parent / 'README.md'
    if not dest.exists():
        urllib.request.urlretrieve(url, dest)

    doc = dest.read_text(encoding='utf-8')

    # generate chunks (split on second-level markdown headings)
    # TODO improve chunking
    # TODO remove markdown
    chunks = doc.split('\n## ')

    # embed chunks
    # nomic-embed-text has task instruction prefix
    # (named doc_inputs rather than `input` to avoid shadowing the builtin)
    doc_inputs = ['search_document: ' + chunk for chunk in chunks]
    embeddings = ollama.embed(model='nomic-embed-text', input=doc_inputs).embeddings

    # create table — vector(768) matches the embedding size produced above
    conn.execute('DROP TABLE IF EXISTS chunks')
    conn.execute('CREATE TABLE chunks (id bigserial PRIMARY KEY, content text, embedding vector(768))')

    # store chunks via binary COPY for fast bulk insert
    cur = conn.cursor()
    with cur.copy('COPY chunks (content, embedding) FROM STDIN WITH (FORMAT BINARY)') as copy:
        copy.set_types(['text', 'vector'])
        for content, embedding in zip(chunks, embeddings):
            copy.write_row([content, embedding])

# embed query
# nomic-embed-text has task instruction prefix
# (named query_input rather than `input` to avoid shadowing the builtin)
query_input = 'search_query: ' + query
embedding = ollama.embed(model='nomic-embed-text', input=query_input).embeddings[0]

# retrieve the 5 nearest chunks by cosine distance (pgvector's <=> operator)
result = conn.execute('SELECT content FROM chunks ORDER BY embedding <=> %s LIMIT 5', (np.array(embedding),)).fetchall()
context = '\n\n'.join([row[0] for row in result])

# get answer
# TODO improve prompt
prompt = f'Answer this question: {query}\n\n{context}'
response = ollama.generate(model='llama3.2', prompt=prompt).response
print(response)