OpenAI API key (will be deleted on 12 Feb 2026 at 1 PM CET; note that one character is intentionally missing at the end — see the slides for it). Treat this as a shared secret: do not commit it to a repository or paste it into public channels:
sk-proj-JLiR8ad0qHfmjMNmVu8RYgU9wGjToqvBL8MR78vygOFGaueuR_BKvnUTnsBdp8AZ9am_q1IG-RT3BlbkFJ6vHlNhHuKmrdU07erhGThYjeWdQAMcpRh6sYdTkD9bPFkuPWgGZRoPIKSJsm1Z_yilWuNWdX8
Basic AI chatbot: https://n8n.io/workflows/1954-ai-agent-chat/
RAG template: click to download
SQL table with cities: click here to download (after downloading, rename the file from .txt to .sql)
Prepare vector table in Supabase (run this in SQL Editor in Supabase):
-- Enable the pgvector extension (idempotent; needed once per database).
create extension if not exists vector;

-- Embeddings store, written by n8n (or any other ingestion pipeline).
create table if not exists public.embeddings (
    id         uuid        primary key default gen_random_uuid(),
    created_at timestamptz not null    default now(),

    -- Provenance of the embedded text.
    source      text,           -- upstream system, e.g. "notion", "gmail", "web"
    external_id text,           -- identifier within that upstream system
    content     text not null,  -- raw text the embedding was computed from

    -- Arbitrary key/value payload used for filtering at query time.
    metadata jsonb not null default '{}'::jsonb,

    -- Dimension must match the embedding model
    -- (1536 = OpenAI text-embedding-3-small, used later in this file).
    embedding vector(1536) not null
);

-- Fast lookup of rows by their upstream identifier.
create index if not exists embeddings_external_id_idx
    on public.embeddings (external_id);

-- GIN index keeps jsonb containment (@>) filters on metadata fast.
create index if not exists embeddings_metadata_gin_idx
    on public.embeddings using gin (metadata);

-- Approximate nearest-neighbour index for similarity search.
-- vector_cosine_ops = cosine distance; use vector_l2_ops for Euclidean.
-- NOTE(review): ivfflat cluster centroids are trained from the rows present
-- at creation time, so recall is poor when the index is built on an empty
-- table — consider (re)building it after the initial bulk load.
create index if not exists embeddings_embedding_ivfflat_idx
    on public.embeddings using ivfflat (embedding vector_cosine_ops)
    with (lists = 100);

-- Optional de-duplication guard for repeated n8n runs:
-- create unique index if not exists embeddings_source_external_id_uniq
--     on public.embeddings (source, external_id);
Create a function for similarity search (run this in the SQL Editor in Supabase):
-- Top-k similarity search over public.embeddings.
--
-- Parameters:
--   query_embedding  vector to compare against (must be 1536-dim)
--   match_count      maximum rows to return (null => 10)
--   filter           jsonb containment filter on metadata ('{}' matches all)
--
-- Returns rows ordered best-first; similarity = 1 - cosine distance,
-- because pgvector's <=> operator yields cosine *distance*.
create or replace function public.match_documents(
  query_embedding vector(1536),
  match_count int default null,
  filter jsonb default '{}'
)
returns table (
  id uuid,
  content text,
  metadata jsonb,
  similarity float
)
language sql
stable
parallel safe  -- read-only query; lets the planner parallelise it
as $$
  select
    d.id,
    d.content,
    d.metadata,
    1 - (d.embedding <=> query_embedding) as similarity
  from public.embeddings d
  -- The original `filter = '{}'::jsonb or ...` guard was redundant:
  -- every jsonb value contains '{}', and metadata is NOT NULL, so
  -- containment alone already matches all rows for an empty filter.
  where d.metadata @> filter
  order by d.embedding <=> query_embedding
  limit coalesce(match_count, 10);
$$;
# pip install sentence-transformers faiss-cpu
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

# Tiny in-memory corpus standing in for a real knowledge base
# (in practice: e.g. OCR'd PDF documents pulled from Google Drive).
docs = [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "The Eiffel Tower is in Paris.",
    "Tokyo is the capital of Japan.",
]

# Embed the corpus. Normalised vectors make cosine similarity equal to a
# plain dot product, which is exactly what an inner-product index computes.
model = SentenceTransformer("all-MiniLM-L6-v2")
doc_emb = np.asarray(
    model.encode(docs, normalize_embeddings=True), dtype="float32"
)

# Build a flat (exact) FAISS index over the document vectors.
dim = doc_emb.shape[1]
index = faiss.IndexFlatIP(dim)
index.add(doc_emb)

# Embed the query the same way, then retrieve the top-k neighbours.
query = "Where is the Eiffel Tower located?"
q_emb = np.asarray(
    model.encode([query], normalize_embeddings=True), dtype="float32"
)
k = 2
scores, ids = index.search(q_emb, k)

print("Query:", query)
for rank, (doc_idx, score) in enumerate(zip(ids[0], scores[0]), start=1):
    print(f"{rank}. score={score:.3f} doc={docs[doc_idx]}")
# pip install openai numpy
import os
import numpy as np
from openai import OpenAI

# BUG FIX: the original passed the literal string "OPENAI_API_KEY" as the
# API key. Read the real key from the environment instead — the `os`
# import was already present for exactly this purpose, and keys should
# never be hard-coded in source files.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Tiny in-memory corpus standing in for a real knowledge base.
docs = [
    "Paris is the capital of France.",
    "Berlin is the capital of Germany.",
    "The Eiffel Tower is in Paris.",
    "Tokyo is the capital of Japan.",
]
def embed_texts(texts, model="text-embedding-3-small"):
    """Embed a list of strings via the OpenAI embeddings API.

    The API accepts a list of strings and returns one embedding per
    input, in order; the result is a float32 array of shape
    (len(texts), dim).
    """
    resp = client.embeddings.create(model=model, input=texts)
    vectors = [item.embedding for item in resp.data]
    return np.array(vectors, dtype=np.float32)
def l2_normalize(x, axis=1, eps=1e-12):
    """Scale vectors along `axis` to unit L2 norm.

    `eps` floors the norm so all-zero vectors divide by eps rather
    than by zero.
    """
    lengths = np.linalg.norm(x, axis=axis, keepdims=True)
    # Elementwise max is equivalent to np.clip(lengths, eps, None).
    safe_lengths = np.maximum(lengths, eps)
    return x / safe_lengths
# 1) Embed and normalise the whole corpus.
doc_emb = l2_normalize(embed_texts(docs))

# 2) Same treatment for the query.
query = "Where is the Eiffel Tower located?"
q_emb = l2_normalize(embed_texts([query]))

# 3) With unit vectors, cosine similarity reduces to a dot product.
scores = doc_emb @ q_emb[0]  # shape: (num_docs,)

# 4) Report the k best matches, highest similarity first.
k = 2
topk_idx = np.argsort(-scores)[:k]
print("Query:", query)
for rank, doc_idx in enumerate(topk_idx, start=1):
    print(f"{rank}. score={scores[doc_idx]:.3f} doc={docs[doc_idx]}")