|
| 1 | +# Example based on https://ollama.com/blog/embedding-models |
| 2 | +# using objectbox as a vector store |
| 3 | + |
| 4 | +import ollama |
| 5 | +import objectbox |
| 6 | + |
# Knowledge base for this example: short llama facts. Each string is embedded
# individually below and stored as one row in the ObjectBox vector store.
documents = [
    "Llamas are members of the camelid family meaning they're pretty closely related to vicuñas and camels",
    "Llamas were first domesticated and used as pack animals 4,000 to 5,000 years ago in the Peruvian highlands",
    "Llamas can grow as much as 6 feet tall though the average llama between 5 feet 6 inches and 5 feet 9 inches tall",
    "Llamas weigh between 280 and 450 pounds and can carry 25 to 30 percent of their body weight",
    "Llamas are vegetarians and have very efficient digestive systems",
    "Llamas live to be about 20 years old, though some only live for 15 years and others live to be 30 years old",
]
| 15 | + |
| 16 | + |
| 17 | +from objectbox.model import * |
| 18 | +from objectbox.model.properties import * |
| 19 | +import numpy as np |
| 20 | + |
# Have fresh data for each start: delete any ObjectBox database files left in
# the "objectbox" directory by a previous run, so documents are not duplicated.
objectbox.Store.remove_db_files("objectbox")
| 23 | + |
@Entity(id=1, uid=1)
class DocumentEmbedding:
    # ObjectBox entity pairing a source document with its embedding vector.
    # The id/uid pairs are ObjectBox schema bookkeeping and must stay stable
    # across runs; they are mirrored in the Model() wiring below.
    id = Id(id=1, uid=1001)
    document = Property(str, id=2, uid=1002)
    # HNSW index enables approximate nearest-neighbor search on the vector.
    # dimensions must equal the embedding model's output size — assumes
    # "mxbai-embed-large" emits 1024-dim vectors (TODO confirm against model).
    embedding = Property(np.ndarray, type=PropertyType.floatVector, id=3, uid=1003, index=HnswIndex(
        id=3, uid=10001,
        dimensions=1024,
        distance_type=VectorDistanceType.COSINE
    ))
| 33 | + |
| 34 | +model = Model() |
| 35 | +model.entity(DocumentEmbedding, last_property_id=IdUid(3, 1003)) |
| 36 | +model.last_entity_id = IdUid(1, 1) |
| 37 | +model.last_index_id = IdUid(3,10001) |
| 38 | + |
| 39 | +store = objectbox.Store(model=model) |
| 40 | +box = store.box(DocumentEmbedding) |
| 41 | + |
print("Documents to embed: ", len(documents))

# Embed each document with Ollama and persist it alongside its vector.
for index, doc in enumerate(documents, start=1):
    result = ollama.embeddings(model="mxbai-embed-large", prompt=doc)
    vector = result["embedding"]
    box.put(DocumentEmbedding(document=doc, embedding=vector))
    print(f"Document {index} embedded")
| 51 | + |
# an example prompt
prompt = "What animals are llamas related to?"

# Embed the question with the same model used for the documents, so the
# vectors live in the same space and distances are meaningful.
response = ollama.embeddings(model="mxbai-embed-large", prompt=prompt)
| 60 | + |
| 61 | + |
| 62 | +embedding_prop: Property = DocumentEmbedding.get_property("embedding") |
| 63 | +query = box.query( |
| 64 | + embedding_prop.nearest_neighbor(response["embedding"], 1) |
| 65 | +).build() |
| 66 | + |
| 67 | +results = query.find_with_scores() |
| 68 | +data = results[0][0].document |
| 69 | + |
print(f"Data most relevant to \"{prompt}\" : {data}")

print("Generating the response now...")

# Combine the retrieved document with the original question and let the chat
# model produce the final answer (the "generation" step of RAG).
output = ollama.generate(
    prompt=f"Using this data: {data}. Respond to this prompt: {prompt}",
    model="llama3",
)

print(output["response"])