In Vectorize, you can create pipelines that ingest data from multiple sources into a Vector Database. In this guide, we will deploy a pipeline that ingests a local file.
import urllib3, json, os
# Local file to ingest; its base name is sent as the upload's `name`.
file_path = "path/to/file.pdf"
# MIME type declared in the upload request and repeated on the PUT below.
content_type = "application/pdf"

http = urllib3.PoolManager()
uploads_api = v.UploadsApi(api)

# add additional metadata that will be stored along with each chunk in the vector database
metadata = {"created-from-api": True}

# Step 1: start the upload on the source connector. The response carries the
# URL that the file bytes must be PUT to.
upload_response = uploads_api.start_file_upload_to_connector(
    org,
    source_connector_id,
    v.StartFileUploadToConnectorRequest(
        # os.path.basename honours the platform's path separators, whereas
        # splitting on "/" returns the whole path for Windows-style paths.
        name=os.path.basename(file_path),
        content_type=content_type,
        # The metadata is passed as a JSON-encoded string.
        metadata=json.dumps(metadata),
    ),
)

# Step 2: PUT the file to the returned URL. The open file object is passed as
# the body, and Content-Length is set explicitly from the file size on disk.
with open(file_path, "rb") as f:
    response = http.request(
        "PUT",
        upload_response.upload_url,
        body=f,
        headers={
            "Content-Type": content_type,
            "Content-Length": str(os.path.getsize(file_path)),
        },
    )

if response.status != 200:
    print("Upload failed: ", response.data)
else:
    print("Upload successful")
// Read the whole file into memory; the buffer is sent as the PUT body below.
const fileBuffer = fs.readFileSync("path/to/file.pdf");

// Describe the file that is about to be uploaded.
const uploadRequest = {
    name: "file.pdf",
    contentType: "application/pdf",
    // add additional metadata that will be stored along with each chunk in the vector database
    metadata: JSON.stringify({"mymeta": true}),
};

// Start the upload on the source connector; the response carries the URL
// that the file bytes must be PUT to.
const uploadResponse = await uploadsApi.startFileUploadToConnector({
    organization: org,
    connectorId: sourceConnectorId,
    startFileUploadToConnectorRequest: uploadRequest,
});

// Send the file contents to the returned upload URL.
const fetchResponse = await fetch(uploadResponse.uploadUrl, {
    method: "PUT",
    headers: {"Content-Type": "application/pdf"},
    body: fileBuffer,
});

// Stop with an error when the response is not OK.
if (!fetchResponse.ok) {
    throw new Error(`Failed to upload file: ${fetchResponse.statusText}`);
}
AI Platform and Vector Database
We will use the Built-In AI Platform and Vector Database. Since they already exist in the platform, we do not need to create them; we only need to retrieve their IDs.
# Fetch the organization's AI platform connectors and keep the ID of the first
# one whose type is VECTORIZE. Indexing [0] raises IndexError if none matches.
ai_platforms = connectors_api.get_ai_platform_connectors(org)
builtin_ai_platform_ids = [
    connector.id
    for connector in ai_platforms.ai_platform_connectors
    if connector.type == v.AIPlatformType.VECTORIZE
]
builtin_ai_platform = builtin_ai_platform_ids[0]

# Same lookup for the destination (vector database) connectors.
vector_databases = connectors_api.get_destination_connectors(org)
builtin_vector_db_ids = [
    connector.id
    for connector in vector_databases.destination_connectors
    if connector.type == v.DestinationConnectorType.VECTORIZE
]
builtin_vector_db = builtin_vector_db_ids[0]