Upload Files to File Upload Connectors
Learn how to programmatically manage files in your File Upload connectors using the Vectorize API.
What are File Upload Connectors?
File Upload connectors allow you to manually upload files for processing by your RAG pipelines. Unlike automated connectors that sync from external sources (like AWS S3 or Google Drive), File Upload connectors give you direct control over which files to process and when.
Prerequisites
Before you begin, you'll need:
- A Vectorize account
- An API access token (create one here)
- Your organization ID (see below)
- A connector ID (see below)
Finding your Organization ID
Your organization ID is in the Vectorize platform URL:
https://platform.vectorize.io/organization/[YOUR-ORG-ID]
For example, if your URL is:
https://platform.vectorize.io/organization/ecf3fa1d-30d0-4df1-8af6-f4852bc851cb
Your organization ID is: ecf3fa1d-30d0-4df1-8af6-f4852bc851cb
Finding your Connector ID
Navigate to your connector in the Vectorize platform. The connector ID can be found:
- In the URL when viewing the connector details
- In the connector list on the pipeline's configuration page
- By hovering over the connector name in the UI
Setup
This guide assumes you've completed the Getting Started guide. Here's the basic setup:
- Python
- Node.js
import vectorize_client as v

# Placeholder credentials -- substitute the values for your own account
org_id = "your-organization-id"
token = "your-api-token"
connector_id = "your-connector-id"

# Build the configuration first, then hand it to the API client
configuration = v.Configuration(access_token=token)
api = v.ApiClient(configuration)
const { Configuration } = require('@vectorize-io/vectorize-client');

// Placeholder credentials -- substitute the values for your own account
const orgId = 'your-organization-id';
const token = 'your-api-token';
const connectorId = 'your-connector-id';

// Configuration object carrying the access token; passed to the API classes
const api = new Configuration({ accessToken: token });
List Files in a Connector
Use the Uploads API to list all files currently in your connector.
- Python
- Node.js
# The Uploads API is reached through the client created during setup
uploads_api = v.UploadsApi(api)

# Fetch the connector's files and print a short report for each one
try:
    listing = uploads_api.get_upload_files_from_connector(org_id, connector_id)
    files = listing.files
    print(f"Found {len(files)} files in connector")
    for entry in files:
        print(f" - {entry.name}")
        print(f" Size: {entry.size} bytes")
        print(f" Type: {entry.content_type}")
        print(f" Uploaded: {entry.upload_date}")
        # Metadata is only printed when the file has some
        if entry.metadata:
            print(f" Metadata: {entry.metadata}")
        print()  # blank line between entries
except Exception as e:
    print(f"Error listing files: {e}")
const { UploadsApi } = require('@vectorize-io/vectorize-client');

// Create API instance
const uploadsApi = new UploadsApi(api);

/**
 * Print every file currently stored in the connector.
 *
 * The calls are wrapped in an async function because `await` is not allowed
 * at the top level of a CommonJS module, and this snippet loads the SDK with
 * `require`. Errors are logged here rather than rethrown.
 *
 * @returns {Promise<void>}
 */
async function listConnectorFiles() {
  try {
    const response = await uploadsApi.getUploadFilesFromConnector({
      organization: orgId,
      connectorId: connectorId
    });
    console.log(`Found ${response.files.length} files in connector`);
    for (const file of response.files) {
      console.log(` - ${file.name}`);
      console.log(` Size: ${file.size} bytes`);
      console.log(` Type: ${file.contentType}`);
      console.log(` Uploaded: ${file.uploadDate}`);
      // Metadata is only printed when the file has some
      if (file.metadata) {
        console.log(` Metadata:`, file.metadata);
      }
      console.log();
    }
  } catch (error) {
    // Prefer the HTTP status when the error carries a response
    console.error('Error listing files:', error.response?.status || error.message);
    if (error.response?.data) {
      console.error('Details:', error.response.data);
    }
  }
}

// List files
listConnectorFiles();
Upload a File
Uploading a file to a connector is a two-step process:
- Request a pre-signed upload URL from the API
- Upload your file to that URL
- Python
- Node.js
import urllib3
import os
import json

# Create API instances
uploads_api = v.UploadsApi(api)

# Describe the file being sent
file_path = "path/to/document.pdf"
file_name = os.path.basename(file_path)
content_type = "application/pdf"  # Set appropriate content type

# Optional metadata
metadata = {
    "category": "research",
    "tags": ["machine-learning", "2024"],
    "processed": False,
}

try:
    # Step 1: ask the API for an upload URL.
    # Metadata is passed as a JSON string, or None when there is none.
    metadata_json = json.dumps(metadata) if metadata else None
    upload_request = v.StartFileUploadToConnectorRequest(
        name=file_name,
        content_type=content_type,
        metadata=metadata_json,
    )
    start_response = uploads_api.start_file_upload_to_connector(
        org_id,
        connector_id,
        start_file_upload_to_connector_request=upload_request,
    )

    # Step 2: PUT the file contents to the URL returned by step 1
    http = urllib3.PoolManager()
    with open(file_path, "rb") as f:
        put_headers = {
            "Content-Type": content_type,
            "Content-Length": str(os.path.getsize(file_path)),
        }
        response = http.request(
            "PUT",
            start_response.upload_url,
            body=f,
            headers=put_headers,
        )

    # Only a 200 status is treated as success
    if response.status == 200:
        print(f"Successfully uploaded {file_name}")
    else:
        print(f"Upload failed: {response.data}")
except Exception as e:
    print(f"Error during upload: {e}")
const fs = require('fs');
const path = require('path');
// Create API instance
const uploadsApi = new v.UploadsApi(api);
// File details
const filePath = "path/to/document.pdf";
const fileName = path.basename(filePath);
const contentType = "application/pdf"; // Set appropriate content type
// Optional metadata
const metadata = {
category: "research",
tags: ["machine-learning", "2024"],
processed: false
};
try {
// Step 1: Get upload URL
const startResponse = await uploadsApi.startFileUploadToConnector({
organization: orgId,
connectorId: connectorId,
startFileUploadToConnectorRequest: {
name: fileName,
contentType: contentType,
metadata: metadata ? JSON.stringify(metadata) : undefined // Convert to JSON string
}
});
// Step 2: Upload file to the URL
const fileBuffer = fs.readFileSync(filePath);
const uploadResponse = await fetch(startResponse.uploadUrl, {
method: 'PUT',
body: fileBuffer,
headers: {
'Content-Type': contentType
}
});
if (!uploadResponse.ok) {
throw new Error(`Upload failed: ${uploadResponse.statusText}`);
}
console.log(`Successfully uploaded ${fileName}`);
} catch (error) {
console.error('Error during upload:', error.response?.status || error.message);
if (error.response?.data) {
console.error('Details:', error.response.data);
}
}
If a file with the same name already exists in the connector, it will be overwritten.
Working with Metadata
Metadata lets you attach additional information to your files. This information is preserved throughout processing and can be used for filtering and organization in your RAG pipelines.
Metadata Examples
# Flat metadata: string, number, and boolean values
metadata = {
    "department": "engineering",
    "year": 2024,
    "confidential": True,
}

# Richer metadata: list values and a nested object
metadata = {
    "authors": ["John Doe", "Jane Smith"],
    "project": {"name": "AI Research", "phase": "development"},
    "tags": ["ml", "nlp", "research"],
}
Retrieving Files with Metadata
When you list files, the metadata is included in the response:
listing = uploads_api.get_upload_files_from_connector(org_id, connector_id)
for entry in listing.files:
    meta = entry.metadata
    # Skip files with no metadata or with a different department
    if not meta or meta.get("department") != "engineering":
        continue
    print(f"Engineering file: {entry.name}")
Complete Example
Here's a complete example that demonstrates the full workflow:
- Python
- Node.js
import vectorize_client as v
import urllib3
import os
import json
import time

# Your credentials
org_id = "your-organization-id"
token = "your-api-token"
connector_id = "your-connector-id"

# Initialize the client
api = v.ApiClient(v.Configuration(access_token=token))

# Create API instance
uploads_api = v.UploadsApi(api)


def manage_connector_files():
    """Example workflow for managing files in a connector.

    Lists the connector's current files, uploads one PDF with metadata,
    then lists the files again to confirm the upload.

    If the upload request does not return status 200, the failure is
    printed and the function returns without the confirmation listing.
    Any exception raised along the way is caught and printed.
    """
    file_path = "path/to/research-paper.pdf"
    # Derive the upload name from the path so the two cannot drift apart
    file_name = os.path.basename(file_path)

    try:
        # List current files
        print("Current files in connector:")
        response = uploads_api.get_upload_files_from_connector(org_id, connector_id)
        for file in response.files:
            print(f" - {file.name} ({file.size} bytes)")

        # Upload a new file with metadata
        print("\nUploading new file...")
        metadata = {
            "type": "research-paper",
            "subject": "machine-learning",
            "year": 2024,
            "reviewed": False
        }
        start_response = uploads_api.start_file_upload_to_connector(
            org_id,
            connector_id,
            start_file_upload_to_connector_request=v.StartFileUploadToConnectorRequest(
                name=file_name,
                content_type="application/pdf",
                metadata=json.dumps(metadata)  # Convert metadata to JSON string
            )
        )

        # Upload the actual file
        http = urllib3.PoolManager()
        with open(file_path, "rb") as f:
            upload_response = http.request(
                "PUT",
                start_response.upload_url,
                body=f,
                headers={
                    "Content-Type": "application/pdf",
                    "Content-Length": str(os.path.getsize(file_path))
                }
            )

        # Stop on failure: carrying on to the confirmation listing would
        # suggest the upload had worked
        if upload_response.status != 200:
            print(f"Upload failed: {upload_response.data}")
            return
        print("Upload successful!")

        # Wait a moment for processing
        time.sleep(2)

        # List files again to confirm
        print("\nFiles after upload:")
        response = uploads_api.get_upload_files_from_connector(org_id, connector_id)
        for file in response.files:
            print(f" - {file.name}")
            if file.metadata:
                print(f" Metadata: {file.metadata}")
    except Exception as e:
        print(f"Error: {e}")


# Run the example
if __name__ == "__main__":
    manage_connector_files()
const { Configuration, UploadsApi } = require('@vectorize-io/vectorize-client');
const fs = require('fs');
const path = require('path');
// Your credentials
const orgId = 'your-organization-id';
const token = 'your-api-token';
const connectorId = 'your-connector-id';
// Initialize the client
const api = new Configuration({
accessToken: token
});
// Create API instance
const uploadsApi = new UploadsApi(api);
async function manageConnectorFiles() {
const filePath = "path/to/research-paper.pdf";
try {
// List current files
console.log("Current files in connector:");
let response = await uploadsApi.getUploadFilesFromConnector({
organization: orgId,
connectorId: connectorId
});
for (const file of response.files) {
console.log(` - ${file.name} (${file.size} bytes)`);
}
// Upload a new file with metadata
console.log("\nUploading new file...");
const metadata = {
type: "research-paper",
subject: "machine-learning",
year: 2024,
reviewed: false
};
const startResponse = await uploadsApi.startFileUploadToConnector({
organization: orgId,
connectorId: connectorId,
startFileUploadToConnectorRequest: {
name: "research-paper.pdf",
contentType: "application/pdf",
metadata: JSON.stringify(metadata) // Convert metadata to JSON string
}
});
// Upload the actual file
const fileBuffer = fs.readFileSync(filePath);
const uploadResponse = await fetch(startResponse.uploadUrl, {
method: 'PUT',
body: fileBuffer,
headers: {
'Content-Type': 'application/pdf'
}
});
if (uploadResponse.ok) {
console.log("Upload successful!");
}
// Wait a moment for processing
await new Promise(resolve => setTimeout(resolve, 2000));
// List files again to confirm
console.log("\nFiles after upload:");
response = await uploadsApi.getUploadFilesFromConnector({
organization: orgId,
connectorId: connectorId
});
for (const file of response.files) {
console.log(` - ${file.name}`);
if (file.metadata) {
console.log(` Metadata:`, file.metadata);
}
}
} catch (error) {
console.error('Error:', error.response?.status || error.message);
if (error.response?.data) {
console.error('Details:', error.response.data);
}
}
}
// Run the example
manageConnectorFiles();