Upload Files to File Upload Connectors
Learn how to programmatically manage files in your File Upload connectors using the Vectorize API.
What are File Upload Connectors?
File Upload connectors allow you to manually upload files for processing by your RAG pipelines. Unlike automated connectors that sync from external sources (like AWS S3 or Google Drive), File Upload connectors give you direct control over which files to process and when.
List Files in a Connector
Use the Uploads API to list all files currently in your connector.
- Python
- Node.js
import vectorize_client as v
# Placeholders: replace with your organization ID and the ID of your File Upload connector.
organization_id = "your-organization-id"
source_connector_id = "your-connector-id"
const vectorize = require('@vectorize-io/vectorize-client')
// Placeholders: replace with your organization ID and the ID of your File Upload connector.
let organizationId = "your-organization-id";
let sourceConnectorId = "your-connector-id";
- Python
- Node.js
import vectorize_client as v
# Create the Uploads API instance.
# `apiClient` is expected to be an already-initialized v.ApiClient.
uploads_api = v.UploadsApi(apiClient)

# List the files currently in the connector and print a short summary of each.
try:
    response = uploads_api.get_upload_files_from_connector(organization_id, source_connector_id)
    print(f"Found {len(response.files)} files in connector")
    for file in response.files:
        print(f" 📄 {file.name} ({file.size:,} bytes, Uploaded: {file.last_modified})")
        # Metadata is optional; only print it when the file carries some.
        if file.metadata:
            print(f" Metadata: {file.metadata}")
        print()
except Exception as e:
    # Report the failure instead of letting the example crash.
    print(f"Error listing files: {e}")
const { UploadsApi } = vectorize;

// Create the Uploads API instance from the already-initialized API client.
const uploadsApi = new UploadsApi(apiClient);

// List files. `response` is declared outside the try block so it remains
// available to any code that runs after the listing attempt.
let response;
try {
  response = await uploadsApi.getUploadFilesFromConnector({
    organizationId: organizationId,
    connectorId: sourceConnectorId
  });
  console.log(`Found ${response.files.length} files in connector`);
  for (const file of response.files) {
    console.log(` 📄 ${file.name} (${file.size.toLocaleString()} bytes, Uploaded: ${file.lastModified})`);
    if (file.metadata) {
      // Interpolating an object directly would print "[object Object]",
      // so serialize anything that is not already a string.
      const metadataText = typeof file.metadata === 'string'
        ? file.metadata
        : JSON.stringify(file.metadata);
      console.log(` Metadata: ${metadataText}`);
    }
    console.log();
  }
} catch (error) {
  console.log(`Error listing files: ${error.message}`);
  // Fall back to an empty listing so later code can still read response.files.
  response = { files: [] };
}
Upload a File
Uploading a file to a connector is a two-step process:
1. Request a pre-signed upload URL from the API
2. Upload your file to that URL
- Python
- Node.js
import vectorize_client as v
import urllib3
import os
import json
# Placeholders: replace with your own IDs and the file you want to upload.
organization_id = "your-organization-id"
source_connector_id = "your-connector-id"
# Local path of the file whose bytes are uploaded.
file_path = "path/to/your/file.pdf"
# Name sent in the upload request.
file_name = "file.pdf"
const vectorize = require('@vectorize-io/vectorize-client')
const fs = require('fs')
// Placeholders: replace with your own IDs and the file you want to upload.
let organizationId = "your-organization-id";
let sourceConnectorId = "your-connector-id";
// Local path of the file whose bytes are uploaded.
let filePath = "path/to/your/file.pdf";
// Name sent in the upload request.
let fileName = "file.pdf";
- Python
- Node.js
import vectorize_client as v
import os
import json
import urllib3
# Create the Uploads API instance.
# `apiClient` is expected to be an already-initialized v.ApiClient.
uploads_api = v.UploadsApi(apiClient)

# File details
content_type = "application/pdf"  # Set appropriate content type

# Optional metadata - all values as strings
metadata = {
    "category": "research",
    "tags": "machine-learning,2024",  # Store as comma-separated string
    "processed": "false"  # Store boolean as string
}

try:
    # Step 1: Ask the API for an upload URL for this file.
    start_response = uploads_api.start_file_upload_to_connector(
        organization_id,
        source_connector_id,
        start_file_upload_to_connector_request=v.StartFileUploadToConnectorRequest(
            name=file_name,
            content_type=content_type,
            # The request takes metadata as a single JSON string.
            metadata=json.dumps(metadata) if metadata else None
        )
    )

    # Step 2: PUT the file contents to the returned URL.
    http = urllib3.PoolManager()
    with open(file_path, "rb") as f:
        response = http.request(
            "PUT",
            start_response.upload_url,
            body=f,
            headers={
                "Content-Type": content_type,
                "Content-Length": str(os.path.getsize(file_path))
            }
        )

    if response.status != 200:
        print(f"Upload failed: {response.data}")
    else:
        print(f"Successfully uploaded {file_name}")
except Exception as e:
    # Covers both the API call and the HTTP upload.
    print(f"Error during upload: {e}")
const fs = require('fs')
const { UploadsApi } = vectorize;
// Create the Uploads API instance from the already-initialized API client
const uploadsApi = new UploadsApi(apiClient);
/**
 * Upload a local file, with example metadata, to a File Upload connector.
 *
 * Works in two steps: request an upload URL from the Uploads API, then PUT
 * the file bytes to that URL. Relies on `uploadsApi`, `organizationId` and
 * `fs` being defined in the surrounding scope.
 *
 * @param {string} connectorId - ID of the File Upload connector.
 * @param {string} filePath - Local path of the file to read.
 * @param {string} fileName - Name sent in the upload request.
 * @returns {Promise<{fileId: *, uploadStatus: number}>} The file ID from the
 *   start-upload response and the HTTP status of the PUT.
 * @throws {Error} If the API call fails or the PUT does not return 200.
 */
async function uploadFileWithMetadata(connectorId, filePath, fileName) {
  // File details
  const contentType = "application/pdf"; // Set appropriate content type

  // Optional metadata - all values must be strings
  const metadata = {
    "category": "research",
    "tags": "machine-learning,2024", // Store lists as comma-separated strings
    "processed": "false" // Store booleans as strings
  };

  try {
    // Step 1: Get pre-signed upload URL from Vectorize
    const startRequest = {
      name: fileName,
      contentType: contentType,
      // The request takes metadata as a single JSON string.
      metadata: metadata ? JSON.stringify(metadata) : undefined
    };
    const startResponse = await uploadsApi.startFileUploadToConnector({
      organizationId: organizationId,
      connectorId: connectorId,
      startFileUploadToConnectorRequest: startRequest
    });

    // Step 2: Upload file directly to the pre-signed URL
    const fileBuffer = fs.readFileSync(filePath);
    const fileStats = fs.statSync(filePath);
    const uploadResponse = await fetch(startResponse.uploadUrl, {
      method: 'PUT',
      body: fileBuffer,
      headers: {
        'Content-Type': contentType,
        'Content-Length': fileStats.size.toString()
      }
    });

    if (uploadResponse.status !== 200) {
      const errorText = await uploadResponse.text();
      throw new Error(`Upload failed (${uploadResponse.status}): ${errorText}`);
    }

    console.log(`✅ Successfully uploaded ${fileName} with metadata`);
    return { fileId: startResponse.fileId, uploadStatus: uploadResponse.status }; // Return file ID and status
  } catch (error) {
    // Log for visibility, then rethrow so the caller can react to the failure.
    console.error(`❌ Error during upload: ${error.message}`);
    throw error;
  }
}
// Usage: `await` must run in an async context (an ES module or an async function).
const result = await uploadFileWithMetadata(sourceConnectorId, filePath, fileName);
If a file with the same name already exists in the connector, it will be overwritten.
Working with Metadata
Metadata allows you to attach additional information to your files that will be preserved throughout processing and can be used for filtering and organization in your RAG pipelines.
Metadata Examples
The following examples show metadata structures, but remember that all metadata values must be strings when uploading. Complex types will be converted to JSON strings.
# Each `metadata` assignment below is an independent example; the second
# rebinds the name rather than extending the first.

# Simple key-value pairs
metadata = {
    "department": "engineering",
    "year": "2024",  # Numbers as strings
    "confidential": "true"  # Booleans as strings
}

# Arrays and nested objects (will be JSON stringified)
metadata = {
    "authors": ["John Doe", "Jane Smith"],
    "project": {
        "name": "AI Research",
        "phase": "development"
    },
    "tags": ["ml", "nlp", "research"]
}

# When uploading, convert to JSON string (requires `import json`):
metadata_string = json.dumps(metadata)
Retrieving Files with Metadata
When you list files, the metadata is included in the response:
# List the connector's files and print those tagged with department "engineering".
response = uploads_api.get_upload_files_from_connector(organization_id, source_connector_id)
for file in response.files:
    # Files without metadata are skipped before the key lookup.
    if file.metadata and file.metadata.get("department") == "engineering":
        print(f"Engineering file: {file.name}")
Complete Example
Here's all the code from this guide combined into a complete, runnable example:
- Python
- Node.js
"""Complete example: list the files in a File Upload connector, then upload a file with metadata."""
import json
import os

import urllib3
import vectorize_client as v

# Get credentials from environment variables
organization_id = os.environ.get("VECTORIZE_ORGANIZATION_ID")
api_key = os.environ.get("VECTORIZE_API_KEY")
if not organization_id or not api_key:
    raise ValueError("Please set VECTORIZE_ORGANIZATION_ID and VECTORIZE_API_KEY environment variables")

# Placeholders: replace with your connector ID and the file you want to upload.
source_connector_id = "your-connector-id"
file_path = "path/to/your/file.pdf"
file_name = "file.pdf"

# Initialize the API client
configuration = v.Configuration(
    host="https://api.vectorize.io",
    api_key={"ApiKeyAuth": api_key}
)
api = v.ApiClient(configuration)
print(f"✅ API client initialized for organization: {organization_id}")

# One Uploads API instance is shared by the listing and the upload below.
uploads_api = v.UploadsApi(api)

# --- List files ---
try:
    response = uploads_api.get_upload_files_from_connector(organization_id, source_connector_id)
    print(f"Found {len(response.files)} files in connector")
    for file in response.files:
        print(f" 📄 {file.name} ({file.size:,} bytes, Uploaded: {file.last_modified})")
        # Metadata is optional; only print it when the file carries some.
        if file.metadata:
            print(f" Metadata: {file.metadata}")
        print()
except Exception as e:
    print(f"Error listing files: {e}")

# --- Upload a file ---
# File details
content_type = "application/pdf"  # Set appropriate content type

# Optional metadata - all values as strings
metadata = {
    "category": "research",
    "tags": "machine-learning,2024",  # Store as comma-separated string
    "processed": "false"  # Store boolean as string
}

try:
    # Step 1: Ask the API for an upload URL for this file.
    start_response = uploads_api.start_file_upload_to_connector(
        organization_id,
        source_connector_id,
        start_file_upload_to_connector_request=v.StartFileUploadToConnectorRequest(
            name=file_name,
            content_type=content_type,
            # The request takes metadata as a single JSON string.
            metadata=json.dumps(metadata) if metadata else None
        )
    )

    # Step 2: PUT the file contents to the returned URL.
    http = urllib3.PoolManager()
    with open(file_path, "rb") as f:
        response = http.request(
            "PUT",
            start_response.upload_url,
            body=f,
            headers={
                "Content-Type": content_type,
                "Content-Length": str(os.path.getsize(file_path))
            }
        )

    if response.status != 200:
        print(f"Upload failed: {response.data}")
    else:
        print(f"Successfully uploaded {file_name}")
except Exception as e:
    # Covers both the API call and the HTTP upload.
    print(f"Error during upload: {e}")
// Complete example: list the files in a File Upload connector, then upload a
// file with metadata.
const fs = require('fs');
const vectorize = require('@vectorize-io/vectorize-client');

const { UploadsApi } = vectorize;

// Get credentials from environment variables
const organizationId = process.env.VECTORIZE_ORGANIZATION_ID;
const apiKey = process.env.VECTORIZE_API_KEY;
if (!organizationId || !apiKey) {
  throw new Error("Please set VECTORIZE_ORGANIZATION_ID and VECTORIZE_API_KEY environment variables");
}

// Placeholders: replace with your connector ID and the file you want to upload.
const sourceConnectorId = "your-connector-id";
const filePath = "path/to/your/file.pdf";
const fileName = "file.pdf";

// Initialize the API client
const configuration = new vectorize.Configuration({
  basePath: 'https://api.vectorize.io',
  accessToken: apiKey
});
const apiClient = new vectorize.ApiClient(configuration);
console.log(`✅ API client initialized for organization: ${organizationId}`);

// One Uploads API instance is shared by the listing and the upload below.
const uploadsApi = new UploadsApi(apiClient);

/**
 * Print a summary of every file currently in the connector.
 *
 * @param {string} connectorId - ID of the File Upload connector.
 * @returns {Promise<Array>} The listed files, or an empty array if listing failed.
 */
async function listFiles(connectorId) {
  try {
    const response = await uploadsApi.getUploadFilesFromConnector({
      organizationId: organizationId,
      connectorId: connectorId
    });
    console.log(`Found ${response.files.length} files in connector`);
    for (const file of response.files) {
      console.log(` 📄 ${file.name} (${file.size.toLocaleString()} bytes, Uploaded: ${file.lastModified})`);
      if (file.metadata) {
        // Interpolating an object directly would print "[object Object]",
        // so serialize anything that is not already a string.
        const metadataText = typeof file.metadata === 'string'
          ? file.metadata
          : JSON.stringify(file.metadata);
        console.log(` Metadata: ${metadataText}`);
      }
      console.log();
    }
    return response.files;
  } catch (error) {
    // A failed listing is reported but does not stop the rest of the example.
    console.log(`Error listing files: ${error.message}`);
    return [];
  }
}

/**
 * Upload a local file, with example metadata, to a File Upload connector.
 *
 * @param {string} connectorId - ID of the File Upload connector.
 * @param {string} filePath - Local path of the file to read.
 * @param {string} fileName - Name sent in the upload request.
 * @returns {Promise<{fileId: *, uploadStatus: number}>} The file ID from the
 *   start-upload response and the HTTP status of the PUT.
 * @throws {Error} If the API call fails or the PUT does not return 200.
 */
async function uploadFileWithMetadata(connectorId, filePath, fileName) {
  // File details
  const contentType = "application/pdf"; // Set appropriate content type

  // Optional metadata - all values must be strings
  const metadata = {
    "category": "research",
    "tags": "machine-learning,2024", // Store lists as comma-separated strings
    "processed": "false" // Store booleans as strings
  };

  try {
    // Step 1: Get pre-signed upload URL from Vectorize
    const startRequest = {
      name: fileName,
      contentType: contentType,
      // The request takes metadata as a single JSON string.
      metadata: metadata ? JSON.stringify(metadata) : undefined
    };
    const startResponse = await uploadsApi.startFileUploadToConnector({
      organizationId: organizationId,
      connectorId: connectorId,
      startFileUploadToConnectorRequest: startRequest
    });

    // Step 2: Upload file directly to the pre-signed URL
    const fileBuffer = fs.readFileSync(filePath);
    const fileStats = fs.statSync(filePath);
    const uploadResponse = await fetch(startResponse.uploadUrl, {
      method: 'PUT',
      body: fileBuffer,
      headers: {
        'Content-Type': contentType,
        'Content-Length': fileStats.size.toString()
      }
    });

    if (uploadResponse.status !== 200) {
      const errorText = await uploadResponse.text();
      throw new Error(`Upload failed (${uploadResponse.status}): ${errorText}`);
    }

    console.log(`✅ Successfully uploaded ${fileName} with metadata`);
    return { fileId: startResponse.fileId, uploadStatus: uploadResponse.status }; // Return file ID and status
  } catch (error) {
    // Log for visibility, then rethrow so the caller can react to the failure.
    console.error(`❌ Error during upload: ${error.message}`);
    throw error;
  }
}

// CommonJS scripts cannot use top-level `await`, so the steps run inside main().
async function main() {
  await listFiles(sourceConnectorId);
  const result = await uploadFileWithMetadata(sourceConnectorId, filePath, fileName);
  return result;
}

main().catch((error) => {
  console.error(`Example failed: ${error.message}`);
  process.exitCode = 1;
});