Upload Files to File Upload Connectors
Learn how to programmatically manage files in your File Upload connectors using the Vectorize API.
What are File Upload Connectors?
File Upload connectors allow you to manually upload files for processing by your RAG pipelines. Unlike automated connectors that sync from external sources (like AWS S3 or Google Drive), File Upload connectors give you direct control over which files to process and when.
List Files in a Connector
Use the Uploads API to list all files currently in your connector.
- Python
- Node.js
import vectorize_client as v
# Wrap the already-configured client in the Uploads API
uploads_api = v.UploadsApi(apiClient)

# Fetch the connector's file listing and print one entry per file
try:
    listing = uploads_api.get_upload_files_from_connector(
        organization_id,
        source_connector_id,
    )
    files = listing.files
    print(f"Found {len(files)} files in connector")
    for entry in files:
        print(f" π {entry.name} ({entry.size:,} bytes, Uploaded: {entry.last_modified})")
        if entry.metadata:
            print(f" Metadata: {entry.metadata}")
        print()
except Exception as err:
    # Failures are reported on stdout instead of being raised
    print(f"Error listing files: {err}")
// This snippet awaits API calls, so it is wrapped in an immediately-invoked async function
(async () => {
  const vectorize = require('@vectorize-io/vectorize-client');
  const { UploadsApi } = vectorize;

  // Uploads API wrapper bound to the pre-configured client
  const uploadsApi = new UploadsApi(apiClient);

  // Declared outside the try block so the catch branch can substitute an empty result
  let response;
  try {
    response = await uploadsApi.getUploadFilesFromConnector({
      organizationId: "your-org-id",
      connectorId: sourceConnectorId
    });

    const listedFiles = response.files;
    console.log(`Found ${listedFiles.length} files in connector`);

    for (const entry of listedFiles) {
      const sizeText = entry.size.toLocaleString();
      console.log(` π ${entry.name} (${sizeText} bytes, Uploaded: ${entry.lastModified})`);
      if (entry.metadata) {
        console.log(` Metadata: ${entry.metadata}`);
      }
      console.log();
    }
  } catch (err) {
    console.log(`Error listing files: ${err.message}`);
    // Fall back to an empty listing so execution can continue
    response = { files: [] };
  }
})();
Upload a File
Uploading a file to a connector is a two-step process:
- Request a pre-signed upload URL from the API
- Upload your file to that URL
- Python
- Node.js
import vectorize_client as v
import urllib3
import os
import json
# Uploads API wrapper for the configured client
uploads_api = v.UploadsApi(apiClient)

# MIME type sent to the API and again with the PUT request
content_type = "application/pdf"  # Set appropriate content type

# Optional metadata - every value is a string
metadata = dict(
    category="research",
    tags="machine-learning,2024",  # Store as comma-separated string
    processed="false",             # Store boolean as string
)

try:
    # Step 1: ask the API for an upload URL
    upload_request = v.StartFileUploadToConnectorRequest(
        name=file_name,
        content_type=content_type,
        # The metadata dict is sent as a single JSON string
        metadata=json.dumps(metadata) if metadata else None,
    )
    start_response = uploads_api.start_file_upload_to_connector(
        organization_id,
        source_connector_id,
        start_file_upload_to_connector_request=upload_request,
    )

    # Step 2: PUT the file contents to that URL
    http = urllib3.PoolManager()
    with open(file_path, "rb") as stream:
        put_headers = {
            "Content-Type": content_type,
            "Content-Length": str(os.path.getsize(file_path)),
        }
        response = http.request(
            "PUT",
            start_response.upload_url,
            body=stream,
            headers=put_headers,
        )

    if response.status == 200:
        print(f"Successfully uploaded {file_name}")
    else:
        print(f"Upload failed: {response.data}")
except Exception as err:
    print(f"Error during upload: {err}")
// This snippet awaits API calls, so it is wrapped in an immediately-invoked async function
(async () => {
  const vectorize = require('@vectorize-io/vectorize-client');
  const fs = require('fs');
  const { UploadsApi } = vectorize;

  // Uploads API wrapper bound to the pre-configured client
  const uploadsApi = new UploadsApi(apiClient);

  // MIME type sent to the API and again with the PUT request
  const contentType = "application/pdf"; // Set appropriate content type

  // Optional metadata - every value is a string
  const metadata = {
    category: "research",
    tags: "machine-learning,2024", // Store as comma-separated string
    processed: "false"             // Store boolean as string
  };

  try {
    // Step 1: ask the API for an upload URL
    const uploadRequest = {
      name: fileName,
      contentType: contentType,
      // The metadata object is sent as a single JSON string
      metadata: metadata ? JSON.stringify(metadata) : undefined
    };
    const startResponse = await uploadsApi.startFileUploadToConnector({
      organizationId: "your-org-id",
      connectorId: sourceConnectorId,
      startFileUploadToConnectorRequest: uploadRequest
    });

    // Step 2: PUT the file contents to that URL
    const fileBuffer = fs.readFileSync(filePath);
    const fileStats = fs.statSync(filePath);
    const putHeaders = {
      'Content-Type': contentType,
      'Content-Length': fileStats.size.toString()
    };
    const uploadResponse = await fetch(startResponse.uploadUrl, {
      method: 'PUT',
      body: fileBuffer,
      headers: putHeaders
    });

    if (uploadResponse.status === 200) {
      console.log(`Successfully uploaded ${fileName}`);
    } else {
      const errorText = await uploadResponse.text();
      console.log(`Upload failed: ${errorText}`);
    }
  } catch (err) {
    console.log(`Error during upload: ${err.message}`);
  }
})();
If a file with the same name already exists in the connector, it will be overwritten.
Working with Metadata
Metadata allows you to attach additional information to your files that will be preserved throughout processing and can be used for filtering and organization in your RAG pipelines.
Metadata Examples
The following examples show metadata structures, but remember that all metadata values must be strings when uploading. Complex types will be converted to JSON strings.
# Flat metadata: every value is already a string
metadata = dict(
    department="engineering",
    year="2024",          # Numbers as strings
    confidential="true",  # Booleans as strings
)

# Structured metadata: lists and nested dicts (will be JSON stringified)
metadata = dict(
    authors=["John Doe", "Jane Smith"],
    project=dict(
        name="AI Research",
        phase="development",
    ),
    tags=["ml", "nlp", "research"],
)

# When uploading, serialize the whole structure to one JSON string:
metadata_string = json.dumps(metadata)
Retrieving Files with Metadata
When you list files, the metadata is included in the response:
import json

response = uploads_api.get_upload_files_from_connector(organization_id, connector_id)
for file in response.files:
    metadata = file.metadata
    # Metadata is uploaded as a JSON string, so it may come back as a string
    # rather than a dict. Decode it before looking up keys; calling .get() on
    # a str would raise AttributeError.
    if isinstance(metadata, str):
        try:
            metadata = json.loads(metadata)
        except json.JSONDecodeError:
            metadata = None  # Not valid JSON: treat as "no usable metadata"
    if isinstance(metadata, dict) and metadata.get("department") == "engineering":
        print(f"Engineering file: {file.name}")
Complete Example
Here's all the code from this guide combined into a complete, runnable example:
- Python
- Node.js
Required Environment Variables:
• `VECTORIZE_API_KEY`
• `VECTORIZE_ORGANIZATION_ID`
Required Files:
• `example.txt`: A text file to upload
Additional Requirements:
• Creates a source connector in the example
#!/usr/bin/env python3
"""
Complete example for file upload with metadata.
This is a hand-written example that corresponds to the test file:
api-clients/python/tests/connectors/file_upload/file_upload.py
IMPORTANT: Keep this file in sync with the test file's snippets!
"""
import os
import sys
import json
import time
import tempfile
import urllib3
import vectorize_client as v
def get_api_config():
    """Build the API configuration from environment variables.

    Reads ``VECTORIZE_ORGANIZATION_ID`` and ``VECTORIZE_API_KEY`` from the
    environment.

    Returns:
        tuple: ``(configuration, organization_id)``, where ``configuration`` is
        a ``v.Configuration`` pointing at ``https://api.vectorize.io/v1``.

    Raises:
        ValueError: If either environment variable is missing or empty.
            ``main()`` catches ``ValueError``, prints the export hints and
            exits with status 1, so this helper no longer terminates the
            interpreter itself.
    """
    organization_id = os.environ.get("VECTORIZE_ORGANIZATION_ID")
    api_key = os.environ.get("VECTORIZE_API_KEY")

    if not organization_id or not api_key:
        raise ValueError(
            "VECTORIZE_ORGANIZATION_ID and VECTORIZE_API_KEY must both be set. "
            "Get your API key from: https://app.vectorize.io/settings"
        )

    # Always use production API
    configuration = v.Configuration(
        host="https://api.vectorize.io/v1",
        access_token=api_key,
    )
    return configuration, organization_id
def create_file_upload_connector(api_client, organization_id):
    """Create a File Upload source connector and return its ID.

    Args:
        api_client: API client passed to ``v.SourceConnectorsApi``.
        organization_id: Organization that will own the connector.

    Returns:
        The ``id`` of the connector reported by the create call.

    Raises:
        Exception: Any error from building or sending the request is printed
            and then re-raised unchanged.
    """
    connectors_api = v.SourceConnectorsApi(api_client)
    try:
        # Create the FileUpload object
        file_upload = v.FileUpload(
            name="file-upload-example",
            type="FILE_UPLOAD",
            config={},
        )
        # Create the request with the FileUpload object
        request = v.CreateSourceConnectorRequest(file_upload)
        response = connectors_api.create_source_connector(
            organization_id,
            request,
        )
        connector = response.connector
        print(f"✅ Created file upload connector: {connector.name}")
        print(f" Connector ID: {connector.id}\n")
        return connector.id
    except Exception as e:
        print(f"❌ Error creating file upload connector: {e}")
        raise
def list_files(api_client, organization_id, source_connector_id):
    """Print every file in the connector and return the file list.

    Returns an empty list when the listing call (or printing) fails; the
    error is reported on stdout.
    """
    uploads_api = v.UploadsApi(api_client)

    try:
        listing = uploads_api.get_upload_files_from_connector(
            organization_id,
            source_connector_id,
        )
        print(f"Found {len(listing.files)} files in connector")
        for entry in listing.files:
            print(f" π {entry.name} ({entry.size:,} bytes, Uploaded: {entry.last_modified})")
            if entry.metadata:
                print(f" Metadata: {entry.metadata}")
            print()
        return listing.files
    except Exception as err:
        print(f"Error listing files: {err}")
        return []
def create_sample_pdf():
    """Write a small sample text file to a temporary location.

    Despite the name, the file is plain text with a ``.txt`` suffix. The
    caller is responsible for deleting it (``delete=False``).

    Returns:
        tuple: ``(temp_path, "sample_document.txt")`` - the path on disk and
        the name to use when uploading.
    """
    # A plain text stand-in; in a real scenario you would supply your own file
    content_lines = [
        "Sample Document for File Upload",
        "This is a sample document that demonstrates file upload functionality with metadata.",
        "Key points:",
        "- Files can be uploaded with custom metadata",
        "- Metadata is stored as JSON",
        "- Files are processed asynchronously",
        "For more information, visit the Vectorize documentation.",
    ]
    sample_content = "\n".join(content_lines) + "\n"

    handle = tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False)
    with handle:
        handle.write(sample_content)

    return handle.name, "sample_document.txt"
def upload_file(api_client, organization_id, source_connector_id, file_path, file_name):
    """Upload ``file_path`` to the connector as ``file_name`` with sample metadata.

    Requests an upload URL from the API, then PUTs the file contents to it.

    Returns:
        bool: ``True`` when the PUT responds with status 200; ``False`` on any
        other status or on any exception (both are reported on stdout).
    """
    uploads_api = v.UploadsApi(api_client)

    # MIME type sent to the API and again with the PUT request
    content_type = "text/plain"  # Set appropriate content type

    # Optional metadata - every value is a string
    metadata = dict(
        category="research",
        tags="machine-learning,2024",  # Store as comma-separated string
        processed="false",             # Store boolean as string
    )

    try:
        # Step 1: ask the API for an upload URL
        upload_request = v.StartFileUploadToConnectorRequest(
            name=file_name,
            content_type=content_type,
            # The metadata dict is sent as a single JSON string
            metadata=json.dumps(metadata) if metadata else None,
        )
        start_response = uploads_api.start_file_upload_to_connector(
            organization_id,
            source_connector_id,
            start_file_upload_to_connector_request=upload_request,
        )

        # Step 2: PUT the file contents to that URL
        http = urllib3.PoolManager()
        with open(file_path, "rb") as stream:
            put_headers = {
                "Content-Type": content_type,
                "Content-Length": str(os.path.getsize(file_path)),
            }
            response = http.request(
                "PUT",
                start_response.upload_url,
                body=stream,
                headers=put_headers,
            )

        succeeded = response.status == 200
        if succeeded:
            print(f"Successfully uploaded {file_name}")
        else:
            print(f"Upload failed: {response.data}")
        return succeeded
    except Exception as err:
        print(f"Error during upload: {err}")
        return False
def verify_upload(api_client, organization_id, source_connector_id, file_name):
    """Check that ``file_name`` appears in the connector's file listing.

    Args:
        api_client: API client passed to ``v.UploadsApi``.
        organization_id: Organization that owns the connector.
        source_connector_id: Connector whose files are listed.
        file_name: Name to look for among the listed files.

    Returns:
        bool: ``True`` if a file with that name is listed; ``False`` if it is
        absent or if listing fails (the error is reported on stdout).
    """
    uploads_api = v.UploadsApi(api_client)
    try:
        response = uploads_api.get_upload_files_from_connector(organization_id, source_connector_id)

        # First file whose name matches, or None when there is no match
        uploaded_file = next(
            (candidate for candidate in response.files if candidate.name == file_name),
            None,
        )
        if uploaded_file is None:
            print(f"❌ File {file_name} not found after upload")
            return False

        print(f" ✅ Found: {uploaded_file.name}")
        # Read metadata defensively: the attribute is only printed if present and non-empty
        metadata = getattr(uploaded_file, 'metadata', None)
        if metadata:
            print(f" π Metadata: {metadata}")
        print(f"✅ Upload verified! File size: {uploaded_file.size} bytes")
        return True
    except Exception as e:
        print(f"❌ Error verifying upload: {e}")
        return False
def cleanup_connector(api_client, organization_id, connector_id):
    """Delete the source connector created by this example (best effort).

    Args:
        api_client: API client passed to ``v.SourceConnectorsApi``.
        organization_id: Organization that owns the connector.
        connector_id: ID of the connector to delete.

    Failures are reported as a warning and never raised, so cleanup cannot
    mask an earlier error.
    """
    try:
        connectors_api = v.SourceConnectorsApi(api_client)
        connectors_api.delete_source_connector(
            organization_id,
            connector_id,
        )
        print(f"✅ Cleaned up connector: {connector_id}")
    except Exception as e:
        print(f"⚠️ Warning: Could not clean up connector: {e}")
def _remove_existing_file(api_client, organization_id, connector_id, file_name):
    """Delete a previously uploaded copy of ``file_name``; a failure only warns."""
    print(f"ποΈ Cleaning up existing file: {file_name}")
    uploads_api = v.UploadsApi(api_client)
    try:
        uploads_api.delete_file_from_connector(organization_id, connector_id, file_name)
        time.sleep(2)  # Wait for deletion
    except Exception as e:
        print(f"β οΈ Warning: Could not delete existing file: {e}")


def _upload_and_verify(api_client, organization_id, connector_id, file_path, file_name):
    """Upload the sample file, then verify it and list the connector again."""
    print("π€ Uploading File with Metadata")
    if not upload_file(api_client, organization_id, connector_id, file_path, file_name):
        print(f"\nβ File upload failed")
        return

    # Give the service a moment before checking the listing
    print("β³ Waiting for file processing...")
    time.sleep(5)  # Reduced wait time for example

    print("\nπ Verifying Upload")
    verified = verify_upload(api_client, organization_id, connector_id, file_name)

    # Show the listing again so the uploaded file is visible
    print("\nπ Listing Files (After Upload)")
    list_files(api_client, organization_id, connector_id)

    if verified:
        print(f"\nπ File upload with metadata completed successfully!")
    else:
        print(f"\nβ οΈ Upload completed but verification failed")


def main():
    """Run the example: create a connector, upload a file, verify it, clean up.

    Exits with status 1 on a configuration error or any other exception. The
    temporary sample file is always removed.
    """
    print("=== File Upload with Metadata Example ===\n")
    temp_file_path = None
    try:
        # Resolve credentials and report the target
        configuration, organization_id = get_api_config()
        print(f"βοΈ Configuration:")
        print(f" Organization ID: {organization_id}")
        print(f" Host: {configuration.host}\n")

        # Write the sample file that will be uploaded
        temp_file_path, file_name = create_sample_pdf()
        print(f"π Created sample file: {file_name}\n")

        # The API client is used as a context manager for the rest of the run
        with v.ApiClient(configuration) as api_client:
            print("π Creating File Upload Connector")
            connector_id = create_file_upload_connector(api_client, organization_id)
            try:
                # List initial files (should be empty)
                print("π Listing Files (Initial)")
                existing = list_files(api_client, organization_id, connector_id)

                # Remove a stale copy of the sample file if one is already there
                if file_name in [entry.name for entry in existing]:
                    _remove_existing_file(api_client, organization_id, connector_id, file_name)

                _upload_and_verify(
                    api_client, organization_id, connector_id, temp_file_path, file_name
                )
            finally:
                # The connector is deleted whether or not the upload worked
                print(f"\nπ§Ή Cleaning Up")
                cleanup_connector(api_client, organization_id, connector_id)
    except ValueError as e:
        print(f"β Configuration Error: {e}")
        print("\nπ‘ Make sure to set the required environment variables:")
        print(" export VECTORIZE_ORGANIZATION_ID='your-org-id'")
        print(" export VECTORIZE_API_KEY='your-api-key'")
        sys.exit(1)
    except Exception as e:
        print(f"β Error: {e}")
        sys.exit(1)
    finally:
        # Remove the temporary sample file if it was created
        if temp_file_path and os.path.exists(temp_file_path):
            os.unlink(temp_file_path)


if __name__ == "__main__":
    main()
Required Environment Variables:
• `VECTORIZE_API_KEY`
• `VECTORIZE_ORGANIZATION_ID`
Required Files:
• `example.txt`: A text file to upload
Additional Requirements:
• Creates a source connector in the example
#!/usr/bin/env node
/**
* Complete example for file upload operations.
* This is a hand-written example that corresponds to the test file:
* api-clients/javascript/tests/connectors/file_upload/file_upload.js
*
* IMPORTANT: Keep this file in sync with the test file's snippets!
*/
const vectorize = require('@vectorize-io/vectorize-client');
const fs = require('fs');
const path = require('path');
const os = require('os');
// For test environment, use test configuration
/**
 * Resolve the API configuration and organization ID.
 *
 * When VECTORIZE_TEST_MODE is 'true' and ../common/test_config.js exists, the
 * values come from that module's getApiClient(). Otherwise they come from the
 * VECTORIZE_ORGANIZATION_ID and VECTORIZE_API_KEY environment variables.
 *
 * @returns {{apiClient: object, organizationId: string}} Configuration and organization ID.
 * @throws {Error} If either environment variable is missing or empty.
 */
function getApiConfig() {
  const {
    VECTORIZE_TEST_MODE: testMode,
    VECTORIZE_ORGANIZATION_ID: organizationId,
    VECTORIZE_API_KEY: apiKey
  } = process.env;

  // Test environment: prefer the shared test configuration when it is present
  if (testMode === 'true') {
    const testConfigPath = path.join(__dirname, '../common/test_config.js');
    if (fs.existsSync(testConfigPath)) {
      const { getApiClient } = require(testConfigPath);
      const { apiConfig, config } = getApiClient();
      return { apiClient: apiConfig, organizationId: config.organization_id };
    }
  }

  // Otherwise both credentials must come from the environment
  const credentialsMissing = !organizationId || !apiKey;
  if (credentialsMissing) {
    throw new Error("Please set VECTORIZE_ORGANIZATION_ID and VECTORIZE_API_KEY environment variables");
  }

  // Always use production API
  const configuration = new vectorize.Configuration({
    basePath: 'https://api.vectorize.io/v1',
    accessToken: apiKey
  });
  return { apiClient: configuration, organizationId };
}
/**
 * Run the end-to-end File Upload connector example.
 *
 * Creates a FILE_UPLOAD source connector, lists its files, uploads a sample
 * file with metadata, uploads two more files, filters the listing by metadata,
 * and finally deletes the connector.
 *
 * @returns {Promise<void>} Resolves once the example and its cleanup have run.
 * @throws {Error} Propagates errors from getApiConfig(), from connector
 *   creation, and from the listing call in the metadata-query section.
 *   A failed connector deletion is only logged as a warning.
 */
async function main() {
  // Initialize the API client
  const { apiClient, organizationId } = getApiConfig();

  // Create a file upload connector for this example
  console.log('π Creating file upload connector...');
  const connectorsApi = new vectorize.SourceConnectorsApi(apiClient);
  const connectorResponse = await connectorsApi.createSourceConnector({
    organizationId: organizationId,
    createSourceConnectorRequest: {
      name: `file-upload-example-${Date.now()}`,
      type: 'FILE_UPLOAD',
      config: {}
    }
  });
  const sourceConnectorId = connectorResponse.connector.id;
  console.log(`✅ Created connector: ${sourceConnectorId}\n`);

  try {
    // ============================================================================
    // SNIPPET: list_files_2
    // List all files that have been uploaded to a connector
    // ============================================================================
    console.log('π Listing files in connector...');
    {
      const { UploadsApi } = vectorize;
      // Create API instance
      const uploadsApi = new UploadsApi(apiClient);
      // List files
      let response;
      try {
        response = await uploadsApi.getUploadFilesFromConnector({
          organizationId: organizationId,
          connectorId: sourceConnectorId
        });
        console.log(`Found ${response.files.length} files in connector`);
        for (const file of response.files) {
          console.log(` π ${file.name} (${file.size.toLocaleString()} bytes, Uploaded: ${file.lastModified})`);
          if (file.metadata) {
            console.log(` Metadata: ${file.metadata}`);
          }
          console.log();
        }
      } catch (error) {
        console.log(`Error listing files: ${error.message}`);
        // Set response to empty structure so example can continue
        response = { files: [] };
      }
    }

    // ============================================================================
    // SNIPPET: upload_file_2
    // Upload a file to a connector with metadata
    // ============================================================================
    console.log('\n📤 Uploading file with metadata...');
    {
      // Create a temporary test file for this example
      const filePath = path.join(os.tmpdir(), 'example-document.txt');
      const fileName = 'example-document.txt';
      fs.writeFileSync(filePath, 'This is an example document for file upload testing.\nIt demonstrates uploading files with metadata to Vectorize.');
      try {
        const { UploadsApi } = vectorize;
        // Create API instances
        const uploadsApi = new UploadsApi(apiClient);
        // File details
        const contentType = "text/plain"; // Set appropriate content type
        // Optional metadata - all values as strings
        let metadata = {
          "category": "research",
          "tags": "machine-learning,2024", // Store as comma-separated string
          "processed": "false" // Store boolean as string
        };
        let uploadResponse;
        try {
          // Step 1: Get upload URL
          const startResponse = await uploadsApi.startFileUploadToConnector({
            organizationId: organizationId,
            connectorId: sourceConnectorId,
            startFileUploadToConnectorRequest: {
              name: fileName,
              contentType: contentType,
              metadata: metadata ? JSON.stringify(metadata) : undefined // Convert to JSON string
            }
          });
          // Step 2: Upload file to the URL
          const fileBuffer = fs.readFileSync(filePath);
          const fileStats = fs.statSync(filePath);
          uploadResponse = await fetch(startResponse.uploadUrl, {
            method: 'PUT',
            body: fileBuffer,
            headers: {
              'Content-Type': contentType,
              'Content-Length': fileStats.size.toString()
            }
          });
          if (uploadResponse.status !== 200) {
            const errorText = await uploadResponse.text();
            console.log(`Upload failed: ${errorText}`);
          } else {
            console.log(`Successfully uploaded ${fileName}`);
          }
        } catch (error) {
          console.log(`Error during upload: ${error.message}`);
        }
      } finally {
        // Clean up temporary file
        if (fs.existsSync(filePath)) {
          fs.unlinkSync(filePath);
        }
      }
    }

    // Wait for file processing
    console.log('\n⏳ Waiting for file processing...');
    await new Promise(resolve => setTimeout(resolve, 3000));

    // Upload additional files to demonstrate metadata querying
    console.log('\n📤 Uploading additional files for metadata demo...');
    const additionalFiles = [
      {
        name: "engineering_spec.txt",
        content: "Engineering specification document.\nThis contains technical requirements.",
        metadata: {
          department: "engineering",
          year: "2024",
          confidential: "true"
        }
      },
      {
        name: "research_paper.txt",
        content: "Research paper on AI and machine learning.\nAuthored by multiple researchers.",
        metadata: {
          authors: ["John Doe", "Jane Smith"],
          project: {
            name: "AI Research",
            phase: "development"
          },
          tags: ["ml", "nlp", "research"]
        }
      }
    ];
    const uploadsApi = new vectorize.UploadsApi(apiClient);
    for (const fileInfo of additionalFiles) {
      const tempPath = path.join(os.tmpdir(), fileInfo.name);
      fs.writeFileSync(tempPath, fileInfo.content);
      try {
        // Get upload URL with metadata
        const startResponse = await uploadsApi.startFileUploadToConnector({
          organizationId: organizationId,
          connectorId: sourceConnectorId,
          startFileUploadToConnectorRequest: {
            name: fileInfo.name,
            contentType: "text/plain",
            metadata: JSON.stringify(fileInfo.metadata)
          }
        });
        // Upload file
        const fileContent = fs.readFileSync(tempPath);
        const uploadResponse = await fetch(startResponse.uploadUrl, {
          method: 'PUT',
          body: fileContent,
          headers: {
            'Content-Type': 'text/plain',
            // Sent as a string, matching the first upload in this example
            'Content-Length': fs.statSync(tempPath).size.toString()
          }
        });
        if (uploadResponse.ok) {
          console.log(`✅ Uploaded ${fileInfo.name} with metadata`);
        } else {
          console.log(`❌ Failed to upload ${fileInfo.name}`);
        }
      } catch (error) {
        console.log(`❌ Error uploading ${fileInfo.name}: ${error.message}`);
      } finally {
        // Clean up temp file
        if (fs.existsSync(tempPath)) {
          fs.unlinkSync(tempPath);
        }
      }
    }

    // Wait for processing
    console.log('\n⏳ Waiting for files to process...');
    await new Promise(resolve => setTimeout(resolve, 3000));

    // ============================================================================
    // SNIPPET: query_files_by_metadata_2
    // Query and filter files by their metadata
    // ============================================================================
    console.log('\nπ Querying files by metadata...');
    {
      const { UploadsApi } = vectorize;
      // Create API instance
      const uploadsApi = new UploadsApi(apiClient);
      // Get all files from connector
      const response = await uploadsApi.getUploadFilesFromConnector({
        organizationId: organizationId,
        connectorId: sourceConnectorId
      });
      // Filter files by metadata
      for (const file of response.files) {
        if (file.metadata) {
          // Metadata might be returned as string or object depending on API version
          try {
            let metadata;
            if (typeof file.metadata === 'string') {
              metadata = JSON.parse(file.metadata);
            } else {
              metadata = file.metadata;
            }
            // Example: Find all engineering department files
            if (metadata.department === "engineering") {
              console.log(`Engineering file: ${file.name}`);
              console.log(` Year: ${metadata.year || 'N/A'}`);
              console.log(` Confidential: ${metadata.confidential || 'false'}`);
            }
            // Example: Find files by author
            if (metadata.authors) {
              console.log(`Research file: ${file.name}`);
              console.log(` Authors: ${metadata.authors.join(', ')}`);
            }
            // Example: Find files by tags
            if (metadata.tags) {
              console.log(`Tagged file: ${file.name}`);
              console.log(` Tags: ${metadata.tags}`);
            }
          } catch (error) {
            console.log(`Warning: Could not process metadata for ${file.name}: ${error.message}`);
          }
        }
      }
    }
  } finally {
    // Clean up: Delete the connector
    console.log('\n🧹 Cleaning up...');
    try {
      await connectorsApi.deleteSourceConnector({
        organizationId: organizationId,
        sourceConnectorId: sourceConnectorId
      });
      console.log('✅ Deleted test connector');
    } catch (error) {
      // Best-effort cleanup: warn only. Exiting the process here would discard
      // any error thrown by the example body and would terminate callers that
      // imported main() from this module.
      console.error('Warning: Could not delete test connector:', error.message);
    }
  }
}
// Execute the example only when this file is the entry script, not when it is required
const invokedDirectly = require.main === module;
if (invokedDirectly) {
  main().catch((err) => {
    // Report the failure and signal it through the exit status
    console.error('β Error:', err);
    process.exit(1);
  });
}

// Expose main() so other modules can run the example
module.exports = { main };