Upload Files to File Upload Connectors
Learn how to programmatically manage files in your File Upload connectors using the Vectorize API.
What are File Upload Connectors?
File Upload connectors allow you to manually upload files for processing by your RAG pipelines. Unlike automated connectors that sync from external sources (like AWS S3 or Google Drive), File Upload connectors give you direct control over which files to process and when.
List Files in a Connector
Use the Uploads API to list all files currently in your connector.
- Python
- Node.js
import vectorize_client as v

# NOTE: apiClient, organization_id and source_connector_id are not defined in
# this snippet; they are assumed to come from your own client setup.

# Create API instance
uploads_api = v.UploadsApi(apiClient)

# List files: print a summary line per file, plus its metadata when present.
try:
    response = uploads_api.get_upload_files_from_connector(organization_id, source_connector_id)
    print(f"Found {len(response.files)} files in connector")
    for file in response.files:
        print(f" 📄 {file.name} ({file.size:,} bytes, Uploaded: {file.last_modified})")
        if file.metadata:
            print(f" Metadata: {file.metadata}")
        print()
except Exception as e:
    # Best-effort example: report the failure instead of crashing.
    print(f"Error listing files: {e}")
// This snippet uses async operations and should be run in an async context.
// NOTE: apiClient, organizationId and sourceConnectorId are not defined here;
// they are assumed to come from your own client setup.
(async () => {
  const { UploadsApi } = require('@vectorize-io/vectorize-client');

  // Create API instance
  const uploadsApi = new UploadsApi(apiClient);

  // List files: print a summary line per file, plus its metadata when present.
  try {
    const response = await uploadsApi.getUploadFilesFromConnector({
      organizationId,
      connectorId: sourceConnectorId,
    });

    console.log(`Found ${response.files.length} files in connector`);
    for (const file of response.files) {
      console.log(` 📄 ${file.name} (${file.size.toLocaleString()} bytes, Uploaded: ${file.lastModified})`);
      if (file.metadata) {
        // Interpolating an object directly would print "[object Object]",
        // so serialize anything that is not already a string.
        const metadataText = typeof file.metadata === 'string'
          ? file.metadata
          : JSON.stringify(file.metadata);
        console.log(` Metadata: ${metadataText}`);
      }
      console.log();
    }
  } catch (error) {
    // Best-effort example: report the failure instead of crashing.
    console.log(`Error listing files: ${error.message}`);
  }
})();
Upload a File
Uploading a file to a connector is a two-step process:
- Request a pre-signed upload URL from the API
- Upload your file to that URL
- Python
- Node.js
import json
import os

import urllib3
import vectorize_client as v

# NOTE: apiClient, organization_id, source_connector_id, file_name and
# file_path are not defined in this snippet; they are assumed to come from
# your own setup code.

# Create API instances
uploads_api = v.UploadsApi(apiClient)

# File details
content_type = "application/pdf"  # Set appropriate content type

# Optional metadata - all values as strings
metadata = {
    "category": "research",
    "tags": "machine-learning,2024",  # Store as comma-separated string
    "processed": "false",  # Store boolean as string
}

try:
    # Step 1: Get upload URL
    start_response = uploads_api.start_file_upload_to_connector(
        organization_id,
        source_connector_id,
        start_file_upload_to_connector_request=v.StartFileUploadToConnectorRequest(
            name=file_name,
            content_type=content_type,
            # The whole metadata dict is sent as a single JSON string.
            metadata=json.dumps(metadata) if metadata else None,
        ),
    )

    # Step 2: Upload file to the URL
    http = urllib3.PoolManager()
    with open(file_path, "rb") as f:
        response = http.request(
            "PUT",
            start_response.upload_url,
            body=f,
            headers={
                "Content-Type": content_type,
                "Content-Length": str(os.path.getsize(file_path)),
            },
        )

    if response.status != 200:
        print(f"Upload failed: {response.data}")
    else:
        print(f"Successfully uploaded {file_name}")
except Exception as e:
    # Best-effort example: report the failure instead of crashing.
    print(f"Error during upload: {e}")
// This snippet uses async operations and should be run in an async context.
// NOTE: apiClient, organizationId, sourceConnectorId, fileName and filePath
// are not defined here; they are assumed to come from your own setup code.
(async () => {
  const fs = require('fs');
  const { UploadsApi } = require('@vectorize-io/vectorize-client');

  // Create API instances
  const uploadsApi = new UploadsApi(apiClient);

  // File details
  const contentType = "application/pdf"; // Set appropriate content type

  // Optional metadata - all values as strings
  const metadata = {
    "category": "research",
    "tags": "machine-learning,2024", // Store as comma-separated string
    "processed": "false", // Store boolean as string
  };

  try {
    // Step 1: Get upload URL
    const startResponse = await uploadsApi.startFileUploadToConnector({
      organizationId,
      connectorId: sourceConnectorId,
      startFileUploadToConnectorRequest: {
        name: fileName,
        contentType,
        // The whole metadata object is sent as a single JSON string.
        metadata: metadata ? JSON.stringify(metadata) : undefined,
      },
    });

    // Step 2: Upload file to the URL
    const fileBuffer = fs.readFileSync(filePath);
    const uploadResponse = await fetch(startResponse.uploadUrl, {
      method: 'PUT',
      body: fileBuffer,
      headers: {
        'Content-Type': contentType,
        // The buffer already holds the whole file, so its length is the size.
        'Content-Length': fileBuffer.length.toString(),
      },
    });

    if (uploadResponse.status !== 200) {
      const errorText = await uploadResponse.text();
      console.log(`Upload failed: ${errorText}`);
    } else {
      console.log(`Successfully uploaded ${fileName}`);
    }
  } catch (error) {
    // Best-effort example: report the failure instead of crashing.
    console.log(`Error during upload: ${error.message}`);
  }
})();
If a file with the same name already exists in the connector, it will be overwritten.
Working with Metadata
Metadata allows you to attach additional information to your files that will be preserved throughout processing and can be used for filtering and organization in your RAG pipelines.
Metadata Examples
The following examples show metadata structures, but remember that all metadata values must be strings when uploading. Complex types will be converted to JSON strings.
# Flat metadata: every value is already written as a string.
metadata = {
    "department": "engineering",
    "year": "2024",  # a number, written as a string
    "confidential": "true",  # a boolean, written as a string
}

# Structured metadata: lists and a nested mapping. This assignment replaces
# the flat example above; the two are alternatives, not a combined value.
metadata = {
    "authors": ["John Doe", "Jane Smith"],
    "project": {
        "name": "AI Research",
        "phase": "development",
    },
    "tags": ["ml", "nlp", "research"],
}

# Serialize the whole structure into one JSON string before uploading.
metadata_string = json.dumps(metadata)
Retrieving Files with Metadata
When you list files, the metadata is included in the response:
import json

# NOTE: uploads_api, organization_id and source_connector_id are assumed to be
# defined as in the earlier snippets.
response = uploads_api.get_upload_files_from_connector(organization_id, source_connector_id)
for file in response.files:
    if not file.metadata:
        continue
    # Metadata may arrive as a JSON string rather than a dict, so decode it
    # before looking up keys.
    metadata = json.loads(file.metadata) if isinstance(file.metadata, str) else file.metadata
    if isinstance(metadata, dict) and metadata.get("department") == "engineering":
        print(f"Engineering file: {file.name}")
Complete Example
Here's all the code from this guide combined into a complete, runnable example:
- Python
- Node.js
Required Environment Variables:
• `VECTORIZE_API_KEY`
• `VECTORIZE_ORGANIZATION_ID`
Required Files:
• `example.txt` — A text file to upload
Additional Requirements:
• Requires an existing File Upload source connector; the example expects its ID in `source_connector_id`
import json
import os

import urllib3
import vectorize_client as v


def list_files(uploads_api, organization_id, source_connector_id):
    """Print name, size, upload time and metadata of each file in the connector.

    Errors are reported with a message instead of being raised.
    """
    try:
        response = uploads_api.get_upload_files_from_connector(organization_id, source_connector_id)
        print(f"Found {len(response.files)} files in connector")
        for file in response.files:
            print(f" 📄 {file.name} ({file.size:,} bytes, Uploaded: {file.last_modified})")
            if file.metadata:
                print(f" Metadata: {file.metadata}")
            print()
    except Exception as e:
        print(f"Error listing files: {e}")


def upload_file(uploads_api, organization_id, source_connector_id,
                file_path, file_name, content_type, metadata=None):
    """Upload one local file to the connector in two steps.

    Step 1 asks the API for an upload URL; step 2 PUTs the file bytes to that
    URL. Errors are reported with a message instead of being raised.
    """
    try:
        # Step 1: Get upload URL
        start_response = uploads_api.start_file_upload_to_connector(
            organization_id,
            source_connector_id,
            start_file_upload_to_connector_request=v.StartFileUploadToConnectorRequest(
                name=file_name,
                content_type=content_type,
                # The whole metadata dict is sent as a single JSON string.
                metadata=json.dumps(metadata) if metadata else None,
            ),
        )

        # Step 2: Upload file to the URL
        http = urllib3.PoolManager()
        with open(file_path, "rb") as f:
            response = http.request(
                "PUT",
                start_response.upload_url,
                body=f,
                headers={
                    "Content-Type": content_type,
                    "Content-Length": str(os.path.getsize(file_path)),
                },
            )

        if response.status != 200:
            print(f"Upload failed: {response.data}")
        else:
            print(f"Successfully uploaded {file_name}")
    except Exception as e:
        print(f"Error during upload: {e}")


def print_files_by_metadata(uploads_api, organization_id, source_connector_id):
    """List the connector's files and print those matching example metadata keys."""
    response = uploads_api.get_upload_files_from_connector(organization_id, source_connector_id)

    for file in response.files:
        if not file.metadata:
            continue
        try:
            # Metadata might be returned as string or dict depending on API version
            if isinstance(file.metadata, str):
                metadata = json.loads(file.metadata)
            else:
                metadata = file.metadata
            if not isinstance(metadata, dict):
                # Valid JSON that is not an object has no keys to filter on.
                raise TypeError("metadata is not a JSON object")

            # Example: Find all engineering department files
            if metadata.get("department") == "engineering":
                print(f"Engineering file: {file.name}")
                print(f" Year: {metadata.get('year', 'N/A')}")
                print(f" Confidential: {metadata.get('confidential', 'false')}")

            # Example: Find files by author
            if "authors" in metadata:
                print(f"Research file: {file.name}")
                print(f" Authors: {', '.join(metadata['authors'])}")

            # Example: Find files by tags
            if "tags" in metadata:
                print(f"Tagged file: {file.name}")
                print(f" Tags: {metadata['tags']}")
        except (json.JSONDecodeError, TypeError) as e:
            print(f"Warning: Could not process metadata for {file.name}: {e}")


def main():
    """Run the guide end to end: list files, upload one, then filter by metadata."""
    api_key = os.environ.get("VECTORIZE_API_KEY")
    organization_id = os.environ.get("VECTORIZE_ORGANIZATION_ID")
    if not api_key or not organization_id:
        raise SystemExit("Set VECTORIZE_API_KEY and VECTORIZE_ORGANIZATION_ID before running this example.")

    # Initialize the API client
    api_client = v.ApiClient(v.Configuration(
        api_key=api_key,
        host="https://api.vectorize.io",
    ))

    # Replace with the ID of an existing File Upload connector in your organization.
    source_connector_id = "your-source-connector-id"

    # File to upload
    file_path = "example.txt"
    file_name = os.path.basename(file_path)
    content_type = "text/plain"  # Set appropriate content type for your file

    # One API instance is enough for every call below.
    uploads_api = v.UploadsApi(api_client)

    # List Files
    list_files(uploads_api, organization_id, source_connector_id)

    # Upload File
    # Optional metadata - all values as strings
    metadata = {
        "category": "research",
        "tags": "machine-learning,2024",  # Store as comma-separated string
        "processed": "false",  # Store boolean as string
    }
    upload_file(uploads_api, organization_id, source_connector_id,
                file_path, file_name, content_type, metadata)

    # Metadata Simple
    # Simple metadata with all values as strings (shown for illustration)
    simple_metadata = {
        "department": "engineering",
        "year": "2024",  # Numbers as strings
        "confidential": "true",  # Booleans as strings
    }

    # Metadata Complex
    # Complex metadata with nested objects and arrays (shown for illustration)
    complex_metadata = {
        "authors": ["John Doe", "Jane Smith"],
        "project": {
            "name": "AI Research",
            "phase": "development",
        },
        "tags": ["ml", "nlp", "research"],
    }
    # When uploading, convert to JSON string:
    metadata_string = json.dumps(complex_metadata)

    # Query Files By Metadata
    print_files_by_metadata(uploads_api, organization_id, source_connector_id)


if __name__ == "__main__":
    main()
Required Environment Variables:
• `VECTORIZE_API_KEY`
• `VECTORIZE_ORGANIZATION_ID`
Required Files:
• `example.txt` — A text file to upload
Additional Requirements:
• Requires an existing File Upload source connector; the example expects its ID in `sourceConnectorId`
const fs = require('fs');
const path = require('path');
const vectorize = require('@vectorize-io/vectorize-client');

const { UploadsApi } = vectorize;

/**
 * Print name, size, upload time and metadata of each file in the connector.
 * Errors are reported with a message instead of being thrown.
 */
async function listFiles(uploadsApi, organizationId, sourceConnectorId) {
  try {
    const response = await uploadsApi.getUploadFilesFromConnector({
      organizationId,
      connectorId: sourceConnectorId,
    });

    console.log(`Found ${response.files.length} files in connector`);
    for (const file of response.files) {
      console.log(` 📄 ${file.name} (${file.size.toLocaleString()} bytes, Uploaded: ${file.lastModified})`);
      if (file.metadata) {
        // Interpolating an object directly would print "[object Object]".
        const metadataText = typeof file.metadata === 'string'
          ? file.metadata
          : JSON.stringify(file.metadata);
        console.log(` Metadata: ${metadataText}`);
      }
      console.log();
    }
  } catch (error) {
    console.log(`Error listing files: ${error.message}`);
  }
}

/**
 * Upload one local file to the connector in two steps: request an upload URL
 * from the API, then PUT the file bytes to that URL.
 * Errors are reported with a message instead of being thrown.
 */
async function uploadFile(uploadsApi, organizationId, sourceConnectorId, file, metadata) {
  const { filePath, fileName, contentType } = file;
  try {
    // Step 1: Get upload URL
    const startResponse = await uploadsApi.startFileUploadToConnector({
      organizationId,
      connectorId: sourceConnectorId,
      startFileUploadToConnectorRequest: {
        name: fileName,
        contentType,
        // The whole metadata object is sent as a single JSON string.
        metadata: metadata ? JSON.stringify(metadata) : undefined,
      },
    });

    // Step 2: Upload file to the URL
    const fileBuffer = fs.readFileSync(filePath);
    const uploadResponse = await fetch(startResponse.uploadUrl, {
      method: 'PUT',
      body: fileBuffer,
      headers: {
        'Content-Type': contentType,
        'Content-Length': fileBuffer.length.toString(),
      },
    });

    if (uploadResponse.status !== 200) {
      const errorText = await uploadResponse.text();
      console.log(`Upload failed: ${errorText}`);
    } else {
      console.log(`Successfully uploaded ${fileName}`);
    }
  } catch (error) {
    console.log(`Error during upload: ${error.message}`);
  }
}

/** List the connector's files and print those matching example metadata keys. */
async function printFilesByMetadata(uploadsApi, organizationId, sourceConnectorId) {
  const response = await uploadsApi.getUploadFilesFromConnector({
    organizationId,
    connectorId: sourceConnectorId,
  });

  for (const file of response.files) {
    if (!file.metadata) {
      continue;
    }
    try {
      // Metadata might be returned as string or object depending on API version
      const metadata = typeof file.metadata === 'string'
        ? JSON.parse(file.metadata)
        : file.metadata;

      // Example: Find all engineering department files
      if (metadata.department === "engineering") {
        console.log(`Engineering file: ${file.name}`);
        console.log(` Year: ${metadata.year || 'N/A'}`);
        console.log(` Confidential: ${metadata.confidential || 'false'}`);
      }

      // Example: Find files by author
      if (metadata.authors) {
        console.log(`Research file: ${file.name}`);
        console.log(` Authors: ${metadata.authors.join(', ')}`);
      }

      // Example: Find files by tags
      if (metadata.tags) {
        console.log(`Tagged file: ${file.name}`);
        console.log(` Tags: ${metadata.tags}`);
      }
    } catch (error) {
      console.log(`Warning: Could not process metadata for ${file.name}: ${error.message}`);
    }
  }
}

/** Run the guide end to end: list files, upload one, then filter by metadata. */
async function main() {
  const apiKey = process.env.VECTORIZE_API_KEY;
  const organizationId = process.env.VECTORIZE_ORGANIZATION_ID;
  if (!apiKey || !organizationId) {
    throw new Error('Set VECTORIZE_API_KEY and VECTORIZE_ORGANIZATION_ID before running this example.');
  }

  // Initialize the API client
  const apiClient = new vectorize.ApiClient(new vectorize.Configuration({
    basePath: "https://api.vectorize.io/api",
    accessToken: apiKey,
  }));

  // Replace with the ID of an existing File Upload connector in your organization.
  const sourceConnectorId = "your-source-connector-id";

  // File to upload
  const filePath = "example.txt";
  const file = {
    filePath,
    fileName: path.basename(filePath),
    contentType: "text/plain", // Set appropriate content type for your file
  };

  // One API instance is enough for every call below.
  const uploadsApi = new UploadsApi(apiClient);

  // List Files
  await listFiles(uploadsApi, organizationId, sourceConnectorId);

  // Upload File
  // Optional metadata - all values as strings
  const metadata = {
    "category": "research",
    "tags": "machine-learning,2024", // Store as comma-separated string
    "processed": "false", // Store boolean as string
  };
  await uploadFile(uploadsApi, organizationId, sourceConnectorId, file, metadata);

  // Metadata Simple
  // Simple metadata with all values as strings (shown for illustration)
  const simpleMetadata = {
    department: "engineering",
    year: "2024", // Numbers as strings
    confidential: "true", // Booleans as strings
  };

  // Metadata Complex
  // Complex metadata with nested objects and arrays (shown for illustration)
  const complexMetadata = {
    authors: ["John Doe", "Jane Smith"],
    project: {
      name: "AI Research",
      phase: "development",
    },
    tags: ["ml", "nlp", "research"],
  };
  // When uploading, convert to JSON string:
  const metadataString = JSON.stringify(complexMetadata);

  // Query Files By Metadata
  await printFilesByMetadata(uploadsApi, organizationId, sourceConnectorId);
}

// Run the example
main().catch(console.error);