Skip to main content

Upload Files to File Upload Connectors

Learn how to programmatically manage files in your File Upload connectors using the Vectorize API.

Before You Start
This guide assumes you've already set up your Vectorize API client and have access to your organization’s API key and ID.

What are File Upload Connectors?​

File Upload connectors allow you to manually upload files for processing by your RAG pipelines. Unlike automated connectors that sync from external sources (like AWS S3 or Google Drive), File Upload connectors give you direct control over which files to process and when.

List Files in a Connector​

Use the Uploads API to list all files currently in your connector.

import vectorize_client as v

# Create the Uploads API instance from your already-configured API client
uploads_api = v.UploadsApi(apiClient)

# List every file currently stored in the connector
try:
    response = uploads_api.get_upload_files_from_connector(organization_id, source_connector_id)
    print(f"Found {len(response.files)} files in connector")

    for file in response.files:
        # One summary line per file: name, size with thousands separators, upload time
        print(f" 📄 {file.name} ({file.size:,} bytes, Uploaded: {file.last_modified})")
        if file.metadata:
            print(f" Metadata: {file.metadata}")
        print()

except Exception as e:
    # Report the failure instead of letting the snippet crash
    print(f"Error listing files: {e}")

Upload a File​

Uploading a file to a connector is a two-step process:

  1. Request a pre-signed upload URL from the API
  2. Upload your file to that URL
import vectorize_client as v
import urllib3
import os
import json

# Create API instances
uploads_api = v.UploadsApi(apiClient)

# File details
file_path = "path/to/document.pdf"  # Local file to upload (replace with your own path)
file_name = os.path.basename(file_path)  # Name sent in the upload request
content_type = "application/pdf"  # Set appropriate content type

# Optional metadata - all values as strings
metadata = {
    "category": "research",
    "tags": "machine-learning,2024",  # Store as comma-separated string
    "processed": "false",  # Store boolean as string
}

try:
    # Step 1: Ask the API for a pre-signed upload URL for this file
    start_response = uploads_api.start_file_upload_to_connector(
        organization_id,
        source_connector_id,
        start_file_upload_to_connector_request=v.StartFileUploadToConnectorRequest(
            name=file_name,
            content_type=content_type,
            metadata=json.dumps(metadata) if metadata else None,  # Convert to JSON string
        ),
    )

    # Step 2: PUT the file contents to the returned URL
    http = urllib3.PoolManager()

    with open(file_path, "rb") as f:
        response = http.request(
            "PUT",
            start_response.upload_url,
            body=f,
            headers={
                "Content-Type": content_type,
                "Content-Length": str(os.path.getsize(file_path)),
            },
        )

    # Any 2xx status means the storage backend accepted the upload
    if not 200 <= response.status < 300:
        print(f"Upload failed: {response.data}")
    else:
        print(f"Successfully uploaded {file_name}")

except Exception as e:
    print(f"Error during upload: {e}")
Note: If a file with the same name already exists in the connector, it will be overwritten.

Working with Metadata​

Metadata allows you to attach additional information to your files that will be preserved throughout processing and can be used for filtering and organization in your RAG pipelines.

Metadata Examples​

Note: The following examples show metadata structures, but remember that all metadata values must be strings when uploading. Complex types (arrays, nested objects) are converted to JSON strings when the metadata is serialized with `json.dumps`.

# Flat metadata: every value is already a string
metadata = {
    "department": "engineering",
    "year": "2024",  # Numbers as strings
    "confidential": "true",  # Booleans as strings
}

# Structured metadata: lists and nested objects (will be JSON stringified)
metadata = {
    "authors": ["John Doe", "Jane Smith"],
    "project": {
        "name": "AI Research",
        "phase": "development",
    },
    "tags": ["ml", "nlp", "research"],
}

# Serialize the whole structure to a JSON string before uploading
metadata_string = json.dumps(metadata)

Retrieving Files with Metadata​

When you list files, the metadata is included in the response:

# List the connector's files and report those tagged with the engineering department.
# NOTE(review): assumes file.metadata supports .get() (dict-like) - confirm against the API response model.
response = uploads_api.get_upload_files_from_connector(organization_id, source_connector_id)
for file in response.files:
    if file.metadata and file.metadata.get("department") == "engineering":
        print(f"Engineering file: {file.name}")

Complete Example​

Here's all the code from this guide combined into a complete, runnable example:

Required Environment Variables:
• `VECTORIZE_API_KEY`
• `VECTORIZE_ORGANIZATION_ID`

Required Files:
• `example.txt` – A text file to upload

Additional Requirements:
• Creates a source connector in the example
#!/usr/bin/env python3
"""
Complete example for file upload with metadata.
This is a hand-written example that corresponds to the test file:
api-clients/python/tests/connectors/file_upload/file_upload.py

IMPORTANT: Keep this file in sync with the test file's snippets!
"""

import os
import sys
import json
import time
import tempfile
import urllib3
import vectorize_client as v


def get_api_config():
    """Build the API configuration from environment variables.

    Reads ``VECTORIZE_ORGANIZATION_ID`` and ``VECTORIZE_API_KEY``. If either
    is missing or empty, prints setup instructions and exits the process
    with status 1.

    Returns:
        tuple: ``(configuration, organization_id)`` where ``configuration``
        is a ``v.Configuration`` pointing at the production API host.
    """
    organization_id = os.environ.get("VECTORIZE_ORGANIZATION_ID")
    api_key = os.environ.get("VECTORIZE_API_KEY")

    if not organization_id or not api_key:
        print("🔑 Setup required:")
        print("1. Get your API key from: https://app.vectorize.io/settings")
        print("2. Set environment variables:")
        print(" export VECTORIZE_ORGANIZATION_ID='your-org-id'")
        print(" export VECTORIZE_API_KEY='your-api-key'")
        sys.exit(1)

    # Always use production API
    configuration = v.Configuration(
        host="https://api.vectorize.io/v1",
        access_token=api_key,
    )

    return configuration, organization_id


def create_file_upload_connector(api_client, organization_id):
    """Create a file upload source connector for the example.

    Args:
        api_client: API client passed to ``v.SourceConnectorsApi``.
        organization_id: Organization in which the connector is created.

    Returns:
        The ID of the newly created connector.

    Raises:
        Exception: Any error from the create call is printed and re-raised.
    """
    connectors_api = v.SourceConnectorsApi(api_client)

    try:
        # Describe the connector: a FILE_UPLOAD source with an empty config
        file_upload = v.FileUpload(
            name="file-upload-example",
            type="FILE_UPLOAD",
            config={},
        )

        # Wrap the FileUpload object in the create request
        request = v.CreateSourceConnectorRequest(file_upload)

        response = connectors_api.create_source_connector(
            organization_id,
            request,
        )

        print(f"✅ Created file upload connector: {response.connector.name}")
        print(f" Connector ID: {response.connector.id}\n")

        return response.connector.id

    except Exception as e:
        # Report, then propagate so the caller can stop the example
        print(f"❌ Error creating file upload connector: {e}")
        raise


def list_files(api_client, organization_id, source_connector_id):
    """Print and return the files currently stored in the connector.

    Args:
        api_client: API client passed to ``v.UploadsApi``.
        organization_id: Organization that owns the connector.
        source_connector_id: Connector whose files are listed.

    Returns:
        The ``files`` collection from the API response, or an empty list
        if the listing call fails (the error is printed, not raised).
    """
    # Create API instance
    uploads_api = v.UploadsApi(api_client)

    # List files
    try:
        response = uploads_api.get_upload_files_from_connector(organization_id, source_connector_id)
        print(f"Found {len(response.files)} files in connector")

        for file in response.files:
            print(f" 📄 {file.name} ({file.size:,} bytes, Uploaded: {file.last_modified})")
            if file.metadata:
                print(f" Metadata: {file.metadata}")
            print()

        return response.files

    except Exception as e:
        # Best-effort listing: callers receive an empty list on failure
        print(f"Error listing files: {e}")
        return []


def create_sample_pdf():
    """Create a temporary sample document to upload.

    Despite the function's name, the file written is plain text (``.txt``),
    used as a stand-in for a real document. In a real scenario you would
    provide your own file instead.

    Returns:
        tuple[str, str]: ``(temp_path, file_name)`` - the path of the
        temporary file on disk and the fixed name ``"sample_document.txt"``.
        The temporary file is not deleted automatically; the caller is
        responsible for removing it.
    """
    sample_content = """Sample Document for File Upload

This is a sample document that demonstrates file upload functionality with metadata.

Key points:
- Files can be uploaded with custom metadata
- Metadata is stored as JSON
- Files are processed asynchronously

For more information, visit the Vectorize documentation.
"""

    # delete=False keeps the file on disk after the handle is closed so it
    # can be reopened later; an explicit encoding avoids depending on the
    # platform's default locale encoding.
    with tempfile.NamedTemporaryFile(
        mode='w', suffix='.txt', delete=False, encoding='utf-8'
    ) as f:
        f.write(sample_content)
        temp_path = f.name

    return temp_path, "sample_document.txt"


def upload_file(api_client, organization_id, source_connector_id, file_path, file_name):
    """Upload a file with metadata to the connector.

    The upload is a two-step process: request an upload URL from the API,
    then PUT the file contents to that URL.

    Args:
        api_client: API client passed to ``v.UploadsApi``.
        organization_id: Organization that owns the connector.
        source_connector_id: Connector receiving the file.
        file_path: Path of the local file to read.
        file_name: Name sent in the upload request.

    Returns:
        bool: True if the PUT returned a 2xx status; False if it returned
        any other status or if any step raised (the error is printed).
    """
    # Create API instances
    uploads_api = v.UploadsApi(api_client)

    # File details
    content_type = "text/plain"  # Set appropriate content type

    # Optional metadata - all values as strings
    metadata = {
        "category": "research",
        "tags": "machine-learning,2024",  # Store as comma-separated string
        "processed": "false",  # Store boolean as string
    }

    try:
        # Step 1: Get upload URL
        start_response = uploads_api.start_file_upload_to_connector(
            organization_id,
            source_connector_id,
            start_file_upload_to_connector_request=v.StartFileUploadToConnectorRequest(
                name=file_name,
                content_type=content_type,
                metadata=json.dumps(metadata) if metadata else None,  # Convert to JSON string
            ),
        )

        # Step 2: Upload file to the URL
        http = urllib3.PoolManager()

        with open(file_path, "rb") as f:
            response = http.request(
                "PUT",
                start_response.upload_url,
                body=f,
                headers={
                    "Content-Type": content_type,
                    "Content-Length": str(os.path.getsize(file_path)),
                },
            )

        # Any 2xx status means the upload was accepted, not only 200
        if not 200 <= response.status < 300:
            print(f"Upload failed: {response.data}")
            return False

        print(f"Successfully uploaded {file_name}")
        return True

    except Exception as e:
        print(f"Error during upload: {e}")
        return False


def verify_upload(api_client, organization_id, source_connector_id, file_name):
    """Check that a file with the given name is present in the connector.

    Args:
        api_client: API client passed to ``v.UploadsApi``.
        organization_id: Organization that owns the connector.
        source_connector_id: Connector to search.
        file_name: Name of the file expected in the listing.

    Returns:
        bool: True if a file with a matching name is listed; False if it is
        absent or the listing call fails (the error is printed).
    """
    uploads_api = v.UploadsApi(api_client)

    try:
        response = uploads_api.get_upload_files_from_connector(organization_id, source_connector_id)

        # First file whose name matches, or None when there is no match
        uploaded_file = next(
            (candidate for candidate in response.files if candidate.name == file_name),
            None,
        )

        if uploaded_file is None:
            print(f"❌ File {file_name} not found after upload")
            return False

        print(f" ✅ Found: {uploaded_file.name}")
        # The metadata attribute may be missing or empty; print it only when set
        file_metadata = getattr(uploaded_file, 'metadata', None)
        if file_metadata:
            print(f" 📋 Metadata: {file_metadata}")

        print(f"✅ Upload verified! File size: {uploaded_file.size} bytes")
        return True

    except Exception as e:
        print(f"❌ Error verifying upload: {e}")
        return False


def cleanup_connector(api_client, organization_id, connector_id):
    """Delete the connector created by this example.

    Cleanup is best-effort: a failure is printed as a warning and not raised.

    Args:
        api_client: API client passed to ``v.SourceConnectorsApi``.
        organization_id: Organization that owns the connector.
        connector_id: ID of the connector to delete.
    """
    try:
        connectors_api = v.SourceConnectorsApi(api_client)
        connectors_api.delete_source_connector(
            organization_id,
            connector_id,
        )
        print(f"✅ Cleaned up connector: {connector_id}")
    except Exception as e:
        print(f"⚠️ Warning: Could not clean up connector: {e}")


def main():
    """Run the end-to-end file upload example.

    Steps: load configuration, create a sample file, create a file upload
    connector, list its files, upload the sample file with metadata, verify
    the upload, list the files again, and delete the connector. The
    temporary sample file is removed on the way out. Exits with status 1 on
    any error.
    """
    print("=== File Upload with Metadata Example ===\n")

    temp_file_path = None

    try:
        # Get configuration
        configuration, organization_id = get_api_config()

        print("⚙️ Configuration:")
        print(f" Organization ID: {organization_id}")
        print(f" Host: {configuration.host}\n")

        # Create sample file
        temp_file_path, file_name = create_sample_pdf()
        print(f"📄 Created sample file: {file_name}\n")

        # Initialize the API client for the configured host
        with v.ApiClient(configuration) as api_client:
            # Create a file upload connector
            print("📁 Creating File Upload Connector")
            connector_id = create_file_upload_connector(api_client, organization_id)

            try:
                # List initial files (should be empty)
                print("📋 Listing Files (Initial)")
                initial_files = list_files(api_client, organization_id, connector_id)

                # Clean up if the test file already exists
                if any(f.name == file_name for f in initial_files):
                    print(f"🗑️ Cleaning up existing file: {file_name}")
                    uploads_api = v.UploadsApi(api_client)
                    try:
                        uploads_api.delete_file_from_connector(organization_id, connector_id, file_name)
                        time.sleep(2)  # Wait for deletion
                    except Exception as e:
                        print(f"⚠️ Warning: Could not delete existing file: {e}")

                # Upload file with metadata
                print("📤 Uploading File with Metadata")
                upload_success = upload_file(
                    api_client, organization_id, connector_id, temp_file_path, file_name
                )

                if upload_success:
                    # Wait for processing
                    print("⏳ Waiting for file processing...")
                    time.sleep(5)  # Reduced wait time for example

                    # Verify upload
                    print("\n🔍 Verifying Upload")
                    verification_success = verify_upload(
                        api_client, organization_id, connector_id, file_name
                    )

                    # List files again to see the uploaded file
                    print("\n📋 Listing Files (After Upload)")
                    list_files(api_client, organization_id, connector_id)

                    if verification_success:
                        print("\n🎉 File upload with metadata completed successfully!")
                    else:
                        print("\n⚠️ Upload completed but verification failed")
                else:
                    print("\n❌ File upload failed")

            finally:
                # Delete the connector even if the steps above failed
                print("\n🧹 Cleaning Up")
                cleanup_connector(api_client, organization_id, connector_id)

    except ValueError as e:
        # get_api_config() exits instead of raising when variables are missing,
        # so this branch only covers a ValueError raised elsewhere during the run.
        print(f"❌ Configuration Error: {e}")
        print("\n💡 Make sure to set the required environment variables:")
        print(" export VECTORIZE_ORGANIZATION_ID='your-org-id'")
        print(" export VECTORIZE_API_KEY='your-api-key'")
        sys.exit(1)

    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)

    finally:
        # Clean up temp file
        if temp_file_path and os.path.exists(temp_file_path):
            os.unlink(temp_file_path)


if __name__ == "__main__":
    main()

Was this page helpful?