Skip to main content

Upload Files to File Upload Connectors

Learn how to programmatically manage files in your File Upload connectors using the Vectorize API.

Before You Start
This guide assumes you've already set up your Vectorize API client and have access to your organization’s API key and ID.

What are File Upload Connectors?

File Upload connectors allow you to manually upload files for processing by your RAG pipelines. Unlike automated connectors that sync from external sources (like AWS S3 or Google Drive), File Upload connectors give you direct control over which files to process and when.

List Files in a Connector

Use the Uploads API to list all files currently in your connector.

import vectorize_client as v

organization_id = "your-organization-id"
source_connector_id = "your-connector-id"

# Create the Uploads API instance.
# `apiClient` is the Vectorize API client you configured during setup
# (see "Before You Start"); it is not defined in this snippet.
uploads_api = v.UploadsApi(apiClient)

# List every file currently stored in the connector and print a short
# summary line (name, size, upload time) for each one.
try:
    response = uploads_api.get_upload_files_from_connector(organization_id, source_connector_id)
    print(f"Found {len(response.files)} files in connector")

    for file in response.files:
        print(f" 📄 {file.name} ({file.size:,} bytes, Uploaded: {file.last_modified})")
        # Metadata is optional; only print it when the file has some.
        if file.metadata:
            print(f" Metadata: {file.metadata}")
        print()

except Exception as e:
    # Top-level script boundary: report the failure instead of crashing.
    print(f"Error listing files: {e}")

Upload a File

Uploading a file to a connector is a two-step process:

  1. Request a pre-signed upload URL from the API
  2. Upload your file to that URL
import json
import os

import urllib3
import vectorize_client as v

organization_id = "your-organization-id"
source_connector_id = "your-connector-id"
file_path = "path/to/your/file.pdf"
file_name = "file.pdf"

# Create the Uploads API instance.
# `apiClient` is the Vectorize API client you configured during setup
# (see "Before You Start"); it is not defined in this snippet.
uploads_api = v.UploadsApi(apiClient)

# File details
content_type = "application/pdf"  # Set appropriate content type

# Optional metadata - all values as strings
metadata = {
    "category": "research",
    "tags": "machine-learning,2024",  # Store as comma-separated string
    "processed": "false",  # Store boolean as string
}

try:
    # Step 1: Ask the API for a pre-signed upload URL for this file.
    start_response = uploads_api.start_file_upload_to_connector(
        organization_id,
        source_connector_id,
        start_file_upload_to_connector_request=v.StartFileUploadToConnectorRequest(
            name=file_name,
            content_type=content_type,
            # The metadata dict is sent as a single JSON string.
            metadata=json.dumps(metadata) if metadata else None,
        ),
    )

    # Step 2: PUT the file contents to the pre-signed URL.
    http = urllib3.PoolManager()

    with open(file_path, "rb") as f:
        response = http.request(
            "PUT",
            start_response.upload_url,
            body=f,
            headers={
                "Content-Type": content_type,
                # Declare the body size up front from the file on disk.
                "Content-Length": str(os.path.getsize(file_path)),
            },
        )

    if response.status != 200:
        print(f"Upload failed: {response.data}")
    else:
        print(f"Successfully uploaded {file_name}")

except Exception as e:
    # Top-level script boundary: report the failure instead of crashing.
    print(f"Error during upload: {e}")
note

If a file with the same name already exists in the connector, it will be overwritten.

Working with Metadata

Metadata allows you to attach additional information to your files that will be preserved throughout processing and can be used for filtering and organization in your RAG pipelines.

Metadata Examples

note

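The following examples show metadata structures, but remember that all metadata values must be strings when uploading. Complex types such as lists and nested objects need to be converted to JSON strings, for example with `json.dumps` as in the last line of the example below.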

# Flat metadata: every value is already a plain string.
metadata = dict(
    department="engineering",
    year="2024",  # numeric values are written as strings
    confidential="true",  # boolean flags are written as strings
)

# Structured metadata: lists and nested mappings are allowed here because
# the whole mapping is serialized to JSON below.
metadata = {
    "authors": ["John Doe", "Jane Smith"],
    "project": {"name": "AI Research", "phase": "development"},
    "tags": ["ml", "nlp", "research"],
}

# Serialize the mapping to a single JSON string before uploading.
metadata_string = json.dumps(metadata)

Retrieving Files with Metadata

When you list files, the metadata is included in the response:

# List the connector's files and print the ones whose metadata marks them
# as belonging to the engineering department.
# `uploads_api`, `organization_id` and `source_connector_id` are the values
# defined in the listing example earlier in this guide.
response = uploads_api.get_upload_files_from_connector(organization_id, source_connector_id)
for file in response.files:
    # assumes file.metadata is returned as a dict-like object — TODO confirm
    if file.metadata and file.metadata.get("department") == "engineering":
        print(f"Engineering file: {file.name}")

Complete Example

Here's all the code from this guide combined into a complete, runnable example:

import os

import vectorize_client as v

# Get credentials from environment variables
organization_id = os.environ.get("VECTORIZE_ORGANIZATION_ID")
api_key = os.environ.get("VECTORIZE_API_KEY")

# Fail fast with a clear message when either credential is missing or empty.
if not organization_id or not api_key:
    raise ValueError("Please set VECTORIZE_ORGANIZATION_ID and VECTORIZE_API_KEY environment variables")

# Initialize the API client
configuration = v.Configuration(
    host="https://api.vectorize.io",
    api_key={"ApiKeyAuth": api_key},
)
api = v.ApiClient(configuration)

print(f"✅ API client initialized for organization: {organization_id}")

import vectorize_client as v

# `organization_id` is the value read from VECTORIZE_ORGANIZATION_ID above;
# it is deliberately not reassigned here so the real ID is used.
source_connector_id = "your-connector-id"

# Create the Uploads API instance from the `api` client initialized above.
uploads_api = v.UploadsApi(api)

# List every file currently stored in the connector and print a short
# summary line (name, size, upload time) for each one.
try:
    response = uploads_api.get_upload_files_from_connector(organization_id, source_connector_id)
    print(f"Found {len(response.files)} files in connector")

    for file in response.files:
        print(f" 📄 {file.name} ({file.size:,} bytes, Uploaded: {file.last_modified})")
        # Metadata is optional; only print it when the file has some.
        if file.metadata:
            print(f" Metadata: {file.metadata}")
        print()

except Exception as e:
    # Top-level script boundary: report the failure instead of crashing.
    print(f"Error listing files: {e}")

import json
import os

import urllib3
import vectorize_client as v

# `organization_id` is the value read from VECTORIZE_ORGANIZATION_ID above;
# it is deliberately not reassigned here so the real ID is used.
source_connector_id = "your-connector-id"
file_path = "path/to/your/file.pdf"
file_name = "file.pdf"

# Create the Uploads API instance from the `api` client initialized above.
uploads_api = v.UploadsApi(api)

# File details
content_type = "application/pdf"  # Set appropriate content type

# Optional metadata - all values as strings
metadata = {
    "category": "research",
    "tags": "machine-learning,2024",  # Store as comma-separated string
    "processed": "false",  # Store boolean as string
}

try:
    # Step 1: Ask the API for a pre-signed upload URL for this file.
    start_response = uploads_api.start_file_upload_to_connector(
        organization_id,
        source_connector_id,
        start_file_upload_to_connector_request=v.StartFileUploadToConnectorRequest(
            name=file_name,
            content_type=content_type,
            # The metadata dict is sent as a single JSON string.
            metadata=json.dumps(metadata) if metadata else None,
        ),
    )

    # Step 2: PUT the file contents to the pre-signed URL.
    http = urllib3.PoolManager()

    with open(file_path, "rb") as f:
        response = http.request(
            "PUT",
            start_response.upload_url,
            body=f,
            headers={
                "Content-Type": content_type,
                # Declare the body size up front from the file on disk.
                "Content-Length": str(os.path.getsize(file_path)),
            },
        )

    if response.status != 200:
        print(f"Upload failed: {response.data}")
    else:
        print(f"Successfully uploaded {file_name}")

except Exception as e:
    # Top-level script boundary: report the failure instead of crashing.
    print(f"Error during upload: {e}")

Was this page helpful?