Skip to main content

Generate a Private Deep Research on a pipeline

Beta Feature

The API and Deep Research are currently in beta. Features and configuration may change.

Prerequisites

Before you begin, you'll need:

  1. A Vectorize account
  2. An API access token (how to create one)
  3. Your organization ID (see below)
  4. A pipeline ID (see below)

Finding your Organization ID

Your organization ID is in the Vectorize platform URL:

https://platform.vectorize.io/organization/[YOUR-ORG-ID]

For example, if your URL is:

https://platform.vectorize.io/organization/ecf3fa1d-30d0-4df1-8af6-f4852bc851cb

Your organization ID is: ecf3fa1d-30d0-4df1-8af6-f4852bc851cb

Finding your Pipeline ID

Navigate to your pipeline in the Vectorize platform. The pipeline ID is shown in:

  • The URL: https://platform.vectorize.io/organization/[org-id]/pipeline/[PIPELINE-ID]
  • The pipeline details page
  • The "Connect" tab of your pipeline

Generate the Deep Research

With your pipeline ID ready, you can now start a deep research task. This will analyze your pipeline's data to generate comprehensive insights based on your query.

import vectorize_client as v

# Wrap the already-configured API client in the Pipelines API
pipelines_api = v.PipelinesApi(apiClient)

# Describe the research task: the question to answer and whether
# web search is enabled for it
research_request = v.StartDeepResearchRequest(
    query="What is the meaning of life?",
    web_search=True,  # Enable web search for comprehensive results
)

# Start deep research on the chosen pipeline
response = pipelines_api.start_deep_research(
    organization_id,
    pipeline_id,
    research_request,
)

# Keep the research ID: it is what the result lookup is keyed on
research_id = response.research_id
print(f"Research started with ID: {research_id}")

Get the Deep Research result

Deep Research tasks run asynchronously. Use the research ID returned from the previous step to check the status and retrieve your results.

import vectorize_client as v
import time

# Wrap the already-configured API client in the Pipelines API
pipelines_api = v.PipelinesApi(apiClient)

# Poll at most 60 times, 5 seconds apart (5 minutes in total)
max_attempts = 60
attempt = 0

while attempt < max_attempts:
    try:
        response = pipelines_api.get_deep_research_result(
            organization_id,
            pipeline_id,
            research_id,
        )

        if not response.ready:
            # Still running: wait 5 seconds, then count this attempt
            print("Research in progress...")
            time.sleep(5)
            attempt += 1
            continue

        # The task has finished, either successfully or with an error
        if response.data.success:
            print("Research completed successfully!")
            print(response.data.markdown)
        else:
            print("Research failed:", response.data.error)
        break

    except Exception as e:
        # Report the failure, then let it propagate to the caller
        print(f"Error checking research status: {e}")
        raise

# The counter only reaches the limit when no poll ever saw a finished task
if attempt >= max_attempts:
    print("Research timed out after 5 minutes")

Complete Example

Here's all the code from this guide combined into a complete, runnable example:

Required Environment Variables:
• `VECTORIZE_API_KEY`
• `VECTORIZE_ORGANIZATION_ID`
• `VECTORIZE_AI_PLATFORM_CONNECTOR_ID`
• `VECTORIZE_DESTINATION_CONNECTOR_ID`

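Additional Requirements:
• Requires a pipeline ID configured for deep research — the example below obtains one by creating its own file-upload pipeline, which it deletes at the end of the run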
#!/usr/bin/env python3
"""
Complete example for deep research queries.
This is a hand-written example that corresponds to the test file:
api-clients/python/tests/pipelines/deep_research.py

IMPORTANT: Keep this file in sync with the test file's snippets!
"""

import os
import sys
import time
import vectorize_client as v


def get_api_config():
    """Build the API configuration from environment variables.

    Reads ``VECTORIZE_ORGANIZATION_ID`` and ``VECTORIZE_API_KEY``. When
    either one is unset or empty, setup instructions are printed and the
    process exits with status 1.

    Returns:
        A ``(configuration, organization_id)`` tuple, where ``configuration``
        is a ``v.Configuration`` for the production API host.
    """
    org_id = os.getenv("VECTORIZE_ORGANIZATION_ID")
    token = os.getenv("VECTORIZE_API_KEY")

    if not (org_id and token):
        setup_help = (
            "🔑 Setup required:",
            "1. Get your API key from: https://app.vectorize.io/settings",
            "2. Set environment variables:",
            " export VECTORIZE_ORGANIZATION_ID='your-org-id'",
            " export VECTORIZE_API_KEY='your-api-key'",
        )
        for help_line in setup_help:
            print(help_line)
        sys.exit(1)

    # Always use production API
    config = v.Configuration(
        host="https://api.vectorize.io/v1",
        access_token=token,
    )
    return config, org_id


def create_pipeline_for_research(api_client, organization_id):
    """Create a file-upload source connector and a pipeline for deep research.

    The AI platform and destination connector IDs are read from
    ``VECTORIZE_AI_PLATFORM_CONNECTOR_ID`` and
    ``VECTORIZE_DESTINATION_CONNECTOR_ID``. If either is missing, the process
    exits with status 1 before anything is created.

    After the pipeline is created its status is polled once per second for up
    to 60 seconds. If the pipeline reports a failure status, or an exception
    is raised, everything created so far is removed on a best-effort basis.
    If the pipeline is simply not ready when the wait ends, a warning is
    printed and the IDs are returned anyway.

    Args:
        api_client: API client handed to the ``v`` API wrappers.
        organization_id: Organization in which the resources are created.

    Returns:
        A ``(pipeline_id, source_connector_id)`` tuple.

    Raises:
        SystemExit: If the connector IDs are missing, or the pipeline reports
            ``ERROR_DEPLOYING`` or ``SHUTDOWN``.
        Exception: Any error from the API calls is reported and re-raised.
    """
    print("🚀 Creating pipeline for deep research...")

    # Get required connector IDs from environment
    ai_platform_connector_id = os.environ.get("VECTORIZE_AI_PLATFORM_CONNECTOR_ID")
    destination_connector_id = os.environ.get("VECTORIZE_DESTINATION_CONNECTOR_ID")

    if not ai_platform_connector_id or not destination_connector_id:
        print("❌ Missing required connector IDs")
        print(" Please set:")
        print(" - VECTORIZE_AI_PLATFORM_CONNECTOR_ID")
        print(" - VECTORIZE_DESTINATION_CONNECTOR_ID")
        print("\n💡 Run get_vectorize_connectors.py to find your VECTORIZE connector IDs")
        sys.exit(1)

    # First, create a source connector
    connectors_api = v.SourceConnectorsApi(api_client)

    try:
        # Create file upload connector
        file_upload = v.FileUpload(
            name="deep-research-source",
            type="FILE_UPLOAD",
            config={},
        )

        request = v.CreateSourceConnectorRequest(file_upload)
        source_response = connectors_api.create_source_connector(
            organization_id,
            request,
        )
        source_connector_id = source_response.connector.id
        print(f"✅ Created source connector: {source_connector_id}")

    except Exception as e:
        print(f"❌ Error creating source connector: {e}")
        raise

    # Create the pipeline
    pipelines_api = v.PipelinesApi(api_client)

    pipeline_configuration = v.PipelineConfigurationSchema(
        pipeline_name="Deep Research Pipeline",
        source_connectors=[
            v.PipelineSourceConnectorSchema(
                id=source_connector_id,
                type="FILE_UPLOAD",
                config={},
            )
        ],
        ai_platform_connector=v.PipelineAIPlatformConnectorSchema(
            id=ai_platform_connector_id,
            type="VECTORIZE",
            config={},
        ),
        destination_connector=v.PipelineDestinationConnectorSchema(
            id=destination_connector_id,
            type="VECTORIZE",
            config={},
        ),
        schedule=v.ScheduleSchema(type="manual"),
    )

    # Stays None until the pipeline exists, so cleanup knows what to delete
    pipeline_id = None

    def cleanup_created_resources():
        """Remove whatever has been created so far; report but never raise."""
        # Pipeline first, matching the deletion order used at the end of the
        # example. NOTE(review): presumably a connector that is still
        # attached to a pipeline cannot be deleted - confirm against the API.
        if pipeline_id is not None:
            try:
                pipelines_api.delete_pipeline(organization_id, pipeline_id)
            except Exception as cleanup_error:
                print(f" ⚠️ Cleanup warning: {cleanup_error}")
        try:
            connectors_api.delete_source_connector(organization_id, source_connector_id)
        except Exception as cleanup_error:
            print(f" ⚠️ Cleanup warning: {cleanup_error}")

    try:
        response = pipelines_api.create_pipeline(
            organization_id,
            pipeline_configuration,
        )

        pipeline_id = response.data.id
        print(f"✅ Created pipeline: {pipeline_id}")
        print(" Name: Deep Research Pipeline")

        # Wait for pipeline to be ready
        print("⏳ Waiting for pipeline to be ready...")
        max_wait = 60  # 60 seconds
        for i in range(max_wait):
            pipeline = pipelines_api.get_pipeline(organization_id, pipeline_id)
            status = pipeline.data.status

            if status in ["LISTENING", "IDLE"]:
                print(f"✅ Pipeline is ready! Status: {status}\n")
                break
            elif status in ["ERROR_DEPLOYING", "SHUTDOWN"]:
                print(f"❌ Pipeline failed to deploy: {status}")
                # SystemExit bypasses the `except Exception` below, so the
                # cleanup has to happen here or the resources are leaked
                cleanup_created_resources()
                sys.exit(1)

            # Report progress every 10 seconds rather than on every poll
            if i % 10 == 0:
                print(f" Current status: {status}")

            time.sleep(1)
        else:
            # The loop ended without ever seeing a ready status: say so
            # instead of returning as though the pipeline were ready
            print(f"⚠️ Pipeline not ready after {max_wait} seconds (last status: {status}); continuing anyway")

        return pipeline_id, source_connector_id

    except Exception as e:
        print(f"❌ Error creating pipeline: {e}")
        # Clean up the connector, and the pipeline if it was created
        cleanup_created_resources()
        raise


def start_deep_research(api_client, organization_id, pipeline_id):
    """Start a web-search-enabled deep research task and return its ID.

    Args:
        api_client: API client handed to ``v.PipelinesApi``.
        organization_id: Organization that owns the pipeline.
        pipeline_id: Pipeline the research runs against.

    Returns:
        The ``research_id`` reported by the API for the new task.
    """
    api = v.PipelinesApi(api_client)

    # The query is fixed for this example
    research_request = v.StartDeepResearchRequest(
        query="What is the meaning of life?",
        web_search=True,  # Enable web search for comprehensive results
    )

    started = api.start_deep_research(organization_id, pipeline_id, research_request)

    new_research_id = started.research_id
    print(f"Research started with ID: {new_research_id}")
    return new_research_id


def get_deep_research_result(api_client, organization_id, pipeline_id, research_id):
    """Poll for a deep research result until it is ready or polling times out.

    The result is requested up to 60 times with a 5 second pause after each
    attempt that finds the task still running (5 minutes in total).

    Args:
        api_client: API client handed to ``v.PipelinesApi``.
        organization_id: Organization that owns the pipeline.
        pipeline_id: Pipeline the research was started on.
        research_id: ID returned when the research was started.

    Returns:
        The result markdown when the research succeeds; ``None`` when the
        research reports failure or is not ready before polling gives up.

    Raises:
        Exception: Any error raised while polling is reported and re-raised.
    """
    # Create API interface
    pipelines_api = v.PipelinesApi(api_client)

    max_attempts = 60  # Maximum 5 minutes (60 * 5 seconds)

    for _ in range(max_attempts):
        try:
            response = pipelines_api.get_deep_research_result(
                organization_id,
                pipeline_id,
                research_id,
            )

            if response.ready:
                # Finished: both outcomes return, so nothing follows them
                if response.data.success:
                    print("Research completed successfully!")
                    print(response.data.markdown)
                    return response.data.markdown
                print("Research failed:", response.data.error)
                return None

            print("Research in progress...")
            time.sleep(5)  # Wait 5 seconds before checking again

        except Exception as e:
            print(f"Error checking research status: {e}")
            raise

    # Reaching this point means every attempt found the task still running
    print("Research timed out after 5 minutes")
    return None


def start_research_without_web_search(api_client, organization_id, pipeline_id):
    """Start a deep research task with web search turned off.

    Unlike a plain start call, failures are not propagated: any exception is
    reported on stdout and turned into a ``None`` return value.

    Args:
        api_client: API client handed to ``v.PipelinesApi``.
        organization_id: Organization that owns the pipeline.
        pipeline_id: Pipeline the research runs against.

    Returns:
        The new task's ``research_id``, or ``None`` if starting it failed.
    """
    api = v.PipelinesApi(api_client)

    try:
        research_request = v.StartDeepResearchRequest(
            query="Explain quantum computing in simple terms",
            web_search=False,  # Only use your pipeline's data
        )
        started = api.start_deep_research(organization_id, pipeline_id, research_request)

        print(f"✅ Research started without web search: {started.research_id}")
        return started.research_id

    except Exception as e:
        print(f"❌ Error starting research without web search: {e}")
        return None


def _print_result_preview(result, max_lines=10):
    """Print the non-blank lines among the first ``max_lines`` lines of ``result``."""
    lines = result.split('\n')
    for line in lines[:max_lines]:
        if line.strip():
            print(line)
    if len(lines) > max_lines:
        print("... (truncated for display)")


def main():
    """Run the deep research walkthrough end to end.

    Steps: load the configuration, create a pipeline, run one research query
    with web search and wait for its result, start a second query without
    web search, then delete the pipeline and its source connector.

    The resources are deleted in a ``finally`` block, so they are removed
    even when one of the research steps raises. Any unexpected error is
    reported and the process exits with status 1.
    """
    print("=== Deep Research Example ===\n")

    try:
        # Get configuration
        configuration, organization_id = get_api_config()

        print("⚙️ Configuration:")
        print(f" Organization ID: {organization_id}")
        print(f" Host: {configuration.host}\n")

        # Initialize API client
        with v.ApiClient(configuration) as api_client:
            # Create a pipeline for deep research
            pipeline_id, source_connector_id = create_pipeline_for_research(api_client, organization_id)

            try:
                # Example 1: Deep research with web search
                print("🧠 Starting Deep Research with Web Search")
                print(" Query: 'What is the meaning of life?'")
                print(" Web Search: Enabled\n")

                research_id = start_deep_research(api_client, organization_id, pipeline_id)

                if research_id:
                    print("\n📊 Getting Research Results")
                    result = get_deep_research_result(api_client, organization_id, pipeline_id, research_id)

                    if result:
                        print("\n📄 Research Results Summary:")
                        print("=" * 60)
                        # Show first few lines of the result
                        _print_result_preview(result)
                        print("=" * 60)
                        print(f"✅ Complete research result: {len(result)} characters")
                    else:
                        print("❌ Failed to get research results")

                # Example 2: Research without web search
                print("\n🔬 Deep Research without Web Search")
                print(" Query: 'Explain quantum computing in simple terms'")
                print(" Web Search: Disabled (uses only your pipeline's data)\n")

                research_id_no_web = start_research_without_web_search(api_client, organization_id, pipeline_id)

                if research_id_no_web:
                    print("⏳ Getting results (this may take a moment)...")
                    # For brevity the second query is only started here. To
                    # wait for its result as well, pass research_id_no_web to
                    # get_deep_research_result the same way as in Example 1.
                    print("✅ Research started successfully")

                print("\n🎉 Deep research examples completed!")
                print(" ✅ Demonstrated research with web search enhancement")
                print(" ✅ Demonstrated research using only pipeline data")
                print(" 💡 Deep research combines your data with AI reasoning for comprehensive insights")

            finally:
                # Clean up resources, even if a research step raised above
                print("\n🧹 Cleaning up resources...")
                try:
                    pipelines_api = v.PipelinesApi(api_client)
                    pipelines_api.delete_pipeline(organization_id, pipeline_id)
                    print(" ✅ Pipeline deleted")

                    connectors_api = v.SourceConnectorsApi(api_client)
                    connectors_api.delete_source_connector(organization_id, source_connector_id)
                    print(" ✅ Source connector deleted")
                except Exception as e:
                    # Best effort: a failed cleanup is reported, not raised
                    print(f" ⚠️ Cleanup warning: {e}")

    except ValueError as e:
        print(f"❌ Configuration Error: {e}")
        print("\n💡 Make sure to set the required environment variables:")
        print(" export VECTORIZE_ORGANIZATION_ID='your-org-id'")
        print(" export VECTORIZE_API_KEY='your-api-key'")

    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)


# Run the walkthrough only when this file is executed as a script,
# not when it is imported as a module
if __name__ == "__main__":
    main()

Was this page helpful?