These examples use the requests library for making HTTP requests. Install it with:
pip install requests

Setup

import requests

API_KEY = 'ck_your_api_key'
BASE_URL = 'https://api.crawlkit.com/api/v1'

def crawlkit(endpoint, body):
    """Helper function for all CrawlKit requests."""
    response = requests.post(
        f'{BASE_URL}{endpoint}',
        headers={
            'Authorization': f'ApiKey {API_KEY}',
            'Content-Type': 'application/json',
        },
        json=body
    )
    return response.json()
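
The helper above assumes the request itself succeeds and that the body is JSON. If you want to guard against network failures as well, a variant like the sketch below catches the relevant requests exceptions and returns an error object in the same shape the API uses (REQUEST_FAILED is a local placeholder code for illustration, not a CrawlKit error code):

def crawlkit_safe(endpoint, body, timeout=30):
    """Like crawlkit(), but tolerant of network failures and non-JSON replies."""
    try:
        response = requests.post(
            f'{BASE_URL}{endpoint}',
            headers={
                'Authorization': f'ApiKey {API_KEY}',
                'Content-Type': 'application/json',
            },
            json=body,
            timeout=timeout,  # client-side limit in seconds, separate from the API's own timeout option
        )
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # Network failure, client-side timeout, or a non-JSON response body
        return {'success': False, 'error': {'code': 'REQUEST_FAILED', 'message': str(e)}}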

Raw Crawl

Fetch a webpage and get its HTML content.

Basic Request

result = crawlkit('/crawl/raw', {
    'url': 'https://example.com'
})

print(result['data']['body'])        # HTML content
print(result['data']['statusCode'])  # 200

With Options

result = crawlkit('/crawl/raw', {
    'url': 'https://example.com',
    'options': {
        'timeout': 30000,
        'followRedirects': True,
        'maxRedirects': 5,
        'headers': {
            'User-Agent': 'MyApp/1.0'
        }
    }
})

Complete Example

def fetch_page(url, **options):
    """Fetch a webpage and return its HTML content."""
    result = crawlkit('/crawl/raw', {
        'url': url,
        'options': options
    })

    if not result['success']:
        raise Exception(result['error']['message'])

    data = result['data']
    print(f"Fetched {data['finalUrl']}")
    print(f"Status: {data['statusCode']}")
    print(f"Size: {data['contentLength']} bytes")
    print(f"Time: {data['timing']['total']}ms")
    print(f"Credits remaining: {data['creditsRemaining']}")

    return data['body']

# Usage
html = fetch_page('https://example.com')
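
The documented request options can be passed straight through as keyword arguments, for example with the same options shown in "With Options" above:

html = fetch_page('https://example.com', timeout=30000, followRedirects=True)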

Search

Search the web using DuckDuckGo.

Basic Search

result = crawlkit('/crawl/search', {
    'query': 'web scraping python'
})

for item in result['data']['results']:
    print(f"{item['position']}. {item['title']}")
    print(f"   {item['url']}")
    print(f"   {item['snippet']}\n")

With Filters

result = crawlkit('/crawl/search', {
    'query': 'web scraping python',
    'options': {
        'language': 'en-US',
        'region': 'us-en',
        'timeRange': 'm',    # Last month
        'maxResults': 20
    }
})

Complete Example

def search(query, **options):
    """Search the web and return results."""
    result = crawlkit('/crawl/search', {
        'query': query,
        'options': options
    })

    if not result['success']:
        raise Exception(result['error']['message'])

    data = result['data']
    print(f"Found {data['totalResults']} results for '{query}'")

    return [
        {
            'title': r['title'],
            'url': r['url'],
            'snippet': r['snippet']
        }
        for r in data['results']
    ]

# Usage
results = search('python tutorial', maxResults=10)
for r in results:
    print(r['title'])

Screenshot

Take a full-page screenshot of any website.

Basic Screenshot

result = crawlkit('/crawl/screenshot', {
    'url': 'https://example.com'
})

print('Screenshot URL:', result['data']['url'])

With Options

result = crawlkit('/crawl/screenshot', {
    'url': 'https://example.com',
    'options': {
        'width': 1920,
        'height': 1080,
        'timeout': 30000,
        'waitForSelector': '.content-loaded'
    }
})

Download Screenshot

def take_screenshot(url, filename, **options):
    """Take a screenshot and save it to a file."""
    result = crawlkit('/crawl/screenshot', {
        'url': url,
        'options': options
    })

    if not result['success']:
        raise Exception(result['error']['message'])

    # Download the image
    image_url = result['data']['url']
    response = requests.get(image_url)

    with open(filename, 'wb') as f:
        f.write(response.content)

    print(f'Saved to {filename}')
    return filename

# Usage
take_screenshot('https://example.com', 'screenshot.png')
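
For large captures you may prefer to stream the image to disk instead of loading it into memory. A minimal sketch using requests' streaming download (the chunk size is arbitrary):

def download_screenshot(image_url, filename, chunk_size=8192):
    """Stream a screenshot image to disk without holding the whole file in memory."""
    with requests.get(image_url, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                f.write(chunk)
    return filename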

Error Handling

def safe_crawl(url):
    """Crawl a URL with proper error handling."""
    result = crawlkit('/crawl/raw', {'url': url})

    if not result['success']:
        error = result['error']
        code = error['code']
        message = error['message']

        if code == 'INSUFFICIENT_CREDITS':
            print('Out of credits! Please purchase more.')
        elif code == 'INVALID_URL':
            print('Invalid URL provided.')
        elif code == 'TIMEOUT':
            print('Request timed out. Try increasing timeout.')
        else:
            print(f'Error: {message}')

        return None

    return result['data']
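
Transient failures such as timeouts are often worth retrying. Below is a minimal retry sketch with exponential backoff, assuming only the TIMEOUT code is safe to retry (adjust the retryable codes to your use case):

import time

def crawl_with_retry(url, attempts=3, base_delay=2):
    """Retry a raw crawl on timeouts, backing off exponentially between attempts."""
    for attempt in range(attempts):
        result = crawlkit('/crawl/raw', {'url': url})
        if result['success']:
            return result['data']
        if result['error']['code'] != 'TIMEOUT':
            # Non-retryable error: surface it immediately
            raise Exception(result['error']['message'])
        # Wait 2s, 4s, 8s, ... before the next attempt
        time.sleep(base_delay * (2 ** attempt))
    raise Exception(f'{url} still timing out after {attempts} attempts')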

Using Environment Variables

import os

API_KEY = os.environ.get('CRAWLKIT_API_KEY')

if not API_KEY:
    raise Exception('CRAWLKIT_API_KEY environment variable not set')

Set the environment variable:

export CRAWLKIT_API_KEY="ck_your_api_key"
python your_script.py

Batch Processing

from concurrent.futures import ThreadPoolExecutor, as_completed

def crawl_multiple(urls, max_workers=5):
    """Crawl multiple URLs in parallel."""
    results = {}

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_url = {
            executor.submit(fetch_page, url): url
            for url in urls
        }

        for future in as_completed(future_to_url):
            url = future_to_url[future]
            try:
                results[url] = future.result()
            except Exception as e:
                results[url] = f'Error: {e}'

    return results

# Usage
urls = [
    'https://example.com',
    'https://httpbin.org/html',
    'https://jsonplaceholder.typicode.com'
]

results = crawl_multiple(urls)
for url, html in results.items():
    print(f'{url}: {len(html)} characters')