Node.js & Python SDKs
Official client libraries for WebPeel. Zero dependencies (Python), minimal footprint (Node.js).
Node.js SDK
Installation
npm install webpeel
Quick Start
import { peel } from 'webpeel';
// Simple fetch
const result = await peel('https://example.com');
console.log(result.title);
console.log(result.content); // Markdown
// With options
const result2 = await peel('https://example.com', {
  format: 'text',
  render: true,
  selector: 'article'
});
Core Functions
peel(url, options)
Fetch and extract content from a URL.
import { peel } from 'webpeel';
const result = await peel('https://example.com', {
  // Output format
  format: 'markdown', // 'markdown' | 'text' | 'html'

  // Rendering options
  render: false,  // Force browser mode
  stealth: false, // Use stealth mode
  wait: 0,        // Wait time in ms

  // Content filtering
  selector: 'article',
  exclude: ['.sidebar', '.ads'],
  includeTags: ['article', 'main'],
  excludeTags: ['nav', 'footer'],

  // Features
  screenshot: true,
  images: true,
  maxTokens: 5000,

  // Page actions
  actions: [
    { type: 'click', selector: '.load-more' },
    { type: 'wait', ms: 2000 }
  ],

  // Structured extraction
  extract: {
    selectors: {
      title: 'h1',
      price: '.price'
    }
  }
});
console.log(result.url);
console.log(result.title);
console.log(result.content);
console.log(result.metadata);
console.log(result.links);
console.log(result.method); // 'simple' | 'browser' | 'stealth'
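The method field reports which fetch strategy actually served the request. One illustrative use (a pattern sketch, not an SDK feature) is escalating to browser rendering when a plain fetch comes back thin; the 500-character threshold here is an arbitrary heuristic:
import { peel } from 'webpeel';

// Sketch: retry with browser rendering when a simple fetch
// returns suspiciously little content. Threshold is arbitrary.
async function peelWithFallback(url) {
  const first = await peel(url);
  if (first.method === 'simple' && first.content.length < 500) {
    return peel(url, { render: true });
  }
  return first;
}

const page = await peelWithFallback('https://example.com');
console.log(page.method, page.title);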
crawl(url, options)
Crawl a website recursively.
import { crawl } from 'webpeel';
const results = await crawl('https://example.com', {
  maxPages: 50,
  maxDepth: 2,
  excludePatterns: ['/admin/', '/login'],
  respectRobotsTxt: true,
  rateLimitMs: 1000
});
results.forEach(page => {
  console.log(page.url);
  console.log(page.title);
  console.log(page.markdown);
});
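Because crawl resolves with the full page list, post-processing is a single pass. For example (illustrative only, using the page fields shown above), merging a small crawl into one markdown file:
import { crawl } from 'webpeel';
import { writeFileSync } from 'fs';

// Sketch: concatenate crawled pages into a single markdown document.
const pages = await crawl('https://example.com', { maxPages: 10, maxDepth: 1 });
const combined = pages
  .map(page => `# ${page.title}\n\n${page.markdown}`)
  .join('\n\n---\n\n');
writeFileSync('site.md', combined);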
mapDomain(url, options)
Discover all URLs on a domain.
import { mapDomain } from 'webpeel';
const result = await mapDomain('https://example.com', {
  maxUrls: 5000,
  includePatterns: ['/docs/', '/blog/'],
  excludePatterns: ['/admin/']
});
console.log(`Found ${result.total} URLs`);
result.urls.forEach(url => console.log(url));
extractBranding(page)
Extract a site's design system and branding. In the high-level API this is enabled through the branding option on peel:
import { peel } from 'webpeel';
const result = await peel('https://example.com', {
  render: true,
  branding: true
});
console.log(result.branding);
// {
// colors: ['#8B5CF6', '#FAFAF8', ...],
// fonts: ['Inter', 'Instrument Serif'],
// ...
// }
trackChange(url, content, fingerprint)
Track content changes over time.
import { peel, trackChange } from 'webpeel';
// First fetch establishes a baseline
const baseline = await peel('https://example.com');
// Later: fetch again and diff against the stored fingerprint
const latest = await peel('https://example.com');
const change = await trackChange('https://example.com', latest.content, baseline.fingerprint);
if (change.changed) {
  console.log('Content changed!');
  console.log('Added:', change.added);
  console.log('Removed:', change.removed);
}
runAgent(options)
Run an autonomous research agent.
import { runAgent } from 'webpeel';
const result = await runAgent({
  prompt: 'Find the top 5 AI coding tools and compare them',
  llmApiKey: process.env.OPENAI_API_KEY,
  maxPages: 20,
  schema: {
    type: 'array',
    items: {
      type: 'object',
      properties: {
        name: { type: 'string' },
        features: { type: 'array' }
      }
    }
  },
  onProgress: (progress) => {
    console.log(progress.message);
  }
});
console.log(result.data);
console.log(result.sources);
summarizeContent(content, options)
Generate an AI summary of content.
import { peel, summarizeContent } from 'webpeel';
// Fetch a page first; its content feeds the summarizer
const { content } = await peel('https://example.com/article');
const summary = await summarizeContent(content, {
  apiKey: process.env.OPENAI_API_KEY,
  model: 'gpt-4o-mini',
  maxWords: 150
});
console.log(summary);
extractWithLLM(content, options)
AI-powered structured extraction.
import { peel, extractWithLLM } from 'webpeel';
// Fetch the page, then extract structured fields from its content
const { content } = await peel('https://example.com/product');
const data = await extractWithLLM(content, {
  prompt: 'Extract product name, price, and rating',
  llmApiKey: process.env.OPENAI_API_KEY,
  schema: {
    type: 'object',
    properties: {
      name: { type: 'string' },
      price: { type: 'number' },
      rating: { type: 'number' }
    }
  }
});
console.log(data);
Python SDK
Installation
pip install webpeel
Zero Dependencies
The Python SDK uses only the standard library — no external dependencies!
Quick Start
from webpeel import WebPeel
# Initialize client
client = WebPeel(api_key="your-api-key")  # API key optional on the free tier
# Simple scrape
result = client.scrape("https://example.com")
print(result.title)
print(result.content)
# With options
result = client.scrape(
    "https://example.com",
    formats=["markdown"],
    render=True,
    stealth=True
)
Core Methods
scrape(url, **kwargs)
Scrape a single URL.
from webpeel import WebPeel
client = WebPeel(api_key="your-api-key")
result = client.scrape(
    "https://example.com",
    formats=["markdown"],
    max_tokens=5000,
    render=False,
    stealth=False,
    actions=[
        {"type": "click", "selector": ".load-more"},
        {"type": "wait", "ms": 2000}
    ],
    extract={
        "selectors": {
            "title": "h1",
            "price": ".price"
        }
    },
    raw=False,
    wait=0,
    timeout=30
)
print(result.url)
print(result.title)
print(result.content)
print(result.markdown) # Alias for content
print(result.metadata)
print(result.links)
print(result.method) # 'simple' | 'browser' | 'stealth'
print(result.extracted) # Structured data
search(query, limit=5, scrape_results=False)
Search the web.
from webpeel import WebPeel
client = WebPeel()
results = client.search("python web scraping", limit=10)
for item in results.data.get("web", []):
    print(item["title"])
    print(item["url"])
    print(item["snippet"])
    print("---")
crawl(url, limit=50, max_depth=3, webhook=None)
Start an async crawl job.
from webpeel import WebPeel
client = WebPeel(api_key="your-api-key")
# Start crawl
job = client.crawl("https://example.com", limit=100, max_depth=2)
print(f"Job ID: {job.id}")
# Check status later
status = client.get_job(job.id)
print(status["status"]) # 'pending' | 'running' | 'completed' | 'failed'
if status["status"] == "completed":
for page in status["data"]:
print(page["title"])
print(page["url"])
map(url, search=None)
Discover all URLs on a domain.
from webpeel import WebPeel
client = WebPeel()
result = client.map("https://example.com")
print(f"Found {result.total} URLs")
for url in result.urls[:20]:
    print(url)
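The optional search argument narrows the discovered URLs. A sketch, assuming it filters by match against the URL (the exact matching semantics are an assumption):
from webpeel import WebPeel

client = WebPeel()
# Assumed semantics: "search" restricts results to matching URLs.
result = client.map("https://example.com", search="pricing")
for url in result.urls:
    print(url)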
batch_scrape(urls, **kwargs)
Batch scrape multiple URLs.
from webpeel import WebPeel
client = WebPeel(api_key="your-api-key")
urls = [
"https://example.com/1",
"https://example.com/2",
"https://example.com/3"
]
job = client.batch_scrape(urls, formats=["markdown"])
print(f"Job ID: {job.id}")
# Poll until the job finishes
import time
while True:
    status = client.get_job(job.id)
    if status["status"] in ("completed", "failed"):
        break
    time.sleep(5)
if status["status"] == "completed":
    for result in status["data"]:
        print(result["title"])
Exception Handling
from webpeel import WebPeel, WebPeelError, AuthError, RateLimitError, TimeoutError
client = WebPeel(api_key="your-api-key")
try:
    result = client.scrape("https://example.com")
except AuthError as e:
    print(f"Authentication failed: {e}")
except RateLimitError as e:
    print(f"Rate limit exceeded: {e}")
except TimeoutError as e:
    print(f"Request timeout: {e}")
except WebPeelError as e:
    print(f"API error: {e}")
Framework Integrations
LangChain
from langchain_community.document_loaders import WebPeelLoader
loader = WebPeelLoader(
    url="https://example.com",
    api_key="your-api-key",
    render=True
)
documents = loader.load()
for doc in documents:
    print(doc.page_content)
    print(doc.metadata)
LlamaIndex
from llama_index.readers.webpeel import WebPeelReader
reader = WebPeelReader(api_key="your-api-key")
documents = reader.load_data(url="https://example.com")
for doc in documents:
    print(doc.text)
    print(doc.metadata)
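From there the documents flow into LlamaIndex's standard pipeline; a sketch using VectorStoreIndex from llama_index.core (assumes an embedding model and LLM are configured via LlamaIndex settings):
from llama_index.core import VectorStoreIndex

# Standard LlamaIndex flow: index the scraped documents, then query them.
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
print(query_engine.query("What does this page say about pricing?"))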
Advanced Examples
Monitoring Price Changes
Node.js:
import { peel, trackChange } from 'webpeel';
const url = 'https://example.com/product';
// First fetch
const result = await peel(url, {
  extract: {
    selectors: {
      price: '.price',
      stock: '.stock-status'
    }
  }
});
const fingerprint = result.fingerprint;
const previousPrice = result.extracted.price;
// Later: fetch again and compare
const next = await peel(url, {
  extract: { selectors: { price: '.price' } }
});
const change = await trackChange(url, next.content, fingerprint);
if (change.changed && next.extracted.price !== previousPrice) {
  console.log('Price changed!');
  console.log('Old:', previousPrice);
  console.log('New:', next.extracted.price);
}
Python:
from webpeel import WebPeel
client = WebPeel()
url = "https://example.com/product"
# First fetch
result = client.scrape(
    url,
    extract={
        "selectors": {
            "price": ".price",
            "stock": ".stock-status"
        }
    }
)
fingerprint = result.fingerprint
previous_price = result.extracted["price"]
# Later...
new_result = client.scrape(url, extract={"selectors": {"price": ".price"}})
if new_result.fingerprint != fingerprint:
    print(f"Price changed from {previous_price} to {new_result.extracted['price']}")
Batch Documentation Scraping
Node.js:
import { mapDomain, peelBatch } from 'webpeel';
import { writeFileSync } from 'fs';

// 1. Discover all docs URLs
const map = await mapDomain('https://docs.example.com', {
  includePatterns: ['/docs/'],
  excludePatterns: ['/api/']
});

// 2. Batch scrape
const results = await peelBatch(map.urls, {
  concurrency: 5,
  selector: 'article',
  format: 'markdown'
});

// 3. Save to files
results.forEach(result => {
  if ('content' in result) {
    const filename = result.url.split('/').pop() + '.md';
    writeFileSync(filename, result.content);
  }
});
Python:
import time
from webpeel import WebPeel

client = WebPeel(api_key="your-api-key")

# 1. Discover docs URLs
map_result = client.map("https://docs.example.com")
doc_urls = [url for url in map_result.urls if '/docs/' in url]

# 2. Batch scrape
job = client.batch_scrape(doc_urls[:50])

# 3. Wait for completion, then save
while True:
    status = client.get_job(job.id)
    if status["status"] in ("completed", "failed"):
        break
    time.sleep(5)

if status["status"] == "completed":
    for i, result in enumerate(status["data"]):
        if "content" in result:
            with open(f"doc_{i}.md", "w") as f:
                f.write(result["content"])