Installation
Install Puppeteer in your project:
npm install puppeteer-core
Use `puppeteer-core` instead of `puppeteer`, since you’ll be connecting to Orchestrator’s browsers rather than launching local ones.
Basic Setup
Here’s how to connect Puppeteer to an Orchestrator session:
const puppeteer = require('puppeteer-core');
/**
 * Create a new Orchestrator browser session and attach Puppeteer to it.
 *
 * Uses the global `fetch` (built into Node.js 18+) and the `puppeteer`
 * binding required at the top of this example.
 *
 * @returns {Promise<{browser: object, sessionId: string}>} The connected
 *   browser and the id of the session that backs it.
 * @throws {Error} If the API answers with a non-2xx status, if the response
 *   lacks `id` or `cdp_url`, or if Puppeteer cannot connect.
 */
async function connectToOrchestrator() {
  const sessionsUrl = 'https://api.orchestratorhq.com/api/sessions';
  const authHeaders = { 'Authorization': 'Bearer orch_your_api_key_here' };

  // Create a new browser session
  const sessionResponse = await fetch(sessionsUrl, {
    method: 'POST',
    headers: { ...authHeaders, 'Content-Type': 'application/json' },
    body: JSON.stringify({
      session_name: 'Puppeteer Automation',
      duration: '1h'
    })
  });

  // fetch() resolves for HTTP error statuses too, so check explicitly instead
  // of passing an error payload on to puppeteer.connect().
  if (!sessionResponse.ok) {
    throw new Error(
      `Failed to create session: ${sessionResponse.status} ${sessionResponse.statusText}`
    );
  }

  const session = await sessionResponse.json();
  if (!session?.id || !session?.cdp_url) {
    throw new Error('Session response is missing "id" or "cdp_url"');
  }
  console.log('Session created:', session.id);

  try {
    // Connect to the browser using browserWSEndpoint (session is active when API responds)
    const browser = await puppeteer.connect({
      browserWSEndpoint: session.cdp_url
    });
    return { browser, sessionId: session.id };
  } catch (error) {
    // The caller never receives the session id on this path, so stop the
    // session here (best effort) rather than leaving it running.
    try {
      await fetch(`${sessionsUrl}/${session.id}`, {
        method: 'DELETE',
        headers: authHeaders
      });
    } catch (cleanupError) {
      console.error('Error stopping session:', cleanupError);
    }
    throw error;
  }
}
// Usage example
/**
 * Open a session, load example.com, print its title and save a screenshot.
 *
 * The browser is disconnected and the session stopped even when the
 * automation fails.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { browser, sessionId } = await connectToOrchestrator();
  try {
    // Use the existing page when there is one; fall back to a new tab so the
    // example does not crash on `undefined.goto` when no page is open.
    const [existingPage] = await browser.pages();
    const page = existingPage ?? (await browser.newPage());

    // Your automation code here
    await page.goto('https://example.com');
    console.log('Page title:', await page.title());

    // Take a screenshot
    await page.screenshot({ path: 'example.png' });
  } finally {
    // Clean up
    try {
      await browser.disconnect();
    } finally {
      // Stop the session even if disconnecting failed
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: {
          'Authorization': 'Bearer orch_your_api_key_here'
        }
      });
    }
  }
}
// Run the example and report any failure on stderr.
main().catch((error) => console.error(error));
Web Scraping Example
Here’s a practical web scraping example using Puppeteer with Orchestrator:
const puppeteer = require('puppeteer-core');
/**
 * Scrape the first ten stories from the Hacker News front page through an
 * Orchestrator browser session and save a full-page screenshot.
 *
 * The session is stopped on every path after it was created, including a
 * failed connect or disconnect.
 *
 * @returns {Promise<Array<{title: string, url: string, score: string, comments: string, id: string}>>}
 *   One record per story row.
 * @throws {Error} If the session cannot be created or any browser step fails.
 */
async function scrapeNews() {
  const sessionsUrl = 'https://api.orchestratorhq.com/api/sessions';

  // Create and connect to session
  const sessionResponse = await fetch(sessionsUrl, {
    method: 'POST',
    headers: {
      'Authorization': 'Bearer orch_your_api_key_here',
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      session_name: 'News Scraper',
      duration: '30m'
    })
  });

  // fetch() does not reject on HTTP errors; fail here with a clear message.
  if (!sessionResponse.ok) {
    throw new Error(
      `Failed to create session: ${sessionResponse.status} ${sessionResponse.statusText}`
    );
  }
  const session = await sessionResponse.json();

  let browser;
  try {
    // Connect to browser (session is active when API responds)
    browser = await puppeteer.connect({
      browserWSEndpoint: session.cdp_url
    });
    const page = await browser.newPage();

    // Set user agent to avoid bot detection
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');

    // Navigate to news website
    await page.goto('https://news.ycombinator.com/', {
      waitUntil: 'networkidle2'
    });

    // Extract news articles (runs inside the page, so only DOM APIs are available)
    const articles = await page.evaluate(() => {
      const rows = document.querySelectorAll('.athing');
      return Array.from(rows).slice(0, 10).map(row => {
        const titleElement = row.querySelector('.titleline > a');
        // Score and comment links live in the row that follows each story row
        const scoreElement = row.nextElementSibling?.querySelector('.score');
        const commentsElement = row.nextElementSibling?.querySelector('a[href*="item?id="]');
        return {
          title: titleElement?.textContent || '',
          url: titleElement?.href || '',
          score: scoreElement?.textContent || '0 points',
          comments: commentsElement?.textContent || '0 comments',
          id: row.id
        };
      });
    });
    console.log('Scraped articles:', articles);

    // Take a screenshot of the page
    await page.screenshot({
      path: 'hackernews.png',
      fullPage: true
    });
    return articles;
  } finally {
    try {
      // `browser` is still undefined when puppeteer.connect() itself failed
      if (browser) {
        await browser.disconnect();
      }
    } finally {
      // Clean up session
      await fetch(`${sessionsUrl}/${session.id}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}
// Run the scraper, report how many articles came back, and log any failure.
scrapeNews()
  .then((scraped) => {
    console.log(`Scraped ${scraped.length} articles`);
  })
  .catch((error) => console.error(error));
Form Automation Example
Automate form filling and submission (this and the following examples reuse the `connectToOrchestrator()` helper from Basic Setup):
const puppeteer = require('puppeteer-core');
/**
 * Fill in and submit the httpbin sample order form, then log the echoed
 * submission and save before/after screenshots.
 *
 * NOTE(review): the selectors below follow the markup of
 * https://httpbin.org/forms/post as far as it is known (pizza size is a
 * radio group and the submit control is a plain <button>) — verify against
 * the live page.
 *
 * @returns {Promise<void>}
 */
async function automateForm() {
  const { browser, sessionId } = await connectToOrchestrator();
  try {
    const page = await browser.newPage();

    // Navigate to a form page
    await page.goto('https://httpbin.org/forms/post');

    // Fill out the form
    await page.type('input[name="custname"]', 'John Doe');
    await page.type('input[name="custtel"]', '+1-555-123-4567');
    await page.type('input[name="custemail"]', 'john.doe@example.com');

    // Pizza size is a radio group, so click the option instead of page.select()
    await page.click('input[name="size"][value="medium"]');

    // Tick a topping
    await page.click('input[name="topping"][value="bacon"]');

    // Fill textarea
    await page.type('textarea[name="comments"]', 'This is an automated test using Puppeteer and Orchestrator.');

    // Take screenshot before submission
    await page.screenshot({ path: 'form-filled.png' });

    // Submit the form. Start waiting for the navigation BEFORE clicking:
    // otherwise the navigation can finish first and the wait would time out.
    await Promise.all([
      page.waitForNavigation({ waitUntil: 'networkidle2' }),
      page.click('form button')
    ]);

    // Extract the response
    const response = await page.evaluate(() => {
      const preElement = document.querySelector('pre');
      return preElement ? preElement.textContent : 'No response found';
    });
    console.log('Form submission response:', response);

    // Take screenshot of result
    await page.screenshot({ path: 'form-result.png' });
  } finally {
    try {
      await browser.disconnect();
    } finally {
      // Stop session even if disconnecting failed
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}
// Run the form automation and report any failure on stderr.
automateForm().catch((error) => console.error(error));
PDF Generation
Generate PDFs from web pages:
const puppeteer = require('puppeteer-core');
/**
 * Render two PDFs through an Orchestrator session: `example.pdf` from a live
 * page and `custom-content.pdf` from inline HTML.
 *
 * The browser is disconnected and the session stopped even when rendering
 * fails.
 *
 * @returns {Promise<void>}
 */
async function generatePDF() {
  const { browser, sessionId } = await connectToOrchestrator();
  try {
    const page = await browser.newPage();

    // Navigate to the page you want to convert to PDF
    await page.goto('https://example.com', {
      waitUntil: 'networkidle2'
    });

    // Generate PDF (written to `path`; the returned buffer is not needed here)
    await page.pdf({
      path: 'example.pdf',
      format: 'A4',
      printBackground: true,
      margin: {
        top: '20px',
        right: '20px',
        bottom: '20px',
        left: '20px'
      }
    });
    console.log('PDF generated successfully');

    // You can also generate PDF from HTML content
    await page.setContent(`
<html>
<head>
<style>
body { font-family: Arial, sans-serif; }
.header { color: #333; text-align: center; }
.content { margin: 20px; line-height: 1.6; }
</style>
</head>
<body>
<div class="header">
<h1>Generated with Orchestrator + Puppeteer</h1>
</div>
<div class="content">
<p>This PDF was generated using Puppeteer connected to an Orchestrator browser session.</p>
<p>Date: ${new Date().toLocaleDateString()}</p>
</div>
</body>
</html>
`);
    await page.pdf({
      path: 'custom-content.pdf',
      format: 'A4',
      printBackground: true
    });
    console.log('Custom PDF generated successfully');
  } finally {
    try {
      await browser.disconnect();
    } finally {
      // Stop session even if disconnecting failed
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}
// Run the PDF example and report any failure on stderr.
generatePDF().catch((error) => console.error(error));
Advanced Features
Request Interception
Intercept and modify network requests:
/**
 * Demonstrate request interception: block image requests and add a custom
 * header to every other request, then print the page httpbin returns.
 *
 * The browser is disconnected and the session stopped when done.
 *
 * @returns {Promise<void>}
 */
async function interceptRequests() {
  const { browser, sessionId } = await connectToOrchestrator();
  try {
    const page = await browser.newPage();

    // Enable request interception
    await page.setRequestInterception(true);

    // abort()/continue() return promises; log their failures instead of
    // leaving rejections unhandled inside the event handler.
    const logInterceptionError = (error) => {
      console.error('Request interception failed:', error);
    };

    page.on('request', (request) => {
      // Block images to speed up loading
      if (request.resourceType() === 'image') {
        request.abort().catch(logInterceptionError);
        return;
      }

      // Modify headers
      const headers = {
        ...request.headers(),
        'X-Custom-Header': 'Orchestrator-Puppeteer'
      };
      request.continue({ headers }).catch(logInterceptionError);
    });

    // Navigate to a page that shows request headers
    await page.goto('https://httpbin.org/headers');
    const content = await page.content();
    console.log('Page content with custom headers:', content);
  } finally {
    try {
      await browser.disconnect();
    } finally {
      // Stop session even if disconnecting failed
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}
Performance Monitoring
Monitor page performance metrics:
/**
 * Load example.com with JS/CSS coverage enabled and log the load time, the
 * values returned by `page.metrics()`, and the number and total size of the
 * covered scripts and stylesheets.
 *
 * The browser is disconnected and the session stopped when done.
 *
 * @returns {Promise<void>}
 */
async function monitorPerformance() {
  const { browser, sessionId } = await connectToOrchestrator();
  try {
    const page = await browser.newPage();

    // Enable performance monitoring (the two collectors are independent)
    await Promise.all([
      page.coverage.startJSCoverage(),
      page.coverage.startCSSCoverage()
    ]);

    const startTime = Date.now();
    await page.goto('https://example.com', {
      waitUntil: 'networkidle2'
    });
    const loadTime = Date.now() - startTime;

    // Get performance metrics
    const metrics = await page.metrics();
    console.log('Performance metrics:', metrics);

    // Get coverage data
    const [jsCoverage, cssCoverage] = await Promise.all([
      page.coverage.stopJSCoverage(),
      page.coverage.stopCSSCoverage()
    ]);
    console.log(`Page loaded in ${loadTime}ms`);
    console.log(`JS files loaded: ${jsCoverage.length}`);
    console.log(`CSS files loaded: ${cssCoverage.length}`);

    // Calculate total bytes. String#length counts UTF-16 code units, so
    // measure the UTF-8 encoded size to make the "bytes" label accurate.
    const sumUtf8Bytes = (entries) =>
      entries.reduce((total, entry) => total + Buffer.byteLength(entry.text, 'utf8'), 0);
    const totalJSBytes = sumUtf8Bytes(jsCoverage);
    const totalCSSBytes = sumUtf8Bytes(cssCoverage);
    console.log(`Total JS: ${totalJSBytes} bytes`);
    console.log(`Total CSS: ${totalCSSBytes} bytes`);
  } finally {
    try {
      await browser.disconnect();
    } finally {
      // Stop session even if disconnecting failed
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}
Mobile Emulation
Emulate mobile devices:
const puppeteer = require('puppeteer-core');
/**
 * Emulate an iPhone 12, load example.com, save a full-page screenshot, log
 * whether the viewport is phone-sized, and send a tap to the page body.
 *
 * The browser is disconnected and the session stopped when done.
 *
 * @returns {Promise<void>}
 * @throws {Error} If this Puppeteer build has no "iPhone 12" descriptor.
 */
async function emulateMobile() {
  const { browser, sessionId } = await connectToOrchestrator();
  try {
    const page = await browser.newPage();

    // Newer Puppeteer releases expose device descriptors as `KnownDevices`;
    // older ones used `devices`. Accept either so the example keeps working.
    const deviceCatalog = puppeteer.KnownDevices ?? puppeteer.devices;
    const iPhone = deviceCatalog?.['iPhone 12'];
    if (!iPhone) {
      throw new Error('Device descriptor "iPhone 12" is not available in this Puppeteer version');
    }

    // Emulate iPhone 12
    await page.emulate(iPhone);
    await page.goto('https://example.com');

    // Take screenshot in mobile view
    await page.screenshot({
      path: 'mobile-view.png',
      fullPage: true
    });

    // Check whether the viewport is phone-sized
    const isMobile = await page.evaluate(() => {
      return window.innerWidth <= 768;
    });
    console.log('Mobile emulation active:', isMobile);

    // Test touch events
    await page.tap('body'); // Simulate touch
  } finally {
    try {
      await browser.disconnect();
    } finally {
      // Stop session even if disconnecting failed
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}
Error Handling and Best Practices
Connection Management
Always disconnect from the browser and stop the session when you are done, even if the automation fails:
/**
 * Small wrapper that owns one Orchestrator session and the Puppeteer browser
 * connected to it, so both can be released with a single `cleanup()` call.
 *
 * Relies on the global `fetch` (Node.js 18+) and on a `puppeteer` binding in
 * scope (e.g. `const puppeteer = require('puppeteer-core')`).
 */
class OrchestratorPuppeteer {
  /**
   * @param {string} apiKey - Orchestrator API key sent as a Bearer token.
   */
  constructor(apiKey) {
    this.apiKey = apiKey;
    this.browser = null;
    this.sessionId = null;
  }

  /**
   * Create a session and connect Puppeteer to it.
   *
   * @param {string} [sessionName='Puppeteer Session'] - Name sent as `session_name`.
   * @param {string} [duration='1h'] - Value sent as `duration`.
   * @returns {Promise<object>} The connected Puppeteer browser.
   * @throws {Error} If the API answers with a non-2xx status or the browser
   *   connection fails; anything acquired so far is released first.
   */
  async connect(sessionName = 'Puppeteer Session', duration = '1h') {
    // Release a previous browser/session before overwriting the references;
    // otherwise the earlier session would keep running with nobody able to stop it.
    if (this.browser || this.sessionId) {
      await this.cleanup();
    }

    try {
      // Create session
      const sessionResponse = await fetch('https://api.orchestratorhq.com/api/sessions', {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${this.apiKey}`,
          'Content-Type': 'application/json'
        },
        body: JSON.stringify({ session_name: sessionName, duration })
      });

      // fetch() resolves for HTTP error statuses too; surface them clearly.
      if (!sessionResponse.ok) {
        throw new Error(
          `Failed to create session: ${sessionResponse.status} ${sessionResponse.statusText}`
        );
      }

      const session = await sessionResponse.json();
      this.sessionId = session.id;

      // Connect browser (session is active when API responds)
      this.browser = await puppeteer.connect({
        browserWSEndpoint: session.cdp_url
      });
      return this.browser;
    } catch (error) {
      await this.cleanup();
      throw error;
    }
  }

  /**
   * Disconnect the browser and stop the session, if present.
   *
   * Best effort: failures are logged rather than thrown so that cleanup can
   * run safely inside `finally` blocks. Safe to call more than once.
   *
   * @returns {Promise<void>}
   */
  async cleanup() {
    if (this.browser) {
      try {
        await this.browser.disconnect();
      } catch (error) {
        console.error('Error disconnecting browser:', error);
      }
      this.browser = null;
    }

    if (this.sessionId) {
      try {
        const response = await fetch(`https://api.orchestratorhq.com/api/sessions/${this.sessionId}`, {
          method: 'DELETE',
          headers: { 'Authorization': `Bearer ${this.apiKey}` }
        });
        if (!response.ok) {
          console.error('Error stopping session:', `${response.status} ${response.statusText}`);
        }
      } catch (error) {
        console.error('Error stopping session:', error);
      }
      this.sessionId = null;
    }
  }
}
// Usage
const orchestrator = new OrchestratorPuppeteer(process.env.ORCHESTRATOR_API_KEY);

// `await` is only allowed at the top level of ES modules. These examples use
// CommonJS (`require`), so the work runs inside an async function instead.
(async () => {
  try {
    const browser = await orchestrator.connect();
    const page = await browser.newPage();

    // Your automation code
    await page.goto('https://example.com');
  } finally {
    // Runs on success and on failure, so the session never outlives the script
    await orchestrator.cleanup();
  }
})().catch(console.error);
Timeout Configuration
Configure appropriate timeouts for different operations:
// Fragment: run inside an async function that already has a connected `browser`.
const page = await browser.newPage();

// Set default timeout
page.setDefaultTimeout(30000); // 30 seconds

// Set navigation timeout
page.setDefaultNavigationTimeout(60000); // 60 seconds

// Use specific timeouts
await page.goto('https://example.com', { timeout: 45000 });
await page.waitForSelector('.dynamic-content', { timeout: 10000 });

// page.click() has no `timeout` option (a value passed there is ignored, and
// the call fails immediately when the element is missing). Wait for the
// element with an explicit timeout first, then click the returned handle.
const button = await page.waitForSelector('button', { timeout: 5000 });
await button.click();
Error Recovery
Implement retry logic for unreliable operations:
/**
 * Run an async operation, retrying with a linearly growing delay when it throws.
 *
 * @param {() => Promise<*>} operation - Function to invoke; called with no arguments.
 * @param {number} [maxRetries=3] - Total number of attempts. Fractional values
 *   are rounded up; values below 1 (or NaN) still result in one attempt, so
 *   the operation is never silently skipped.
 * @param {number} [delay=1000] - Base delay in milliseconds; the wait after
 *   attempt k is `delay * k`.
 * @returns {Promise<*>} Whatever the first successful attempt returns.
 * @throws The error of the final attempt once all attempts have failed.
 */
async function retryOperation(operation, maxRetries = 3, delay = 1000) {
  const requestedAttempts = Math.ceil(Number(maxRetries));
  const totalAttempts = Number.isNaN(requestedAttempts) ? 1 : Math.max(1, requestedAttempts);

  for (let attempt = 1; ; attempt++) {
    try {
      return await operation();
    } catch (error) {
      // Out of attempts: propagate the last failure unchanged
      if (attempt >= totalAttempts) throw error;
      console.log(`Attempt ${attempt} failed: ${error.message}`);
      await new Promise((resolve) => setTimeout(resolve, delay * attempt));
    }
  }
}
// Usage (fragment: expects a `page` in scope inside an async function)
const clickFlakyButton = () => page.click('.sometimes-missing-button');
await retryOperation(clickFlakyButton);
Debugging Tips
Use VNC for Visual Debugging
Access the live browser through the Orchestrator dashboard to see what’s happening visually.
Enable Slow Motion
Add delays between actions to slow down execution for debugging: pass `slowMo` (in milliseconds) to `puppeteer.connect()`, or pause explicitly with `await new Promise(resolve => setTimeout(resolve, 1000))`. On older Puppeteer versions `await page.waitForTimeout(1000)` also works, but it has been removed from recent releases.
Take Screenshots
Capture screenshots at key points: `await page.screenshot({ path: 'debug.png' })`.
Log Network Activity
Monitor network requests: `page.on('response', response => console.log(response.url()))`.