Skip to main content
Puppeteer is a Node.js library that provides a high-level API to control Chrome browsers. With Orchestrator, you can run Puppeteer scripts against cloud-hosted browser instances without managing browser infrastructure yourself.

Installation

Install Puppeteer in your project:
npm install puppeteer-core
Use puppeteer-core instead of puppeteer since you’ll be connecting to Orchestrator’s browsers rather than launching local ones.

Basic Setup

Here’s how to connect Puppeteer to an Orchestrator session:
const puppeteer = require('puppeteer-core');

/**
 * Creates an Orchestrator browser session and attaches Puppeteer to it.
 *
 * @returns {Promise<{browser: object, sessionId: string}>} The connected
 *   Puppeteer browser and the id of the session that backs it.
 * @throws {Error} If the session API answers with a non-2xx status, if the
 *   response carries no `cdp_url`, or if Puppeteer cannot connect.
 */
async function connectToOrchestrator() {
  const sessionsUrl = 'https://api.orchestratorhq.com/api/sessions';
  const authorization = 'Bearer orch_your_api_key_here';

  // Create a new browser session
  const sessionResponse = await fetch(sessionsUrl, {
    method: 'POST',
    headers: {
      'Authorization': authorization,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      session_name: 'Puppeteer Automation',
      duration: '1h'
    })
  });

  // fetch() only rejects on network failure, so HTTP errors must be checked explicitly
  if (!sessionResponse.ok) {
    throw new Error(`Failed to create session: HTTP ${sessionResponse.status}`);
  }

  const session = await sessionResponse.json();
  if (!session.cdp_url) {
    throw new Error('Session response did not include a cdp_url');
  }
  console.log('Session created:', session.id);

  // Connect to the browser using browserWSEndpoint (session is active when API responds)
  try {
    const browser = await puppeteer.connect({
      browserWSEndpoint: session.cdp_url
    });

    return { browser, sessionId: session.id };
  } catch (error) {
    // The caller never receives the session id on this path, so stop the session here
    await fetch(`${sessionsUrl}/${session.id}`, {
      method: 'DELETE',
      headers: { 'Authorization': authorization }
    }).catch((cleanupError) => {
      console.error('Error stopping session:', cleanupError);
    });
    throw error;
  }
}

// Usage example
/**
 * Connects to an Orchestrator session, loads example.com, logs its title and
 * saves a screenshot, then disconnects and stops the session.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { browser, sessionId } = await connectToOrchestrator();

  try {
    // Use the existing page when the session has one; otherwise open a tab
    const [existingPage] = await browser.pages();
    const page = existingPage ?? await browser.newPage();

    // Your automation code here
    await page.goto('https://example.com');
    console.log('Page title:', await page.title());

    // Take a screenshot
    await page.screenshot({ path: 'example.png' });

  } finally {
    try {
      // Clean up
      await browser.disconnect();
    } finally {
      // Stop the session even when disconnecting failed
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: {
          'Authorization': 'Bearer orch_your_api_key_here'
        }
      });
    }
  }
}

// Run the example; a rejection from main() is reported through console.error.
main().catch(console.error);

Web Scraping Example

Here’s a practical web scraping example using Puppeteer with Orchestrator:
const puppeteer = require('puppeteer-core');

/**
 * Scrapes the first ten stories from the Hacker News front page through an
 * Orchestrator browser session and saves a full-page screenshot.
 *
 * @returns {Promise<Array<{title: string, url: string, score: string, comments: string, id: string}>>}
 *   One record per story row found on the page (at most ten).
 * @throws {Error} If the session cannot be created or any browser step fails.
 */
async function scrapeNews() {
  // Create the session
  const sessionResponse = await fetch('https://api.orchestratorhq.com/api/sessions', {
    method: 'POST',
    headers: {
      'Authorization': 'Bearer orch_your_api_key_here',
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      session_name: 'News Scraper',
      duration: '30m'
    })
  });

  // fetch() resolves on HTTP errors too, so fail early with a clear message
  if (!sessionResponse.ok) {
    throw new Error(`Failed to create session: HTTP ${sessionResponse.status}`);
  }

  const session = await sessionResponse.json();

  let browser;
  try {
    // Connect inside the try so a failed connect still stops the session below
    browser = await puppeteer.connect({
      browserWSEndpoint: session.cdp_url
    });

    const page = await browser.newPage();

    // Set user agent to avoid bot detection
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');

    // Navigate to news website
    await page.goto('https://news.ycombinator.com/', {
      waitUntil: 'networkidle2'
    });

    // Extract news articles (this callback runs inside the page, not in Node)
    const articles = await page.evaluate(() => {
      const rows = document.querySelectorAll('.athing');
      return Array.from(rows).slice(0, 10).map(row => {
        const titleElement = row.querySelector('.titleline > a');
        const subtextRow = row.nextElementSibling;
        const scoreElement = subtextRow?.querySelector('.score');

        // The story's age is also an `item?id=` link and precedes the comments
        // link, so skip anchors inside `.age` and keep the last remaining one.
        // NOTE(review): relies on Hacker News' current markup; confirm if it changes.
        const itemLinks = Array.from(subtextRow?.querySelectorAll('a[href*="item?id="]') ?? []);
        const commentsElement = itemLinks.filter(link => !link.closest('.age')).pop();

        return {
          title: titleElement?.textContent || '',
          url: titleElement?.href || '',
          score: scoreElement?.textContent || '0 points',
          comments: commentsElement?.textContent || '0 comments',
          id: row.id
        };
      });
    });

    console.log('Scraped articles:', articles);

    // Take a screenshot of the page
    await page.screenshot({
      path: 'hackernews.png',
      fullPage: true
    });

    return articles;

  } finally {
    try {
      if (browser) {
        await browser.disconnect();
      }
    } finally {
      // Clean up session even when connecting or disconnecting failed
      await fetch(`https://api.orchestratorhq.com/api/sessions/${session.id}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}

// Run the scraper, log how many articles it returned, and print any failure.
scrapeNews().then(articles => {
  console.log(`Scraped ${articles.length} articles`);
}).catch(console.error);

Form Automation Example

Automate form filling and submission:
const puppeteer = require('puppeteer-core');

/**
 * Fills in and submits the httpbin.org sample order form, logs the echoed
 * response, and saves screenshots before and after submission.
 *
 * @returns {Promise<void>}
 */
async function automateForm() {
  const { browser, sessionId } = await connectToOrchestrator();

  try {
    const page = await browser.newPage();

    // Navigate to a form page
    await page.goto('https://httpbin.org/forms/post');

    // Fill out the text fields
    await page.type('input[name="custname"]', 'John Doe');
    await page.type('input[name="custtel"]', '+1-555-123-4567');
    await page.type('input[name="custemail"]', 'john.doe@example.com');

    // Pizza size is a radio group on this form, not a <select>
    await page.click('input[name="size"][value="medium"]');

    // Tick a topping checkbox
    await page.click('input[name="topping"][value="bacon"]');

    // Fill textarea
    await page.type('textarea[name="comments"]', 'This is an automated test using Puppeteer and Orchestrator.');

    // Take screenshot before submission
    await page.screenshot({ path: 'form-filled.png' });

    // Start waiting for the navigation BEFORE clicking: if the click is awaited
    // first, a fast navigation can finish before the wait begins and the wait
    // then times out.
    await Promise.all([
      page.waitForNavigation({ waitUntil: 'networkidle2' }),
      // The form submits through a <button>; the input selector is kept as a fallback
      page.click('form button, form input[type="submit"]')
    ]);

    // Extract the response
    const response = await page.evaluate(() => {
      const preElement = document.querySelector('pre');
      return preElement ? preElement.textContent : 'No response found';
    });

    console.log('Form submission response:', response);

    // Take screenshot of result
    await page.screenshot({ path: 'form-result.png' });

  } finally {
    try {
      await browser.disconnect();
    } finally {
      // Stop session even when disconnecting failed
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}

// Run the form example; a rejection from automateForm() is reported through console.error.
automateForm().catch(console.error);

PDF Generation

Generate PDFs from web pages:
const puppeteer = require('puppeteer-core');

/**
 * Writes two PDFs through an Orchestrator browser session: `example.pdf`
 * rendered from https://example.com and `custom-content.pdf` rendered from
 * inline HTML.
 *
 * @returns {Promise<void>}
 */
async function generatePDF() {
  const { browser, sessionId } = await connectToOrchestrator();

  try {
    const page = await browser.newPage();

    // Navigate to the page you want to convert to PDF
    await page.goto('https://example.com', {
      waitUntil: 'networkidle2'
    });

    // Generate PDF; the file is written to `path`, so the return value is not kept
    await page.pdf({
      path: 'example.pdf',
      format: 'A4',
      printBackground: true,
      margin: {
        top: '20px',
        right: '20px',
        bottom: '20px',
        left: '20px'
      }
    });

    console.log('PDF generated successfully');

    // You can also generate PDF from HTML content
    await page.setContent(`
      <html>
        <head>
          <style>
            body { font-family: Arial, sans-serif; }
            .header { color: #333; text-align: center; }
            .content { margin: 20px; line-height: 1.6; }
          </style>
        </head>
        <body>
          <div class="header">
            <h1>Generated with Orchestrator + Puppeteer</h1>
          </div>
          <div class="content">
            <p>This PDF was generated using Puppeteer connected to an Orchestrator browser session.</p>
            <p>Date: ${new Date().toLocaleDateString()}</p>
          </div>
        </body>
      </html>
    `);

    await page.pdf({
      path: 'custom-content.pdf',
      format: 'A4',
      printBackground: true
    });

    console.log('Custom PDF generated successfully');

  } finally {
    try {
      await browser.disconnect();
    } finally {
      // Stop session even when disconnecting failed
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}

// Run the PDF example; a rejection from generatePDF() is reported through console.error.
generatePDF().catch(console.error);

Advanced Features

Request Interception

Intercept and modify network requests:
/**
 * Demonstrates request interception: image requests are aborted, every other
 * request continues with an extra `X-Custom-Header`, and the resulting page
 * content is logged.
 *
 * @returns {Promise<void>}
 */
async function interceptRequests() {
  const { browser, sessionId } = await connectToOrchestrator();

  try {
    const page = await browser.newPage();

    // Enable request interception
    await page.setRequestInterception(true);

    page.on('request', (request) => {
      // Block images to speed up loading
      if (request.resourceType() === 'image') {
        // abort()/continue() return promises; report failures instead of
        // leaving unhandled rejections inside the event handler
        request.abort().catch((error) => {
          console.error('Failed to abort request:', error);
        });
        return;
      }

      // Modify headers
      const headers = {
        ...request.headers(),
        'X-Custom-Header': 'Orchestrator-Puppeteer'
      };

      request.continue({ headers }).catch((error) => {
        console.error('Failed to continue request:', error);
      });
    });

    // Navigate to a page that shows request headers
    await page.goto('https://httpbin.org/headers');

    const content = await page.content();
    console.log('Page content with custom headers:', content);

  } finally {
    try {
      await browser.disconnect();
    } finally {
      // Stop session so it does not keep running after the example ends
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}

Performance Monitoring

Monitor page performance metrics:
/**
 * Loads https://example.com with JS and CSS coverage enabled and logs the
 * load time, `page.metrics()` output, and the number and total size of the
 * covered JS and CSS resources.
 *
 * @returns {Promise<void>}
 */
async function monitorPerformance() {
  const { browser, sessionId } = await connectToOrchestrator();

  try {
    const page = await browser.newPage();

    // Enable performance monitoring
    await page.coverage.startJSCoverage();
    await page.coverage.startCSSCoverage();

    const startTime = Date.now();

    await page.goto('https://example.com', {
      waitUntil: 'networkidle2'
    });

    const loadTime = Date.now() - startTime;

    // Get performance metrics
    const metrics = await page.metrics();
    console.log('Performance metrics:', metrics);

    // Get coverage data
    const jsCoverage = await page.coverage.stopJSCoverage();
    const cssCoverage = await page.coverage.stopCSSCoverage();

    console.log(`Page loaded in ${loadTime}ms`);
    console.log(`JS files loaded: ${jsCoverage.length}`);
    console.log(`CSS files loaded: ${cssCoverage.length}`);

    // Calculate total bytes. String length counts UTF-16 code units, so measure
    // the UTF-8 encoded size to make the "bytes" label accurate.
    const sumBytes = (entries) =>
      entries.reduce((total, entry) => total + Buffer.byteLength(entry.text, 'utf8'), 0);
    const totalJSBytes = sumBytes(jsCoverage);
    const totalCSSBytes = sumBytes(cssCoverage);

    console.log(`Total JS: ${totalJSBytes} bytes`);
    console.log(`Total CSS: ${totalCSSBytes} bytes`);

  } finally {
    try {
      await browser.disconnect();
    } finally {
      // Stop session so it does not keep running after the example ends
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}

Mobile Emulation

Emulate mobile devices:
const puppeteer = require('puppeteer-core');

/**
 * Emulates an iPhone 12, loads https://example.com, saves a full-page
 * screenshot, logs whether the viewport is at most 768px wide, and taps the
 * page body.
 *
 * @returns {Promise<void>}
 * @throws {Error} If the installed Puppeteer has no "iPhone 12" descriptor.
 */
async function emulateMobile() {
  const { browser, sessionId } = await connectToOrchestrator();

  try {
    const page = await browser.newPage();

    // Current Puppeteer exposes device descriptors as `KnownDevices`; older
    // releases used `devices`. Support both.
    const deviceCatalog = puppeteer.KnownDevices ?? puppeteer.devices;
    const iPhone12 = deviceCatalog?.['iPhone 12'];
    if (!iPhone12) {
      throw new Error('Device descriptor "iPhone 12" is not available in this Puppeteer version');
    }

    // Emulate iPhone 12
    await page.emulate(iPhone12);

    await page.goto('https://example.com');

    // Take screenshot in mobile view
    await page.screenshot({
      path: 'mobile-view.png',
      fullPage: true
    });

    // Check whether the viewport is phone-sized
    const isMobile = await page.evaluate(() => {
      return window.innerWidth <= 768;
    });

    console.log('Mobile emulation active:', isMobile);

    // Test touch events
    await page.tap('body'); // Simulate touch

  } finally {
    try {
      await browser.disconnect();
    } finally {
      // Stop session so it does not keep running after the example ends
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}

Error Handling and Best Practices

Always properly disconnect from browsers and clean up sessions:
/**
 * Owns one Orchestrator session and the Puppeteer browser attached to it, so
 * both can be released with a single `cleanup()` call.
 */
class OrchestratorPuppeteer {
  /**
   * @param {string} apiKey - Orchestrator API key sent as a Bearer token.
   */
  constructor(apiKey) {
    this.apiKey = apiKey;
    this.browser = null;
    this.sessionId = null;
  }

  /**
   * Creates a session and connects Puppeteer to it.
   *
   * @param {string} [sessionName='Puppeteer Session'] - Value sent as `session_name`.
   * @param {string} [duration='1h'] - Value sent as `duration`.
   * @returns {Promise<object>} The connected Puppeteer browser.
   * @throws {Error} If the API responds with a non-2xx status, the response
   *   has no `cdp_url`, or the connection fails. `cleanup()` runs before the
   *   error is rethrown.
   */
  async connect(sessionName = 'Puppeteer Session', duration = '1h') {
    try {
      // Create session
      const sessionResponse = await fetch('https://api.orchestratorhq.com/api/sessions', {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${this.apiKey}`,
          'Content-Type': 'application/json'
        },
        body: JSON.stringify({ session_name: sessionName, duration })
      });

      // fetch() resolves on HTTP errors too; without this check a rejected
      // request would only surface later as an obscure connect failure
      if (!sessionResponse.ok) {
        throw new Error(`Failed to create session: HTTP ${sessionResponse.status}`);
      }

      const session = await sessionResponse.json();
      // Record the id first so cleanup() can stop the session if a later step fails
      this.sessionId = session.id;

      if (!session.cdp_url) {
        throw new Error('Session response did not include a cdp_url');
      }

      // Connect browser (session is active when API responds)
      this.browser = await puppeteer.connect({
        browserWSEndpoint: session.cdp_url
      });

      return this.browser;

    } catch (error) {
      await this.cleanup();
      throw error;
    }
  }

  /**
   * Disconnects the browser and stops the session, if present. Failures are
   * logged rather than thrown so cleanup always runs to completion.
   *
   * @returns {Promise<void>}
   */
  async cleanup() {
    if (this.browser) {
      try {
        await this.browser.disconnect();
      } catch (error) {
        console.error('Error disconnecting browser:', error);
      }
      this.browser = null;
    }

    if (this.sessionId) {
      try {
        await fetch(`https://api.orchestratorhq.com/api/sessions/${this.sessionId}`, {
          method: 'DELETE',
          headers: { 'Authorization': `Bearer ${this.apiKey}` }
        });
      } catch (error) {
        console.error('Error stopping session:', error);
      }
      this.sessionId = null;
    }
  }
}

// Usage
// Wrapped in an async function because these examples load Puppeteer with
// require() (CommonJS), where top-level `await` is a syntax error.
async function runWithCleanup() {
  const orchestrator = new OrchestratorPuppeteer(process.env.ORCHESTRATOR_API_KEY);

  try {
    const browser = await orchestrator.connect();
    const page = await browser.newPage();

    // Your automation code
    await page.goto('https://example.com');

  } finally {
    // Runs whether or not the automation above succeeded
    await orchestrator.cleanup();
  }
}

runWithCleanup().catch(console.error);
Configure appropriate timeouts for different operations:
// NOTE(review): `browser` is assumed to be a connected Puppeteer browser from the surrounding code.
const page = await browser.newPage();

// Set default timeout
page.setDefaultTimeout(30000); // 30 seconds

// Set navigation timeout
page.setDefaultNavigationTimeout(60000); // 60 seconds

// Use specific timeouts
await page.goto('https://example.com', { timeout: 45000 });
await page.waitForSelector('.dynamic-content', { timeout: 10000 });

// page.click() has no `timeout` option (a value passed there is ignored), so
// wait for the element with an explicit timeout first, then click it.
await page.waitForSelector('button', { timeout: 5000 });
await page.click('button');
Implement retry logic for unreliable operations:
/**
 * Runs an async operation, retrying after failures with a linearly growing
 * pause (`delay`, then `2 * delay`, and so on).
 *
 * @param {() => Promise<*>} operation - The work to attempt.
 * @param {number} [maxRetries=3] - Total number of attempts. Values below 1 or
 *   non-numeric values are treated as 1, so the operation always runs at least
 *   once; fractional values are rounded down.
 * @param {number} [delay=1000] - Base pause in milliseconds between attempts.
 * @returns {Promise<*>} Whatever `operation` resolves to.
 * @throws The error from the final failed attempt.
 */
async function retryOperation(operation, maxRetries = 3, delay = 1000) {
  // Normalise the attempt count: the plain `i < maxRetries` loop skipped the
  // operation entirely for 0 and swallowed the last error for fractional values.
  const totalAttempts = Math.max(1, Math.floor(maxRetries) || 0);

  for (let attempt = 1; ; attempt++) {
    try {
      return await operation();
    } catch (error) {
      if (attempt >= totalAttempts) throw error;

      console.log(`Attempt ${attempt} failed: ${error.message}`);
      await new Promise(resolve => setTimeout(resolve, delay * attempt));
    }
  }
}

// Usage
// Retries the click using retryOperation's default attempt count and delay.
// NOTE(review): `page` is assumed to be an open Puppeteer page from the surrounding code.
await retryOperation(async () => {
  await page.click('.sometimes-missing-button');
});

Debugging Tips

Access the live browser through the Orchestrator dashboard to see what’s happening visually.
Add delays between actions: await new Promise(resolve => setTimeout(resolve, 1000)) to slow down execution for debugging (page.waitForTimeout() was removed in Puppeteer v22).
Capture screenshots at key points: await page.screenshot({ path: 'debug.png' }).
Monitor network requests: page.on('response', response => console.log(response.url())).

Next Steps