Puppeteer is a Node.js library that provides a high-level API to control Chrome browsers. With Orchestrator, you can run Puppeteer scripts against cloud-hosted browser instances without managing browser infrastructure yourself.

Installation

Install Puppeteer in your project:
npm install puppeteer-core
Use puppeteer-core instead of puppeteer: puppeteer-core does not download a bundled browser, which you don’t need because you’ll be connecting to Orchestrator’s browsers rather than launching local ones.

Basic Setup

Here’s how to connect Puppeteer to an Orchestrator session. The examples on this page use the global fetch API, which is available in Node.js 18 and later:
const puppeteer = require('puppeteer-core');

/**
 * Creates a new Orchestrator browser session and attaches Puppeteer to it.
 *
 * @returns {Promise<{browser: import('puppeteer-core').Browser, sessionId: string}>}
 *   The connected browser and the id of the session that backs it.
 * @throws {Error} If the API answers the session request with a non-2xx status.
 */
async function connectToOrchestrator() {
  // Create a new browser session
  const sessionResponse = await fetch('https://api.orchestratorhq.com/api/sessions', {
    method: 'POST',
    headers: {
      'Authorization': 'Bearer orch_your_api_key_here',
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      session_name: 'Puppeteer Automation',
      duration: '1h'
    })
  });

  // fetch() only rejects on network failures; HTTP errors (invalid key,
  // quota exceeded, ...) have to be detected explicitly, otherwise they
  // surface later as a confusing connect() failure
  if (!sessionResponse.ok) {
    throw new Error(
      `Failed to create session: ${sessionResponse.status} ${sessionResponse.statusText}`
    );
  }

  const session = await sessionResponse.json();
  console.log('Session created:', session.id);

  // Connect to the browser using browserWSEndpoint (session is active when API responds)
  const browser = await puppeteer.connect({
    browserWSEndpoint: session.cdp_url
  });

  return { browser, sessionId: session.id };
}

// Usage example
/**
 * Example workflow: opens example.com in an Orchestrator session, logs the
 * page title, saves a screenshot and then releases the session.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { browser, sessionId } = await connectToOrchestrator();

  try {
    // Use the existing page; open a new one if the browser reports none
    const [existingPage] = await browser.pages();
    const page = existingPage ?? await browser.newPage();

    // Your automation code here
    await page.goto('https://example.com');
    console.log('Page title:', await page.title());

    // Take a screenshot
    await page.screenshot({ path: 'example.png' });

  } finally {
    try {
      // Clean up
      await browser.disconnect();
    } finally {
      // Stop the session even if disconnecting failed, so it is not left running
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: {
          'Authorization': 'Bearer orch_your_api_key_here'
        }
      });
    }
  }
}

// Run the example and report any failure on stderr
main().catch((error) => console.error(error));

Web Scraping Example

Here’s a practical web scraping example using Puppeteer with Orchestrator:
const puppeteer = require('puppeteer-core');

/**
 * Scrapes the first ten stories from the Hacker News front page using a
 * dedicated Orchestrator session, and saves a full-page screenshot.
 *
 * @returns {Promise<Array<{title: string, url: string, score: string, comments: string, id: string}>>}
 *   One record per story row.
 * @throws {Error} If the API answers the session request with a non-2xx status.
 */
async function scrapeNews() {
  // Create the session
  const sessionResponse = await fetch('https://api.orchestratorhq.com/api/sessions', {
    method: 'POST',
    headers: {
      'Authorization': 'Bearer orch_your_api_key_here',
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      session_name: 'News Scraper',
      duration: '30m'
    })
  });

  // fetch() does not reject on HTTP error statuses, so check explicitly
  if (!sessionResponse.ok) {
    throw new Error(
      `Failed to create session: ${sessionResponse.status} ${sessionResponse.statusText}`
    );
  }

  const session = await sessionResponse.json();

  let browser;
  try {
    // Connect inside the try block so the session is still stopped when
    // connecting fails (session is active when API responds)
    browser = await puppeteer.connect({
      browserWSEndpoint: session.cdp_url
    });

    const page = await browser.newPage();

    // Set user agent to avoid bot detection
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');

    // Navigate to news website
    await page.goto('https://news.ycombinator.com/', {
      waitUntil: 'networkidle2'
    });

    // Extract news articles (this callback runs inside the page, not in Node)
    const articles = await page.evaluate(() => {
      const rows = document.querySelectorAll('.athing');
      return Array.from(rows).slice(0, 10).map(row => {
        const titleElement = row.querySelector('.titleline > a');
        const subtextRow = row.nextElementSibling;
        const scoreElement = subtextRow?.querySelector('.score');

        // NOTE(review): on the Hacker News listing the story's age link also
        // points at item?id=... and precedes the comments link, so the last
        // match is taken - confirm if the markup changes
        const itemLinks = subtextRow ? subtextRow.querySelectorAll('a[href*="item?id="]') : [];
        const commentsElement = itemLinks[itemLinks.length - 1];

        return {
          title: titleElement?.textContent || '',
          url: titleElement?.href || '',
          score: scoreElement?.textContent || '0 points',
          comments: commentsElement?.textContent || '0 comments',
          id: row.id
        };
      });
    });

    console.log('Scraped articles:', articles);

    // Take a screenshot of the page
    await page.screenshot({
      path: 'hackernews.png',
      fullPage: true
    });

    return articles;

  } finally {
    try {
      // browser is undefined when connecting failed
      await browser?.disconnect();
    } finally {
      // Clean up session even if disconnecting failed
      await fetch(`https://api.orchestratorhq.com/api/sessions/${session.id}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}

// Run the scraper, report how many stories were collected, and log failures
scrapeNews()
  .then((scraped) => {
    console.log(`Scraped ${scraped.length} articles`);
  })
  .catch((error) => console.error(error));

Form Automation Example

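Automate form filling and submission. This example and the ones that follow reuse the connectToOrchestrator() helper defined in Basic Setup: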
const puppeteer = require('puppeteer-core');

/**
 * Fills in and submits the httpbin demo order form, logs the echoed
 * submission and saves screenshots before and after submitting.
 *
 * @returns {Promise<void>}
 */
async function automateForm() {
  const { browser, sessionId } = await connectToOrchestrator();

  try {
    const page = await browser.newPage();

    // Navigate to a form page
    await page.goto('https://httpbin.org/forms/post');

    // Fill out the text fields
    await page.type('input[name="custname"]', 'John Doe');
    await page.type('input[name="custtel"]', '+1-555-123-4567');
    await page.type('input[name="custemail"]', 'john.doe@example.com');

    // Choose the size.
    // NOTE(review): the httpbin demo form renders the size options as radio
    // inputs rather than a <select>, so they are clicked instead of using
    // page.select() - confirm against the live page
    await page.click('input[name="size"][value="medium"]');

    // Pick a topping
    await page.click('input[name="topping"][value="bacon"]');

    // Fill textarea
    await page.type('textarea[name="comments"]', 'This is an automated test using Puppeteer and Orchestrator.');

    // Take screenshot before submission
    await page.screenshot({ path: 'form-filled.png' });

    // Submit the form. The navigation wait must be registered BEFORE the
    // click: if the page navigates before waitForNavigation() is called, the
    // wait never sees the navigation and times out. The selector accepts
    // either a <button> or an <input type="submit"> inside the form.
    await Promise.all([
      page.waitForNavigation({ waitUntil: 'networkidle2' }),
      page.click('form button, form input[type="submit"]')
    ]);

    // Extract the response
    const response = await page.evaluate(() => {
      const preElement = document.querySelector('pre');
      return preElement ? preElement.textContent : 'No response found';
    });

    console.log('Form submission response:', response);

    // Take screenshot of result
    await page.screenshot({ path: 'form-result.png' });

  } finally {
    try {
      await browser.disconnect();
    } finally {
      // Stop session even if disconnecting failed
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}

// Run the form example and report any failure on stderr
automateForm().catch((error) => console.error(error));

PDF Generation

Generate PDFs from web pages:
const puppeteer = require('puppeteer-core');

/**
 * Writes two PDFs from an Orchestrator session: example.pdf rendered from
 * https://example.com and custom-content.pdf rendered from inline HTML.
 *
 * @returns {Promise<void>}
 */
async function generatePDF() {
  const { browser, sessionId } = await connectToOrchestrator();

  try {
    const page = await browser.newPage();

    // Navigate to the page you want to convert to PDF
    await page.goto('https://example.com', {
      waitUntil: 'networkidle2'
    });

    // Generate PDF (written to `path`; the returned buffer is not needed here)
    await page.pdf({
      path: 'example.pdf',
      format: 'A4',
      printBackground: true,
      margin: {
        top: '20px',
        right: '20px',
        bottom: '20px',
        left: '20px'
      }
    });

    console.log('PDF generated successfully');

    // You can also generate PDF from HTML content
    await page.setContent(`
      <html>
        <head>
          <style>
            body { font-family: Arial, sans-serif; }
            .header { color: #333; text-align: center; }
            .content { margin: 20px; line-height: 1.6; }
          </style>
        </head>
        <body>
          <div class="header">
            <h1>Generated with Orchestrator + Puppeteer</h1>
          </div>
          <div class="content">
            <p>This PDF was generated using Puppeteer connected to an Orchestrator browser session.</p>
            <p>Date: ${new Date().toLocaleDateString()}</p>
          </div>
        </body>
      </html>
    `);

    await page.pdf({
      path: 'custom-content.pdf',
      format: 'A4',
      printBackground: true
    });

    console.log('Custom PDF generated successfully');

  } finally {
    try {
      await browser.disconnect();
    } finally {
      // Stop session even if disconnecting failed
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}

// Run the PDF example and report any failure on stderr
generatePDF().catch((error) => console.error(error));

Advanced Features

Request Interception

Intercept and modify network requests:
/**
 * Demonstrates request interception: image requests are aborted and every
 * other request is forwarded with an extra X-Custom-Header, then the content
 * of https://httpbin.org/headers is logged.
 *
 * @returns {Promise<void>}
 */
async function interceptRequests() {
  const { browser, sessionId } = await connectToOrchestrator();

  try {
    const page = await browser.newPage();

    // Enable request interception
    await page.setRequestInterception(true);

    page.on('request', (request) => {
      // Block images to speed up loading
      if (request.resourceType() === 'image') {
        request.abort();
        return;
      }

      // Forward the request with the original headers plus a custom one
      const headers = {
        ...request.headers(),
        'X-Custom-Header': 'Orchestrator-Puppeteer'
      };

      request.continue({ headers });
    });

    // Navigate to a page that shows request headers
    await page.goto('https://httpbin.org/headers');

    const content = await page.content();
    console.log('Page content with custom headers:', content);

  } finally {
    try {
      await browser.disconnect();
    } finally {
      // Stop session even if disconnecting failed, so it is not left running
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}

Performance Monitoring

Monitor page performance metrics:
/**
 * Loads https://example.com while collecting JS/CSS coverage and logs the
 * load time, page metrics, and the number and total size of the scripts and
 * stylesheets that were loaded.
 *
 * @returns {Promise<void>}
 */
async function monitorPerformance() {
  const { browser, sessionId } = await connectToOrchestrator();

  try {
    const page = await browser.newPage();

    // Start JS and CSS coverage collection before navigating
    await Promise.all([
      page.coverage.startJSCoverage(),
      page.coverage.startCSSCoverage()
    ]);

    const startTime = Date.now();

    await page.goto('https://example.com', {
      waitUntil: 'networkidle2'
    });

    const loadTime = Date.now() - startTime;

    // Get performance metrics
    const metrics = await page.metrics();
    console.log('Performance metrics:', metrics);

    // Get coverage data
    const [jsCoverage, cssCoverage] = await Promise.all([
      page.coverage.stopJSCoverage(),
      page.coverage.stopCSSCoverage()
    ]);

    console.log(`Page loaded in ${loadTime}ms`);
    console.log(`JS files loaded: ${jsCoverage.length}`);
    console.log(`CSS files loaded: ${cssCoverage.length}`);

    // Calculate total bytes. String length counts UTF-16 code units, not
    // bytes, so measure the UTF-8 encoded size of each entry's text instead.
    const totalUtf8Bytes = (entries) =>
      entries.reduce((total, entry) => total + Buffer.byteLength(entry.text, 'utf8'), 0);
    const totalJSBytes = totalUtf8Bytes(jsCoverage);
    const totalCSSBytes = totalUtf8Bytes(cssCoverage);

    console.log(`Total JS: ${totalJSBytes} bytes`);
    console.log(`Total CSS: ${totalCSSBytes} bytes`);

  } finally {
    try {
      await browser.disconnect();
    } finally {
      // Stop session even if disconnecting failed, so it is not left running
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}

Mobile Emulation

Emulate mobile devices:
const puppeteer = require('puppeteer-core');

/**
 * Loads https://example.com while emulating an iPhone 12, saves a full-page
 * screenshot, logs whether the viewport is mobile-sized and taps the body.
 *
 * @returns {Promise<void>}
 * @throws {Error} If the installed Puppeteer has no 'iPhone 12' descriptor.
 */
async function emulateMobile() {
  const { browser, sessionId } = await connectToOrchestrator();

  try {
    const page = await browser.newPage();

    // Device descriptors are exported as `KnownDevices` in current Puppeteer
    // releases; older releases exposed the same table as `devices`
    const deviceTable = puppeteer.KnownDevices ?? puppeteer.devices;
    const iPhone12 = deviceTable?.['iPhone 12'];
    if (!iPhone12) {
      throw new Error('No "iPhone 12" device descriptor found in the installed Puppeteer');
    }

    // Emulate iPhone 12
    await page.emulate(iPhone12);

    await page.goto('https://example.com');

    // Take screenshot in mobile view
    await page.screenshot({
      path: 'mobile-view.png',
      fullPage: true
    });

    // Check that the emulated viewport is mobile-sized
    const isMobile = await page.evaluate(() => {
      return window.innerWidth <= 768;
    });

    console.log('Mobile emulation active:', isMobile);

    // Test touch events
    await page.tap('body'); // Simulate touch

  } finally {
    try {
      await browser.disconnect();
    } finally {
      // Stop session even if disconnecting failed, so it is not left running
      await fetch(`https://api.orchestratorhq.com/api/sessions/${sessionId}`, {
        method: 'DELETE',
        headers: { 'Authorization': 'Bearer orch_your_api_key_here' }
      });
    }
  }
}

Error Handling and Best Practices

Debugging Tips

Next Steps