Node.js 성능 최적화 가이드 — 클러스터부터 메모리까지

Node.js 성능 최적화가 필요한 이유

Node.js는 싱글 스레드 이벤트 루프 모델로 동작합니다. I/O 작업에는 뛰어난 성능을 보이지만, CPU 집약적인 작업이나 메모리 관리를 소홀히 하면 전체 서버 성능이 급격히 저하됩니다. 이 글에서는 클러스터 모드, 워커 스레드, 메모리 관리, 프로파일링 등 Node.js 성능을 끌어올리는 핵심 기법을 정리합니다.

이벤트 루프 이해

Node.js의 이벤트 루프는 6개의 페이즈로 구성됩니다. 이벤트 루프를 블로킹하지 않는 것이 성능의 핵심입니다.

const { performance, PerformanceObserver } = require("node:perf_hooks");

/**
 * Starts a periodic check that warns when the event loop is running late.
 *
 * A timer is scheduled every `intervalMs`. On each tick, the amount by which
 * the tick arrived later than scheduled is computed, and a warning is written
 * with `console.warn` when that lateness exceeds `thresholdMs`.
 *
 * @param {number} [intervalMs=1000] - Sampling interval in milliseconds.
 * @param {number} [thresholdMs=100] - Lateness (ms) above which a warning is logged.
 * @returns {NodeJS.Timeout} The interval handle; pass it to `clearInterval` to stop monitoring.
 */
function detectEventLoopDelay(intervalMs = 1000, thresholdMs = 100) {
  let lastCheck = performance.now();

  const timer = setInterval(() => {
    const now = performance.now();
    // Anything beyond the scheduled interval is time the loop spent blocked.
    const delay = now - lastCheck - intervalMs;
    if (delay > thresholdMs) {
      console.warn(`[경고] 이벤트 루프 지연: ${delay.toFixed(0)}ms`);
    }
    lastCheck = now;
  }, intervalMs);

  // A monitoring timer must not keep an otherwise finished process alive.
  timer.unref();
  return timer;
}

/**
 * Anti-pattern: synchronous work that blocks the event loop.
 *
 * Spins in a tight loop until 5 seconds have passed according to `Date.now()`.
 * While it spins, no other callback can run on this thread.
 */
function badExample() {
  const deadline = Date.now() + 5000;
  while (Date.now() < deadline) {
    // Stand-in for a heavy computation that hogs the CPU.
  }
}

/**
 * Good pattern: process a large array in chunks and yield to the event loop
 * between chunks.
 *
 * Every item is doubled. After each chunk of 1000 items (except the last one)
 * the function awaits a `setImmediate` callback so other pending callbacks
 * get a chance to run.
 *
 * @param {number[]} items - Values to double.
 * @returns {Promise<number[]>} The doubled values, in input order.
 */
async function goodExample(items) {
  const CHUNK_SIZE = 1000;
  const results = [];
  let offset = 0;

  while (offset < items.length) {
    const end = offset + CHUNK_SIZE;
    const doubled = items.slice(offset, end).map((value) => value * 2); // cheap per-item work
    for (const value of doubled) {
      results.push(value);
    }

    // Hand control back to the event loop unless this was the final chunk.
    if (end < items.length) {
      await new Promise((resolve) => {
        setImmediate(resolve);
      });
    }
    offset = end;
  }

  return results;
}

페이즈	역할	예시
timers	`setTimeout`, `setInterval` 콜백 실행	지연된 작업
pending callbacks	시스템 작업 콜백	TCP 에러
idle, prepare	Node.js 내부 전용 단계	(내부 처리)
poll	I/O 콜백 처리	파일 읽기, 네트워크
check	`setImmediate` 콜백	poll 후 즉시 실행
close callbacks	종료 콜백	`socket.on('close')`

클러스터 모드 — 멀티코어 활용

Node.js 프로세스는 자바스크립트 코드를 기본적으로 하나의 스레드에서 실행하므로, 프로세스 하나로는 사실상 하나의 CPU 코어만 활용합니다. cluster 모듈로 여러 프로세스를 생성하면 멀티코어를 활용할 수 있습니다.

const cluster = require("node:cluster");
const http = require("node:http");
const os = require("node:os");

// One worker per CPU entry reported by the OS.
const NUM_WORKERS = os.cpus().length;

if (cluster.isPrimary) {
  console.log(`마스터 프로세스 ${process.pid} 시작`);
  console.log(`워커 ${NUM_WORKERS}개 생성 중...`);

  // Fork the initial set of workers.
  for (let i = 0; i < NUM_WORKERS; i++) {
    cluster.fork();
  }

  // Replace workers that died unexpectedly.
  cluster.on("exit", (worker, code, signal) => {
    // exitedAfterDisconnect is true when the worker was stopped on purpose
    // (worker.disconnect() / worker.kill()). Re-forking in that case would
    // make a graceful shutdown or scale-down impossible.
    if (worker.exitedAfterDisconnect) {
      console.log(`워커 ${worker.process.pid} 정상 종료`);
      return;
    }

    // `code` is null when the worker was killed by a signal, so report the
    // signal name as well in that case.
    const reason = signal ? `코드: ${code}, 시그널: ${signal}` : `코드: ${code}`;
    console.warn(`워커 ${worker.process.pid} 종료 (${reason}). 재시작 중...`);
    cluster.fork();
  });
} else {
  // Every worker listens on the same port; each one answers requests with
  // its own pid in the JSON body.
  http
    .createServer((req, res) => {
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(
        JSON.stringify({
          pid: process.pid,
          message: "응답 완료",
        })
      );
    })
    .listen(3000);

  console.log(`워커 ${process.pid} 시작됨`);
}

// 출력:
// 마스터 프로세스 12345 시작
// 워커 8개 생성 중...
// 워커 12346 시작됨
// 워커 12347 시작됨
// ...

워커 스레드 — CPU 집약적 작업 분리

worker_threads 모듈을 사용하면 CPU 집약적인 작업을 별도 스레드에서 실행할 수 있어, 메인 스레드(이벤트 루프)가 블로킹되지 않습니다.

// worker.js — 워커 스레드에서 실행되는 코드
const { parentPort, workerData } = require("node:worker_threads");

/**
 * Returns the n-th Fibonacci number using plain double recursion.
 *
 * The exponential running time is what makes this a CPU-heavy workload for
 * the worker thread.
 *
 * @param {number} n - Index in the sequence; values of 1 or below are returned unchanged.
 * @returns {number} The n-th Fibonacci number.
 */
function fibonacci(n) {
  return n <= 1 ? n : fibonacci(n - 2) + fibonacci(n - 1);
}

// Compute the value for the number received through workerData and post
// both the input and the result back to the parent thread.
const { number } = workerData;
parentPort.postMessage({ number, result: fibonacci(number) });

// main.js — 메인 스레드
const { Worker } = require("node:worker_threads");

/**
 * Computes a Fibonacci number in a dedicated worker thread.
 *
 * @param {number} number - Passed to the worker as `workerData.number`.
 * @returns {Promise<*>} Resolves with the first message the worker posts.
 *   Rejects when the worker emits an error, exits with a non-zero code, or
 *   exits without ever posting a message.
 */
function runFibonacci(number) {
  return new Promise((resolve, reject) => {
    // NOTE: a relative string path is resolved against process.cwd(),
    // not against the directory of this file.
    const worker = new Worker("./worker.js", {
      workerData: { number },
    });

    worker.once("message", resolve);
    worker.once("error", reject);
    worker.once("exit", (code) => {
      // Settling an already-settled promise is a no-op, so these rejections
      // only take effect when no message or error arrived first. Without the
      // code === 0 branch the promise would stay pending forever in that case.
      if (code !== 0) {
        reject(new Error(`워커 종료 코드: ${code}`));
      } else {
        reject(new Error("워커가 결과를 보내지 않고 종료되었습니다"));
      }
    });
  });
}

/**
 * Runs three Fibonacci computations in parallel via `runFibonacci`, prints
 * the total wall-clock time and then each result.
 *
 * @returns {Promise<void>} Rejects if any of the computations rejects.
 */
async function main() {
  console.time("병렬 실행");
  let results;
  try {
    results = await Promise.all([
      runFibonacci(40),
      runFibonacci(41),
      runFibonacci(42),
    ]);
  } finally {
    // Close the timer even when a computation failed.
    console.timeEnd("병렬 실행");
  }

  for (const { number, result } of results) {
    console.log(`fibonacci(${number}) = ${result}`);
  }
}

// Never leave the promise floating: report the failure and signal it through
// the exit code instead of relying on the unhandled-rejection default.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
// 병렬 실행: 2340ms (순차 실행 대비 약 2배 빠름 — 가장 오래 걸리는 fibonacci(42)가 전체 시간을 결정)
// fibonacci(40) = 102334155
// fibonacci(41) = 165580141
// fibonacci(42) = 267914296

방식	적합한 작업	메모리 공유	통신 방식
클러스터	HTTP 서버 스케일링	불가	IPC
워커 스레드	CPU 집약적 계산	SharedArrayBuffer	postMessage

메모리 관리와 누수 탐지

V8 엔진의 힙 메모리를 모니터링하고 누수를 탐지하는 방법입니다.

/**
 * Logs the current process memory usage in megabytes.
 *
 * Reads `process.memoryUsage()` and prints four of its fields, each formatted
 * with one decimal place and an "MB" suffix.
 *
 * @param {string} [label=""] - Optional tag appended to the log prefix.
 */
function logMemoryUsage(label = "") {
  const toMegabytes = (bytes) => `${(bytes / 1024 / 1024).toFixed(1)}MB`;
  const snapshot = process.memoryUsage();
  const prefix = label ? `[메모리 - ${label}]` : "[메모리]";

  // rss: total memory; heapTotal / heapUsed: heap size and the part in use;
  // external: memory held by C++ objects.
  const report = {};
  for (const field of ["rss", "heapTotal", "heapUsed", "external"]) {
    report[field] = toMegabytes(snapshot[field]);
  }

  console.log(prefix, report);
}

/**
 * Size-bounded key/value cache with least-recently-used (LRU) eviction.
 *
 * An unbounded cache grows forever; this one caps the number of entries.
 * It relies on `Map` iterating keys in insertion order: the first key is the
 * least recently used entry, the last key the most recently used one.
 */
class CacheManager {
  #cache = new Map();
  #maxSize;

  /**
   * @param {number} [maxSize=1000] - Maximum number of entries to keep.
   */
  constructor(maxSize = 1000) {
    this.#maxSize = maxSize;
  }

  /**
   * Stores a value and marks the key as most recently used.
   *
   * @param {*} key
   * @param {*} value
   */
  set(key, value) {
    if (this.#cache.has(key)) {
      // Overwriting does not grow the cache, so nothing needs to be evicted.
      // Delete first so the re-insert below moves the key to the
      // most-recently-used position (Map.set alone keeps the old position).
      this.#cache.delete(key);
    } else if (this.#cache.size >= this.#maxSize) {
      // Cache is full: drop the least recently used entry.
      const oldestKey = this.#cache.keys().next().value;
      this.#cache.delete(oldestKey);
    }
    this.#cache.set(key, value);
  }

  /**
   * Looks up a value and marks the key as most recently used.
   *
   * @param {*} key
   * @returns {*} The stored value, or `undefined` when the key is absent.
   */
  get(key) {
    if (!this.#cache.has(key)) return undefined;
    // Re-insert to move the entry to the most-recently-used position.
    const value = this.#cache.get(key);
    this.#cache.delete(key);
    this.#cache.set(key, value);
    return value;
  }

  /** @returns {number} Number of entries currently stored. */
  get size() {
    return this.#cache.size;
  }
}

// FinalizationRegistry whose cleanup callback logs the value that was held
// for an object once that object has been garbage-collected.
const registry = new FinalizationRegistry(function logCollected(heldValue) {
  console.log(`[GC] ${heldValue} 객체가 가비지 컬렉션됨`);
});

// Print a memory snapshot labelled "시작"; sample output is shown below.
logMemoryUsage("시작");
// [메모리 - 시작] { rss: '25.3MB', heapTotal: '6.2MB', heapUsed: '4.1MB', external: '0.4MB' }

프로파일링 — 병목 지점 찾기

const { performance } = require("node:perf_hooks");

/**
 * Measures how long `fn` takes and logs the elapsed time as `[label] N.NNms`.
 *
 * Synchronous functions are timed until they return or throw. When `fn`
 * returns a promise (any thenable), the time is logged once that promise
 * settles, so asynchronous work is measured in full instead of only its
 * synchronous setup.
 *
 * @param {string} label - Tag printed in front of the measurement.
 * @param {Function} fn - Function to run; called with no arguments.
 * @returns {*} Whatever `fn` returns. For a thenable, a promise that settles
 *   the same way after the measurement has been logged.
 * @throws Rethrows anything `fn` throws synchronously (after logging the time).
 */
function measureTime(label, fn) {
  const start = performance.now();
  const report = () => {
    const elapsed = performance.now() - start;
    console.log(`[${label}] ${elapsed.toFixed(2)}ms`);
  };

  let result;
  try {
    result = fn();
  } catch (err) {
    report();
    throw err;
  }

  if (typeof result?.then === "function") {
    // finally() passes the original value or rejection through unchanged.
    return Promise.resolve(result).finally(report);
  }

  report();
  return result;
}

// 스트림으로 대용량 파일 처리 (메모리 효율적)
const fs = require("node:fs");
const { createReadStream, createWriteStream } = fs;
const { pipeline } = require("node:stream/promises");
const { Transform } = require("node:stream");

/**
 * Copies a text file to `outputPath`, upper-casing its content, using
 * streams so the whole file is never held in memory.
 *
 * The input is decoded as UTF-8 by the read stream. Decoding there (instead
 * of calling `toString()` on each raw Buffer chunk) keeps multi-byte
 * characters that straddle a chunk boundary intact.
 *
 * @param {string} inputPath - File to read (treated as UTF-8 text).
 * @param {string} outputPath - File to write.
 * @returns {Promise<void>} Resolves when the pipeline has finished; rejects
 *   if any stream in the pipeline fails.
 */
async function processLargeFile(inputPath, outputPath) {
  const upperCaseTransform = new Transform({
    // Keep incoming chunks as the already-decoded strings.
    decodeStrings: false,
    transform(chunk, encoding, callback) {
      // Processed chunk by chunk; the full file is never loaded at once.
      callback(null, chunk.toString().toUpperCase());
    },
  });

  await pipeline(
    createReadStream(inputPath, { encoding: "utf8" }),
    upperCaseTransform,
    createWriteStream(outputPath)
  );
  console.log("파일 처리 완료 (스트림 방식)");
}

# Node.js 내장 프로파일러
node --prof app.js
node --prof-process isolate-*.log > profile.txt

# Chrome DevTools로 프로파일링
node --inspect app.js
# chrome://inspect에서 연결

실전 팁

이벤트 루프 블로킹 금지: 100ms 이상 걸리는 동기 작업은 워커 스레드로 분리합니다
스트림 활용: 대용량 데이터는 readFile 대신 createReadStream으로 처리합니다
커넥션 풀링: DB 연결은 매번 생성하지 않고 풀(pool)로 관리합니다
캐시 크기 제한: 무제한 캐시는 메모리 누수의 원인입니다. LRU 전략을 적용합니다
클러스터 + PM2: 프로덕션에서는 PM2의 클러스터 모드를 사용하면 프로세스 관리가 편리합니다
힙 스냅샷: --inspect 플래그로 Chrome DevTools에서 메모리 스냅샷을 비교합니다
HTTP Keep-Alive: 연결 재사용으로 TCP 핸드셰이크 오버헤드를 줄입니다