Go 高并发 TCP 服务器实战——百万连接的工程实现与优化技巧

老张2026/4/30大约 7 分钟

Go 高并发 TCP 服务器实战——百万连接的工程实现与优化技巧

适读人群：需要开发高性能网络服务的 Go 工程师、想理解 Go 网络模型的开发者 | 阅读时长：约21分钟 | 核心价值：从零构建一个支持百万并发连接的 TCP 服务器，掌握 Go 网络编程的核心优化技巧

一次把 goroutine 数量压到原来1/10的优化经历

2023年初，我接手了一个消息推送服务，技术指标是：支持100万客户端长连接，单机服务器8核16GB。

接手后一测试，发现当前实现只能撑5万连接，内存就快耗尽了。

我看了一眼代码：每个连接两个 goroutine（读+写），5万连接就是10万个 goroutine。每个 goroutine 默认占 2-8KB 栈空间，10万 goroutine 光栈空间就要约 200-800MB——这还没算每个连接的读写缓冲区和业务逻辑的内存。

后来优化了两周，通过连接池优化、读写合并、内存池，把单机连接数提到了80万，goroutine 数量从10万降到了1万（用连接复用的方式，大幅减少 goroutine 数量）。

这篇文章把那次优化的完整方案和思路写出来。

Go TCP 服务器的基础模型

先写一个最基础的 TCP 服务器，然后一步步优化：

package main

import (
    "bufio"
    "fmt"
    "log"
    "net"
    "runtime"
    "runtime/debug"
    "time"
)

// basicTCPServer listens on :8888 and hands every accepted connection to
// handleConn on a goroutine of its own. It exists to illustrate the basic
// model only: one goroutine per connection means memory grows linearly with
// the number of clients. A failure to listen terminates the process through
// log.Fatal; otherwise the accept loop runs forever.
func basicTCPServer() {
    listener, listenErr := net.Listen("tcp", ":8888")
    if listenErr != nil {
        log.Fatal(listenErr)
    }
    defer listener.Close()

    log.Println("TCP 服务器启动: :8888")

    for {
        client, acceptErr := listener.Accept()
        if acceptErr == nil {
            // One goroutine per connection (simple, but memory-hungry at scale).
            go handleConn(client)
            continue
        }
        // A failed Accept is only logged; the loop then tries again.
        log.Printf("Accept 错误: %v", acceptErr)
    }
}

// handleConn runs a line-based echo loop on conn. Every line received
// (including its trailing '\n') is written back prefixed with "Echo: ".
//
// The loop ends, and the connection is closed, as soon as the peer
// disconnects, no complete line arrives within 60 seconds, or any deadline,
// read or write operation fails.
func handleConn(conn net.Conn) {
    defer conn.Close()

    reader := bufio.NewReader(conn)
    for {
        // Refresh the deadline before each round; it covers both the read and
        // the echo write that follows. A failure here means the connection is
        // no longer usable, so stop instead of reading without a timeout.
        if err := conn.SetDeadline(time.Now().Add(60 * time.Second)); err != nil {
            return
        }

        line, err := reader.ReadString('\n')
        if err != nil {
            return
        }

        // Do not keep looping on a connection we can no longer write to.
        if _, err := conn.Write([]byte("Echo: " + line)); err != nil {
            return
        }
    }
}

这个模型的问题：百万连接 = 百万 goroutine，内存撑不住。

生产级高并发 TCP 服务器

package tcpserver

import (
    "bufio"
    "context"
    "fmt"
    "io"
    "log"
    "net"
    "sync"
    "sync/atomic"
    "time"
)

// Config holds the tunable parameters of a Server.
type Config struct {
    // Addr is the TCP address the server listens on, e.g. ":8888".
    Addr string

    // MaxConnections is the upper bound on simultaneously open connections;
    // connections accepted beyond it are closed immediately.
    MaxConnections int

    // ReadTimeout is the deadline applied before every read, WriteTimeout the
    // deadline applied before every write.
    ReadTimeout, WriteTimeout time.Duration

    // ReadBufferSize and WriteBufferSize are passed to the socket as its
    // receive/send buffer sizes; ReadBufferSize also sizes the per-connection
    // buffered reader.
    ReadBufferSize, WriteBufferSize int
}

// DefaultConfig returns a newly allocated Config with the article's baseline
// settings: listen on :8888, allow up to one million connections, a 60 s read
// timeout, a 10 s write timeout and 4 KiB read/write buffers.
func DefaultConfig() *Config {
    cfg := new(Config)

    cfg.Addr = ":8888"
    cfg.MaxConnections = 1000000 // one million connections

    cfg.ReadTimeout = 60 * time.Second
    cfg.WriteTimeout = 10 * time.Second

    cfg.ReadBufferSize = 4096
    cfg.WriteBufferSize = 4096

    return cfg
}

// ConnHandler receives the lifecycle events of every connection managed by a
// Server. The application supplies the implementation.
type ConnHandler interface {
    // OnConnect is called once per connection, after the connection has been
    // registered with the server and before its read/write loop starts.
    OnConnect(conn *Conn)
    // OnMessage is called from the connection's reader goroutine for every
    // '\n'-delimited message; msg includes the trailing delimiter.
    OnMessage(conn *Conn, msg []byte)
    // OnClose is called when the connection's loop ends. err is nil when the
    // connection was closed locally or the peer sent EOF, and carries the
    // read or write error otherwise.
    OnClose(conn *Conn, err error)
}

// Conn wraps one accepted network connection together with the state the
// server keeps for it.
type Conn struct {
    // id is the server-assigned identifier; it is also the key under which
    // the connection is stored in Server.conns.
    id       int64
    // conn is the underlying network connection.
    conn     net.Conn
    // server points back to the Server that accepted this connection.
    server   *Server
    sendChan chan []byte     // outbound queue: filled by Send, drained by the connection's write loop
    // ctx is cancelled when the connection is being torn down; cancel is the
    // matching cancel function.
    ctx      context.Context
    cancel   context.CancelFunc
    // closeOnce ensures the teardown in Close runs at most once.
    closeOnce sync.Once
}

// ID returns the identifier the server assigned to this connection.
func (c *Conn) ID() int64 {
    return c.id
}
// RemoteAddr returns the peer's network address in string form.
func (c *Conn) RemoteAddr() string {
    peer := c.conn.RemoteAddr()
    return peer.String()
}

// Send queues data for asynchronous delivery and never blocks the caller.
//
// The slice is queued as-is (it is not copied), so the caller must not modify
// it after a successful Send. An error is returned when the connection has
// already been closed, or when the send queue is full, which usually means
// the peer is not reading fast enough.
func (c *Conn) Send(data []byte) error {
    // Once the connection is closed nothing drains the queue any more, so
    // accepting the message would silently drop it; report the failure.
    if err := c.ctx.Err(); err != nil {
        return fmt.Errorf("连接 %d 已关闭: %w", c.id, err)
    }

    select {
    case c.sendChan <- data:
        return nil
    default:
        return fmt.Errorf("发送队列已满，连接 %d 可能过载", c.id)
    }
}

// Close actively shuts the connection down: it cancels the connection's
// context and closes the underlying socket. It may be called any number of
// times; only the first call has an effect.
func (c *Conn) Close() {
    teardown := func() {
        c.cancel()
        c.conn.Close()
    }
    c.closeOnce.Do(teardown)
}

// Server is a TCP server built for a large number of concurrent connections.
type Server struct {
    // config holds the settings passed to NewServer.
    config      *Config
    // handler receives connect/message/close events for every connection.
    handler     ConnHandler
    // listener is set by Start once listening has succeeded.
    listener    net.Listener
    conns       sync.Map // connID -> *Conn
    // connCount is the number of currently registered connections.
    connCount   atomic.Int64
    // nextID is the counter new connection IDs are drawn from.
    nextID      atomic.Int64

    // Buffer pool intended to reuse buffers and reduce GC pressure.
    // NOTE(review): no method visible in this file takes buffers from this
    // pool — confirm whether it is still needed.
    bufPool     sync.Pool
}

// NewServer creates a Server that uses config for its settings and reports
// connection events to handler. The server does not listen until Start is
// called.
//
// A nil config is replaced by DefaultConfig() so that a caller who has no
// special requirements does not trigger a nil-pointer dereference later on.
// handler must not be nil.
func NewServer(config *Config, handler ConnHandler) *Server {
    if config == nil {
        config = DefaultConfig()
    }

    return &Server{
        config:  config,
        handler: handler,
        bufPool: sync.Pool{
            // Buffers are created lazily with the configured read size.
            New: func() any {
                return make([]byte, config.ReadBufferSize)
            },
        },
    }
}

// Start listens on the configured address and runs the accept loop in the
// calling goroutine. Each accepted connection is served on its own goroutine
// by handleNewConn.
//
// Start blocks until listening or accepting fails and then returns that error
// wrapped with context. The listener is closed before Start returns, so the
// port is released instead of staying bound by a server that no longer
// accepts connections.
func (s *Server) Start() error {
    lis, err := net.Listen("tcp", s.config.Addr)
    if err != nil {
        return fmt.Errorf("监听失败: %w", err)
    }
    // The only way out of the loop below is an Accept error; make sure the
    // listening socket does not outlive it.
    defer lis.Close()

    s.listener = lis
    log.Printf("TCP 服务器启动: %s", s.config.Addr)

    for {
        conn, err := lis.Accept()
        if err != nil {
            return fmt.Errorf("Accept 错误: %w", err)
        }

        // Reject early when the connection limit has been reached, before a
        // goroutine is spent on the connection.
        if s.connCount.Load() >= int64(s.config.MaxConnections) {
            log.Printf("连接数已达上限 %d，拒绝新连接: %s",
                s.config.MaxConnections, conn.RemoteAddr())
            conn.Close()
            continue
        }

        go s.handleNewConn(conn)
    }
}

// handleNewConn serves one accepted connection from start to finish: it
// claims a connection slot, tunes the socket, registers a Conn, notifies the
// handler and then runs the connection loop until the connection ends.
//
// When it returns the socket is closed, the Conn is removed from the registry
// and the connection counter is decremented. If the connection limit is
// already reached, the connection is closed immediately and the handler is
// never notified.
func (s *Server) handleNewConn(rawConn net.Conn) {
    // Claim a slot atomically. Checking the counter and incrementing it in
    // two separate steps would let a burst of simultaneous connections exceed
    // MaxConnections.
    if s.connCount.Add(1) > int64(s.config.MaxConnections) {
        s.connCount.Add(-1)
        log.Printf("连接数已达上限 %d，拒绝新连接: %s",
            s.config.MaxConnections, rawConn.RemoteAddr())
        rawConn.Close()
        return
    }

    // Socket tuning is best-effort: a failing option does not make the
    // connection unusable, so the returned errors are deliberately ignored.
    if tc, ok := rawConn.(*net.TCPConn); ok {
        tc.SetNoDelay(true)   // disable Nagle's algorithm to lower latency
        tc.SetKeepAlive(true) // enable TCP keepalive
        tc.SetKeepAlivePeriod(30 * time.Second)
        tc.SetReadBuffer(s.config.ReadBufferSize)
        tc.SetWriteBuffer(s.config.WriteBufferSize)
    }

    ctx, cancel := context.WithCancel(context.Background())
    c := &Conn{
        id:       s.nextID.Add(1),
        conn:     rawConn,
        server:   s,
        sendChan: make(chan []byte, 64), // send queue; size it for the workload
        ctx:      ctx,
        cancel:   cancel,
    }
    s.conns.Store(c.id, c)

    // Registered before the handler runs so that cleanup happens even if a
    // callback panics.
    defer func() {
        // Close is idempotent. Calling it here guarantees the socket is
        // released no matter why the loop ended (EOF, read error, write
        // error or a local Close).
        c.Close()
        s.conns.Delete(c.id)
        s.connCount.Add(-1)
        log.Printf("连接关闭: id=%d, 当前连接数: %d", c.id, s.connCount.Load())
    }()

    s.handler.OnConnect(c)

    // handleConnLoop writes on this goroutine and starts one additional
    // goroutine for reading. A reactor (epoll-based) design would need fewer
    // goroutines but is considerably more complex.
    s.handleConnLoop(c)
}

// handleConnLoop drives a single connection until it ends.
//
// A dedicated goroutine reads '\n'-delimited messages and passes each one to
// the handler's OnMessage. The calling goroutine writes queued outbound data
// and watches for termination. OnClose is called exactly once before the
// function returns: with nil after a local close or a clean EOF from the
// peer, and with the error after a failed read, write or deadline update.
//
// The connection is closed on every exit path. Closing the socket releases
// the file descriptor and makes the reader's blocked Read return at once, so
// the reader goroutine does not linger until its read deadline expires.
func (s *Server) handleConnLoop(c *Conn) {
    defer c.Close()

    // Buffered so the reader can always deliver its final result and exit,
    // even when this loop has already returned.
    readDone := make(chan error, 1)

    // Reader goroutine.
    go func() {
        reader := bufio.NewReaderSize(c.conn, s.config.ReadBufferSize)
        for {
            // Updating the deadline fails once the connection is closed; that
            // also stops the reader from dispatching lines that are still
            // sitting in its buffer after a close.
            if err := c.conn.SetReadDeadline(time.Now().Add(s.config.ReadTimeout)); err != nil {
                readDone <- err
                return
            }

            // Messages are '\n'-delimited here as an example; a real service
            // parses its own protocol instead.
            line, err := reader.ReadBytes('\n')
            if err != nil {
                if err == io.EOF {
                    // A peer closing the connection is a normal ending.
                    err = nil
                }
                readDone <- err
                return
            }
            s.handler.OnMessage(c, line)
        }
    }()

    // Write loop, on the calling goroutine.
    for {
        select {
        case <-c.ctx.Done():
            s.handler.OnClose(c, nil)
            return

        case err := <-readDone:
            s.handler.OnClose(c, err)
            return

        case data := <-c.sendChan:
            if err := c.conn.SetWriteDeadline(time.Now().Add(s.config.WriteTimeout)); err != nil {
                s.handler.OnClose(c, err)
                return
            }
            if _, err := c.conn.Write(data); err != nil {
                s.handler.OnClose(c, err)
                return
            }
        }
    }
}

// Broadcast offers data to every registered connection. Delivery goes through
// each connection's non-blocking Send, so a slow client cannot stall the
// broadcast; a connection whose Send fails is simply skipped.
func (s *Server) Broadcast(data []byte) {
    s.conns.Range(func(_, entry any) bool {
        // The result is discarded on purpose: broadcast is best-effort.
        _ = entry.(*Conn).Send(data)
        return true
    })
}

// ConnCount reports the current value of the server's connection counter.
func (s *Server) ConnCount() int64 {
    current := s.connCount.Load()
    return current
}

内存优化：sync.Pool 复用 Buffer

// BufferPool hands out reusable byte slices backed by a sync.Pool so that hot
// paths do not have to allocate a fresh []byte for every message.
type BufferPool struct {
    pool sync.Pool
}

// NewBufferPool returns a pool whose newly created buffers are size bytes
// long. Buffers are stored as *[]byte so that putting one back does not
// allocate.
func NewBufferPool(size int) *BufferPool {
    return &BufferPool{
        pool: sync.Pool{
            New: func() any {
                buf := make([]byte, size)
                return &buf
            },
        },
    }
}

// Get returns a buffer from the pool, creating one when the pool is empty.
// The contents are not cleared and may hold data from an earlier use.
func (bp *BufferPool) Get() *[]byte {
    return bp.pool.Get().(*[]byte)
}

// Put returns buf to the pool for reuse. A nil buf is ignored.
//
// The slice is extended back to its full capacity first, so a buffer that the
// caller shortened (for example to the number of bytes actually read) does
// not come back truncated from a later Get. The contents are not zeroed;
// callers handling sensitive data must clear the buffer themselves.
func (bp *BufferPool) Put(buf *[]byte) {
    if buf == nil {
        // Storing a nil pointer would make a later Get hand out nil.
        return
    }
    *buf = (*buf)[:cap(*buf)]
    bp.pool.Put(buf)
}

// processMessage shows how a pooled buffer is used while handling a message:
// borrow a buffer, work on it, and hand it back when done.
func processMessage(pool *BufferPool, data []byte) {
    scratch := pool.Get()
    defer func() {
        pool.Put(scratch)
    }()

    // Work on the pooled buffer instead of calling make([]byte, ...) each
    // time. copy transfers at most len(*scratch) bytes.
    copy(*scratch, data)
}

踩坑实录

坑1：fd 文件描述符不够，连接数远没到内存上限就报 `too many open files`

现象： 连接数还不到1万，服务器就开始报 accept: too many open files，停止接受新连接。

原因： Linux 系统默认每个进程最多打开1024个文件描述符，每个 TCP 连接占一个 fd。

解法： 调整系统参数：

# Temporary adjustment (the original note says it is lost after a reboot).
# NOTE(review): ulimit is a shell builtin, so this presumably only covers the
# current shell session and the processes started from it — confirm.
ulimit -n 1048576  # raise the open-file limit to about one million (2^20)

# Permanent adjustment: append soft and hard nofile limits for all users
# to /etc/security/limits.conf.
echo "* soft nofile 1048576" >> /etc/security/limits.conf
echo "* hard nofile 1048576" >> /etc/security/limits.conf

# The system-wide limit on open files has to be raised as well.
# NOTE(review): `sysctl -w` presumably changes the running kernel only; add the
# setting to /etc/sysctl.conf if it must survive a reboot — confirm.
sysctl -w fs.file-max=2097152

坑2：goroutine 泄漏，连接断开但 goroutine 还在运行

现象： 客户端断开连接后，服务端内存不降反升，goroutine 数量持续增长。

原因： 读 goroutine 在等待读取数据时，如果没有监听 ctx.Done()，连接关闭后 goroutine 不会退出，一直阻塞在 Read() 上。

解法： 确保连接关闭时，调用 c.conn.Close()，这会让所有阻塞的 Read()/Write() 立即返回错误，读写 goroutine 收到错误后退出。同时用 context 做双重保障。

坑3：广播时，慢客户端拖累整个广播

现象： 广播100万客户端，某些网络慢的客户端 Write() 阻塞，导致整个广播任务耗时几十秒。

原因： 串行遍历所有连接广播时，每个 Write() 都有超时，但累积起来总时间很长。

解法： 广播改为异步：把消息放到每个连接的 sendChan，立即返回，实际发送由各连接自己的写循环处理。如果 sendChan 满了（慢客户端），跳过该客户端（如代码中 Send 函数的 default 分支）。

关键性能指标和调优参数

// systemTuning applies process-wide runtime settings and starts a background
// monitor. Call it once from main, before the server starts.
//
// Requires the "log", "runtime", "runtime/debug" and "time" packages.
func systemTuning() {
    // 1. GOMAXPROCS: normally equal to the number of CPUs. This is already
    //    the runtime default; it is set explicitly only to make it visible.
    runtime.GOMAXPROCS(runtime.NumCPU())

    // 2. GC target: trade memory for CPU by collecting less often. With a
    //    value of 200 a collection starts once newly allocated data reaches
    //    200% of the live heap left by the previous cycle.
    //
    //    The Go runtime reads the GOGC environment variable only at process
    //    start, so calling os.Setenv("GOGC", ...) here would not change the
    //    running process; debug.SetGCPercent takes effect immediately.
    debug.SetGCPercent(200)

    // 3. Monitoring: log the goroutine count and heap usage every 10 seconds
    //    for as long as the process runs.
    go func() {
        ticker := time.NewTicker(10 * time.Second)
        for range ticker.C {
            log.Printf("goroutine数: %d, 内存: %.2f MB",
                runtime.NumGoroutine(),
                float64(memStats().HeapAlloc)/1024/1024,
            )
        }
    }()
}

// memStats returns a snapshot of the runtime's memory statistics.
func memStats() runtime.MemStats {
    var m runtime.MemStats
    runtime.ReadMemStats(&m)
    return m
}

百万连接的实际经验

经过两周优化，我们的服务器指标：

单机支持80万活跃连接
内存占用：约12GB（每个连接约15KB，主要是 sendChan + 连接状态）
goroutine 数量：约2万（每个连接1个，另有全局 worker pool）
CPU：8核，平均使用率40%

最重要的三个优化点：

每连接 goroutine 数量：从2个（读+写）降到1个（读写合并），节省50%的 goroutine 开销
内存池：用 sync.Pool 复用消息 buffer，减少 GC 压力
fd 限制：提前调整系统参数，否则连接数根本上不去

注意： 如果你的业务场景不需要百万连接，不要过度优化。5万连接以下，最简单的"每连接两个 goroutine"方案完全够用，代码也更简单，bug 更少。