Python 内存管理深度解析——引用计数、垃圾回收、内存泄漏排查

老张 | 2026/4/30 | 大约 7 分钟

Python 内存管理深度解析——引用计数、垃圾回收、内存泄漏排查

适读人群：Python 中高级开发者、遇到内存问题的工程师 | 阅读时长：约18分钟 | 核心价值：真正理解 Python 内存管理机制，具备独立排查内存泄漏的能力

那台内存一直涨不停的服务器

两年前，我们有个长期运行的 Python 数据处理服务，每小时处理一批数据，理论上内存应该保持平稳。但监控显示，进程内存每天稳定增长约30MB，一个月后已经占用了将近1GB，触发了 OOM 告警。

我们排查了将近三天，最终发现问题出在一个全局列表里：每次处理完数据，结果会 append 到一个模块级变量里，用来做"历史统计"，但这个统计从未被清理，也从未有人读取过。就这么默默地累积了一个月。

这次排查让我系统地学了 Python 内存管理，彻底搞清楚了引用计数、垃圾回收、内存泄漏的原理和排查方法。今天来系统分享。

一、Python 内存管理基础——引用计数

Python 使用引用计数（Reference Counting）作为主要的内存管理机制：每个对象都有一个计数器，记录有多少个引用指向它。计数降到0时，内存立即释放。

import sys
import ctypes


def demo_refcount() -> None:
    """Demonstrate how binding and unbinding names changes a list's reference count.

    Prints ``sys.getrefcount`` for the same list after every step. The numbers
    in the trailing comments are the values the author expects; each one
    includes the temporary reference held by the ``getrefcount`` argument.
    """
    # Create the object
    a = [1, 2, 3]
    print(f"创建 a 后引用计数: {sys.getrefcount(a)}")  # 2 (the name a + the getrefcount argument)

    b = a   # b now refers to the same list
    print(f"b = a 后引用计数: {sys.getrefcount(a)}")    # 3

    del b   # drop the b reference
    print(f"del b 后引用计数: {sys.getrefcount(a)}")    # 2

    c = [a, a]  # the new list holds two references to a
    print(f"c = [a, a] 后引用计数: {sys.getrefcount(a)}")  # 4

    del c
    print(f"del c 后引用计数: {sys.getrefcount(a)}")    # 2

    # Once a goes out of scope its reference count drops to 0 and the memory is released


# Small-integer cache demo for immutable objects.
# CPython caches the integers from -5 to 256, so equal values in that range
# are always the very same object.
x = 100
y = 100
print(f"x is y (100): {x is y}")   # True (same cached object)

# Build the values at runtime. Two identical literals in one source file are
# merged into a single constant by the compiler, which would make `x is y`
# True here even though 1000 lies outside the small-integer cache.
x = int("1000")
y = int("1000")
print(f"x is y (1000): {x is y}")  # False (two distinct objects)

引用计数的弱点——循环引用

import gc


def demo_circular_reference():
    """Build a two-node reference cycle, drop both names, and run the cycle collector."""
    class Node:
        def __init__(self, value):
            self.value = value
            self.next = self.prev = None

    # Link two nodes so that each one refers to the other.
    first, second = Node(1), Node(2)
    first.next = second
    second.prev = first  # the back-link closes the cycle

    # Unbinding the names does not free the nodes right away: they still
    # reference each other, so neither reference count reaches 0.
    del first
    del second

    # The cyclic garbage collector reclaims them eventually; here it is
    # triggered by hand so the result can be reported immediately.
    reclaimed = gc.collect()
    print(f"手动 GC 回收了 {reclaimed} 个对象")

二、gc 模块——循环垃圾回收

Python 有一个分代垃圾回收器（Generational Garbage Collector）来处理循环引用：

import gc
import time


# The collector tracks objects in three generations: 0 (newly created), 1, and 2 (long-lived)
print(f"各代阈值: {gc.get_threshold()}")  # e.g. (700, 10, 10) -- NOTE(review): defaults may differ between Python versions
print(f"各代统计: {gc.get_count()}")

# Turn automatic GC off (for performance-sensitive code paths)
gc.disable()
# Trigger collection by hand
gc.collect(0)  # collect generation 0 only
gc.collect()   # full collection

# Raise the GC thresholds (better throughput, but memory use may grow)
gc.set_threshold(1000, 15, 15)

# GC 回调——用于监控
def gc_callback(phase, info):
    """Print how many objects a finished collection pass reclaimed.

    Has the ``(phase, info)`` signature used by entries of ``gc.callbacks``.
    The ``"start"`` phase is ignored; a ``"stop"`` phase is reported only when
    ``info["collected"]`` is greater than zero.
    """
    if phase == "stop" and info["collected"] > 0:
        print(f"GC: 回收了 {info['collected']} 个对象")

# Register the monitoring callback, then switch automatic collection on
gc.callbacks.append(gc_callback)
gc.enable()


# 发现循环引用的对象
def find_circular_refs() -> list:
    """Return the objects that are currently reachable only through reference cycles.

    A plain ``gc.collect()`` frees collectable cycles and leaves ``gc.garbage``
    holding only *uncollectable* objects, which is normally an empty list on
    Python 3.4+. To actually see the cyclic garbage, the collector is run once
    with ``gc.DEBUG_SAVEALL`` so that every unreachable object is saved to
    ``gc.garbage`` instead of being freed.

    Returns:
        A new list containing whatever was already in ``gc.garbage`` plus
        every unreachable object found by this collection. The returned list
        keeps those objects alive; drop it to let the next collection free them.
    """
    saved_flags = gc.get_debug()
    preexisting = len(gc.garbage)
    gc.set_debug(saved_flags | gc.DEBUG_SAVEALL)
    try:
        gc.collect()
        found = list(gc.garbage)
        # Remove only the entries this call added, so gc.garbage does not
        # itself turn into an ever-growing leak.
        del gc.garbage[preexisting:]
    finally:
        # Always restore the caller's debug flags, even if collect() raised.
        gc.set_debug(saved_flags)
    return found

三、内存泄漏排查工具

# pip install memory-profiler objgraph psutil   (tracemalloc ships with the standard library; no install needed)
import tracemalloc
import linecache
import gc
from typing import List


class MemoryProfiler:
    """Thin convenience wrapper around ``tracemalloc`` for leak hunting.

    Typical use: call ``start()``, take a ``snapshot()`` before and after the
    suspicious code, then pass both to ``compare_snapshots()`` to print where
    memory grew. All reports are written to stdout.
    """

    def __init__(self):
        # Placeholder for a baseline snapshot; no method of this class reads
        # or writes it.
        self._snapshot_before = None

    def start(self):
        """Start tracing allocations, keeping up to 25 stack frames per allocation."""
        tracemalloc.start(25)

    def snapshot(self) -> tracemalloc.Snapshot:
        """Return a snapshot of the allocations traced so far."""
        return tracemalloc.take_snapshot()

    def compare_snapshots(
        self,
        snap1: tracemalloc.Snapshot,
        snap2: tracemalloc.Snapshot,
        top_n: int = 20,
    ):
        """Print the ``top_n`` source lines whose memory grew most from ``snap1`` to ``snap2``.

        Args:
            snap1: The earlier (baseline) snapshot.
            snap2: The later snapshot.
            top_n: Maximum number of growing locations to print.
        """
        stats = snap2.compare_to(snap1, "lineno")
        # compare_to() orders entries by absolute change, so call sites that
        # shrank can crowd growing ones out of the first top_n entries.
        # Keep only growth, rank by growth, and truncate last.
        growth = sorted(
            (stat for stat in stats if stat.size_diff > 0),
            key=lambda stat: stat.size_diff,
            reverse=True,
        )
        print(f"\n内存增长前 {top_n} 的代码位置:")
        for stat in growth[:top_n]:
            print(f"  +{stat.size_diff/1024:.1f} KB: {stat.traceback.format()[-1]}")

    def find_large_objects(self, min_size_kb: int = 100):
        """Print every allocation traceback whose total size exceeds ``min_size_kb`` KB.

        Sizes are totals per allocation traceback (a fresh snapshot grouped by
        ``"traceback"``), not sizes of individual Python objects.
        """
        snapshot = tracemalloc.take_snapshot()
        stats = snapshot.statistics("traceback")
        print(f"\n大对象 (>{min_size_kb}KB):")
        for stat in stats:
            if stat.size / 1024 > min_size_kb:
                print(f"  {stat.size/1024:.1f} KB: {stat.traceback.format()[-1]}")

    def stop(self):
        """Stop tracing allocations."""
        tracemalloc.stop()


# 使用 tracemalloc 做精确定位
def memory_hungry_function():
    """Build and return 10,000 small dict records, used as the sample workload."""
    # The list is handed back to the caller, so its lifetime is decided there
    # rather than inside this function.
    return [{"id": idx, "data": "x" * 100} for idx in range(10000)]


def track_memory_usage():
    """Return the resident set size (RSS) of the current process in MB."""
    import psutil
    import os

    current_process = psutil.Process(os.getpid())
    rss_bytes = current_process.memory_info().rss
    return rss_bytes / 1024 / 1024  # bytes -> MB


# End-to-end leak-hunting flow
tracemalloc.start()
snap1 = tracemalloc.take_snapshot()

# Run the code under suspicion
data = memory_hungry_function()

snap2 = tracemalloc.take_snapshot()

# Diff the two snapshots grouped by source line and show the first five entries
top_stats = snap2.compare_to(snap1, "lineno")
for stat in top_stats[:5]:
    print(stat)

objgraph——可视化对象引用图

import objgraph


def find_memory_leaks():
    """Print an objgraph-based overview of which objects are accumulating."""
    # Object types whose instance counts grew the most
    objgraph.show_growth(limit=10)

    # Every list object currently alive
    all_lists = objgraph.by_type("list")
    print(f"当前 list 对象数量: {len(all_lists)}")

    # Dicts holding more than 1000 keys
    oversized = [
        candidate
        for candidate in objgraph.by_type("dict")
        if len(candidate) > 1000
    ]
    print(f"超过1000个键的 dict: {len(oversized)} 个")

    if not oversized:
        return

    # Render who is holding on to the first oversized dict; the back-reference
    # graph is written to refs.png.
    objgraph.show_backrefs(oversized[0], max_depth=3, filename="refs.png")

四、常见内存泄漏模式和修复

踩坑实录1：全局列表/字典无限增长

# Problem code (kept as the deliberately leaky example)
_result_history = []  # module-level variable

def process_data(data):
    """Compute a result for ``data``, record it in ``_result_history``, and return it."""
    result = expensive_computation(data)
    _result_history.append(result)  # grows forever: nothing in this snippet ever removes entries
    return result


# Fix 1: cap the size (deque with maxlen)
from collections import deque

_result_history = deque(maxlen=1000)  # keeps at most 1000 entries; older ones are dropped automatically once full

# 修复方案2：使用 WeakRef
import weakref

_result_cache: dict = {}  # not used by process_with_weak_cache; kept so existing references keep working

def process_with_weak_cache(obj_id, data):
    """Return the result for ``data``, reusing a weakly cached result for ``obj_id``.

    Results live in a ``weakref.WeakValueDictionary`` stored on the function
    itself (``process_with_weak_cache._cache``). An entry disappears as soon
    as no strong reference to the result remains, so the cache never keeps
    results alive on its own.

    Args:
        obj_id: Cache key. Unhashable keys are accepted; they skip the cache.
        data: Input passed to ``expensive_computation`` on a cache miss.

    Returns:
        The cached result if one is still alive, otherwise a freshly computed
        one. Results that cannot be weakly referenced (for example ``int``,
        ``str``, ``list``, ``dict``) are returned without being cached.
    """
    cache = getattr(process_with_weak_cache, '_cache', None)
    if cache is None:
        cache = process_with_weak_cache._cache = weakref.WeakValueDictionary()

    try:
        cached = cache.get(obj_id)
    except TypeError:
        # Unhashable key: treat it as a cache miss.
        cached = None
    if cached is not None:
        return cached

    result = expensive_computation(data)
    try:
        # Only a weak reference is stored, so this does not block GC.
        cache[obj_id] = result
    except TypeError:
        # Key is unhashable or the result type does not support weak
        # references; caching is best-effort, so return the result uncached.
        pass
    return result

踩坑实录2：闭包意外持有大对象

# 问题代码
def create_handler(big_data):
    """Return a request handler that reads ``big_data["key"]`` on every call.

    This is the problematic variant: the inner function refers to
    ``big_data`` directly, so the returned closure keeps the whole object
    (described as several hundred MB) alive for as long as the handler exists.
    """
    def handler(request):
        # Only one entry is needed, yet the closure holds all of big_data.
        return big_data["key"]
    return handler

# 修复：只提取需要的数据
def create_handler_fixed(big_data):
    """Return a request handler that always returns the value ``big_data["key"]`` had at creation.

    Only the extracted value is captured by the closure; the handler holds no
    reference to ``big_data`` itself.
    """
    needed_value = big_data["key"]  # pull out just the part the handler needs
    del big_data  # unbind the local name so this frame no longer refers to the large object

    def handler(request):
        return needed_value  # the closure captures needed_value only
    return handler

现象：在 Python 3.4 之前，实现了 __del__ 的对象如果处于循环引用中，会被放入 gc.garbage，永远无法被回收。
原因：Python < 3.4 的 GC 无法确定带 __del__ 的循环引用对象的安全析构顺序，因此不会回收它们。Python 3.4+（PEP 442）已修复这一问题，但仍应谨慎使用 __del__。
解法：用 contextlib.contextmanager 或 weakref.finalize 替代 __del__。

import weakref


class ResourceManager:
    """Own a resource and make sure it is cleaned up exactly once.

    Cleanup is registered with ``weakref.finalize`` instead of ``__del__``, so
    it runs when the manager is garbage collected. For deterministic release,
    call ``close()`` or use the manager in a ``with`` statement; a later
    garbage collection then does nothing further.
    """

    def __init__(self, resource):
        """Take ownership of ``resource``, which must provide a ``close()`` method."""
        self._resource = resource
        # weakref.finalize instead of __del__. The callback is a staticmethod
        # that receives the resource, so it holds no reference to self.
        self._finalizer = weakref.finalize(
            self, self._cleanup, resource
        )

    @staticmethod
    def _cleanup(resource):
        """Cleanup callback; takes the resource directly so it never references self."""
        print(f"清理资源: {resource}")
        resource.close()

    def use(self):
        """Return the managed resource."""
        return self._resource

    def close(self):
        """Run the cleanup now; repeated calls are no-ops."""
        # A finalize object invokes its callback at most once, however many
        # times it is called.
        self._finalizer()

    def __enter__(self):
        """Return the manager itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Clean up on leaving the ``with`` block; exceptions are not suppressed."""
        self.close()
        return False

五、内存优化技巧

import sys
from dataclasses import dataclass


# 使用 __slots__ 减少实例内存占用
class WithoutSlots:
    """Plain three-field class with no ``__slots__``; attributes live in the instance ``__dict__``."""

    def __init__(self, x, y, z):
        self.x, self.y, self.z = x, y, z


class WithSlots:
    """Three-field class that declares ``__slots__``, so instances carry no ``__dict__``."""

    __slots__ = ("x", "y", "z")

    def __init__(self, x, y, z):
        self.x, self.y, self.z = x, y, z


# Compare the per-instance footprint.
# sys.getsizeof is shallow: for the class without __slots__ it does not count
# the instance's attribute dict. The dict size is therefore added explicitly,
# otherwise the slotted instance would misleadingly appear to be the larger one.
# The sum is an approximation; exact numbers depend on the Python version.
no_slots = WithoutSlots(1, 2, 3)
with_slots = WithSlots(1, 2, 3)
print(f"无 slots: {sys.getsizeof(no_slots) + sys.getsizeof(no_slots.__dict__)} bytes")  # instance + its __dict__
print(f"有 slots: {sys.getsizeof(with_slots)} bytes")  # instance only; there is no __dict__

# The gap is more pronounced with a large number of objects
n = 1_000_000
import gc
gc.collect()  # collect before taking the first baseline reading

# Process memory growth while one million non-slotted instances are alive
start_mem = track_memory_usage()
without = [WithoutSlots(i, i, i) for i in range(n)]
mem_without = track_memory_usage() - start_mem

# Drop the first batch and collect before taking the second baseline
del without
gc.collect()

# Same measurement for the slotted class
start_mem = track_memory_usage()
with_s = [WithSlots(i, i, i) for i in range(n)]
mem_with = track_memory_usage() - start_mem

print(f"无 slots: {mem_without:.1f} MB")
print(f"有 slots: {mem_with:.1f} MB")
print(f"节省: {(1 - mem_with/mem_without)*100:.0f}%")
# Typical output: roughly 30-40% saved

六、选型建议

内存问题的排查步骤：

1. 监控：用 psutil 持续监控进程内存
2. 定位：用 tracemalloc 找增长最快的代码位置
3. 分析：用 objgraph 查找是哪些对象在累积
4. 验证：修复后观察内存是否稳定

内存泄漏大多数不是 Python 的问题，而是使用模式的问题：无界缓存、全局状态累积、闭包意外持有大对象是最常见的三种。理解了这些模式，遇到内存问题才不会无从下手。