Go 实现 WebShell 安全检测工具——静态分析 PHP/JSP 文件的恶意代码

老张 · 2026/4/30 · 大约 6 分钟

Go 实现 WebShell 安全检测工具——静态分析 PHP/JSP 文件的恶意代码

适读人群：安全工程师、Go 开发者、需要自动化检测 WebShell 的运维人员 | 阅读时长：约 16 分钟 | 核心价值：基于规则+特征的 WebShell 静态检测 Go 实现，可集成到 CI/CD 流水线

有个做电商的朋友，他们公司的服务器被黑了。攻击者通过一个文件上传漏洞上传了一个 PHP WebShell，藏在用户头像目录里，命名为 profile_pic_1234.php（后缀做了混淆）。等被发现的时候，数据已经被持续拖取了好几个月。

后来他们让我帮做一个定期扫描的工具，每天扫一遍 Web 目录，检测可疑文件。市面上的工具要么太重（需要安装代理），要么规则不透明（商业产品）。我用 Go 实现了一个轻量级的静态分析工具，这篇文章把核心实现写出来。

WebShell 的检测思路

WebShell 通常具备以下特征：

危险函数调用：eval, exec, system, passthru, shell_exec, assert, preg_replace（/e 修饰符）
编码混淆：base64_decode, str_rot13, gzinflate, hex2bin 组合使用
特征字符串："一句话木马"的固定模式，如 $_POST['...']($_POST['...'])
高熵值代码：经过多重编码的代码，字符集丰富度高

规则引擎设计

package detector

import (
    "math"
    "regexp"
    "strings"
)

// RiskLevel ranks how dangerous a finding (or a whole file) is.
// Larger values mean higher risk, so levels can be compared with < and >.
type RiskLevel int

// Risk levels in ascending order of severity, numbered from 1 to 4.
const (
    RiskLow RiskLevel = iota + 1
    RiskMedium
    RiskHigh
    RiskCritical
)

// Rule is a single detection rule: a compiled regular expression plus the
// metadata reported when it matches.
type Rule struct {
    ID          string // rule identifier, e.g. "PHP-001"
    Name        string // short human-readable rule name shown in reports
    Description string // optional longer explanation
    Level       RiskLevel // risk level assigned to a match of this rule
    Pattern     *regexp.Regexp // expression matched against each line of a file
    FileTypes   []string // file extensions (with leading dot) the rule applies to
}

// DetectionResult is the outcome of analysing one file.
type DetectionResult struct {
    FilePath  string    // path of the analysed file
    FileType  string    // file extension of the analysed file
    FileSize  int64     // file size in bytes
    RiskLevel RiskLevel // overall risk level of the file
    Findings  []Finding // individual findings recorded for the file
    Entropy   float64   // entropy value computed for the file content
}

// Finding is a single hit produced while analysing a file.
type Finding struct {
    RuleID   string // ID of the rule that produced the finding
    RuleName string // name of the rule that produced the finding
    Level    RiskLevel // risk level of this finding
    LineNo   int // 1-based line number; left at 0 when the finding is not tied to a line
    Line     string // the (possibly truncated) line, or a descriptive message
    Match    string // the exact text matched by the rule's pattern
}

// phpFileTypes lists the extensions that are treated as PHP source. Every
// PHP rule shares it so that a shell hidden behind an alternative extension
// such as .phtml is checked by all rules, not just some of them.
var phpFileTypes = []string{".php", ".php3", ".php4", ".php5", ".phtml"}

// PHPRules is the set of detection rules applied to PHP files. Each pattern
// is matched against one line at a time.
var PHPRules = []*Rule{
    // "One-liner" shell: a request parameter used as a function name and
    // called with another request parameter, e.g. $_POST['a']($_POST['b']).
    {
        ID:    "PHP-001",
        Name:  "PHP一句话木马",
        Level: RiskCritical,
        Pattern: regexp.MustCompile(
            `(?i)(\$_(?:POST|GET|REQUEST|COOKIE)\s*\[.+?\])\s*\(\s*(\$_(?:POST|GET|REQUEST|COOKIE)\s*\[.+?\])\s*\)`,
        ),
        FileTypes: phpFileTypes,
    },
    // eval() applied directly to the output of a decoding/unpacking function.
    {
        ID:    "PHP-002",
        Name:  "eval执行base64解码内容",
        Level: RiskCritical,
        Pattern: regexp.MustCompile(
            `(?i)eval\s*\(\s*(?:base64_decode|str_rot13|gzinflate|gzuncompress|str_replace|gzdecode)\s*\(`,
        ),
        FileTypes: phpFileTypes,
    },
    // assert() called on a request parameter.
    {
        ID:    "PHP-003",
        Name:  "assert执行外部输入",
        Level: RiskCritical,
        Pattern: regexp.MustCompile(
            `(?i)assert\s*\(\s*\$_(?:POST|GET|REQUEST|COOKIE)\s*\[`,
        ),
        FileTypes: phpFileTypes,
    },
    // preg_replace with the legacy /e (evaluate) modifier. The modifier list
    // must be followed by the closing quote of the pattern literal, so that
    // an unrelated "/e" later on the line (for example in "/etc/...") does
    // not trigger the rule.
    {
        ID:    "PHP-004",
        Name:  "preg_replace /e 修饰符",
        Level: RiskHigh,
        Pattern: regexp.MustCompile(
            `(?i)preg_replace\s*\(\s*['"]/.*?/[imsxadju]*e[imsxadju]*['"]`,
        ),
        FileTypes: phpFileTypes,
    },
    // Command-execution functions fed directly from a request superglobal.
    {
        ID:    "PHP-005",
        Name:  "危险系统命令执行函数",
        Level: RiskHigh,
        Pattern: regexp.MustCompile(
            `(?i)(?:system|exec|shell_exec|passthru|popen|proc_open)\s*\(\s*\$_(?:POST|GET|REQUEST|COOKIE|SERVER)`,
        ),
        FileTypes: phpFileTypes,
    },
    // unserialize() called on a request parameter.
    {
        ID:    "PHP-006",
        Name:  "反序列化外部输入",
        Level: RiskHigh,
        Pattern: regexp.MustCompile(
            `(?i)unserialize\s*\(\s*\$_(?:POST|GET|REQUEST|COOKIE)`,
        ),
        FileTypes: phpFileTypes,
    },
    // file_put_contents() whose arguments include a request parameter.
    {
        ID:    "PHP-007",
        Name:  "文件写入外部输入",
        Level: RiskMedium,
        Pattern: regexp.MustCompile(
            `(?i)file_put_contents\s*\(.+?\$_(?:POST|GET|REQUEST)`,
        ),
        FileTypes: phpFileTypes,
    },
}

// JSPRules is the set of detection rules applied to JSP pages
// (.jsp and .jspx). Each pattern is matched against one line at a time.
var JSPRules = []*Rule{
    // A call of the form Runtime.getRuntime().exec(...).
    {
        ID:        "JSP-001",
        Name:      "JSP Runtime执行命令",
        Level:     RiskCritical,
        Pattern:   regexp.MustCompile(`(?i)Runtime\s*\.\s*getRuntime\s*\(\s*\)\s*\.\s*exec\s*\(`),
        FileTypes: []string{".jsp", ".jspx"},
    },
    // Class.forName(...) with request.getParameter appearing later on the
    // same line.
    {
        ID:        "JSP-002",
        Name:      "JSP反射调用",
        Level:     RiskHigh,
        Pattern:   regexp.MustCompile(`(?i)Class\s*\.\s*forName\s*\(.+?request\s*\.\s*getParameter`),
        FileTypes: []string{".jsp", ".jspx"},
    },
}

检测器实现

package detector

import (
    "bufio"
    "fmt"
    "math"
    "os"
    "path/filepath"
    "strings"
)

// Detector performs static WebShell detection on files using a fixed set of
// regular-expression rules plus an entropy check.
type Detector struct {
    rules            []*Rule // rules evaluated against every line of a file
    entropyThreshold float64 // entropy above this value counts as anomalous
}

// NewDetector returns a Detector loaded with all PHP and JSP rules and an
// entropy threshold of 5.0.
func NewDetector() *Detector {
    // Build a fresh slice rather than append(PHPRules, JSPRules...): if the
    // exported PHPRules slice ever has spare capacity, appending to it would
    // write into its backing array and alias state between detectors.
    rules := make([]*Rule, 0, len(PHPRules)+len(JSPRules))
    rules = append(rules, PHPRules...)
    rules = append(rules, JSPRules...)

    return &Detector{
        rules:            rules,
        entropyThreshold: 5.0, // high-entropy threshold
    }
}

// DetectFile 检测单个文件
func (d *Detector) DetectFile(filePath string) (*DetectionResult, error) {
    info, err := os.Stat(filePath)
    if err != nil {
        return nil, err
    }

    ext := strings.ToLower(filepath.Ext(filePath))
    result := &DetectionResult{
        FilePath:  filePath,
        FileType:  ext,
        FileSize:  info.Size(),
        RiskLevel: RiskLow,
    }

    // 读取文件
    content, err := os.ReadFile(filePath)
    if err != nil {
        return nil, err
    }

    // 计算熵值
    result.Entropy = shannonEntropy(string(content))

    // 逐行检测规则
    scanner := bufio.NewScanner(strings.NewReader(string(content)))
    lineNo := 0
    for scanner.Scan() {
        lineNo++
        line := scanner.Text()

        for _, rule := range d.rules {
            // 检查文件类型
            if len(rule.FileTypes) > 0 {
                matched := false
                for _, ft := range rule.FileTypes {
                    if ext == ft {
                        matched = true
                        break
                    }
                }
                if !matched {
                    continue
                }
            }

            // 匹配规则
            if m := rule.Pattern.FindString(line); m != "" {
                finding := Finding{
                    RuleID:   rule.ID,
                    RuleName: rule.Name,
                    Level:    rule.Level,
                    LineNo:   lineNo,
                    Line:     truncateLine(line, 200),
                    Match:    m,
                }
                result.Findings = append(result.Findings, finding)

                if rule.Level > result.RiskLevel {
                    result.RiskLevel = rule.Level
                }
            }
        }
    }

    // 高熵值但没有其他发现，标记为中风险
    if result.Entropy > d.entropyThreshold && len(result.Findings) == 0 {
        result.RiskLevel = RiskMedium
        result.Findings = append(result.Findings, Finding{
            RuleID:   "ENTROPY-001",
            RuleName: "代码熵值异常（可能有混淆）",
            Level:    RiskMedium,
            Line:     fmt.Sprintf("文件熵值: %.2f", result.Entropy),
        })
    }

    return result, nil
}

// ScanDirectory walks dir recursively and runs DetectFile on every regular
// entry whose extension is listed in targetExts.
//
// Extensions are compared case-insensitively; surrounding whitespace is
// ignored, a missing leading dot is added ("php" is treated as ".php") and
// empty entries are dropped. If no usable extension remains, every file is
// scanned. Directories named .git, vendor or node_modules are skipped.
//
// Only results whose RiskLevel is above RiskLow are returned. Files or
// sub-directories that cannot be accessed or scanned are skipped (scan
// failures are reported on stderr), but a failure to access dir itself is
// returned as an error so that a wrong path is not mistaken for a clean scan.
func (d *Detector) ScanDirectory(dir string, targetExts []string) ([]*DetectionResult, error) {
    extSet := make(map[string]bool, len(targetExts))
    for _, e := range targetExts {
        e = strings.ToLower(strings.TrimSpace(e))
        if e == "" {
            continue
        }
        if !strings.HasPrefix(e, ".") {
            e = "." + e
        }
        extSet[e] = true
    }

    var results []*DetectionResult

    err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
        if err != nil {
            if path == dir {
                // The root itself is unusable: abort instead of silently
                // returning an empty result set.
                return err
            }
            return nil // skip entries that cannot be accessed
        }
        if info.IsDir() {
            // Skip version-control and dependency directories.
            switch info.Name() {
            case ".git", "vendor", "node_modules":
                return filepath.SkipDir
            }
            return nil
        }

        ext := strings.ToLower(filepath.Ext(path))
        if len(extSet) > 0 && !extSet[ext] {
            return nil
        }

        result, err := d.DetectFile(path)
        if err != nil {
            // Best effort: one unreadable file must not abort the scan.
            fmt.Fprintf(os.Stderr, "Warning: failed to scan %s: %v\n", path, err)
            return nil
        }

        if result.RiskLevel > RiskLow {
            results = append(results, result)
        }
        return nil
    })

    return results, err
}

// shannonEntropy returns the Shannon entropy of s in bits per character,
// where a character is one rune as produced by ranging over the string.
// The empty string has entropy 0.
func shannonEntropy(s string) float64 {
    if s == "" {
        return 0
    }

    // Tally rune occurrences and the overall rune count in a single pass.
    counts := make(map[rune]int)
    n := 0
    for _, r := range s {
        counts[r]++
        n++
    }

    total := float64(n)
    var h float64
    for _, c := range counts {
        p := float64(c) / total
        h -= p * math.Log2(p)
    }
    return h
}

// truncateLine trims surrounding whitespace from s and, if the result is
// longer than maxLen runes, cuts it to maxLen runes and appends "...".
func truncateLine(s string, maxLen int) string {
    trimmed := strings.TrimSpace(s)
    // Count and cut in runes so multi-byte characters are never split.
    if chars := []rune(trimmed); len(chars) > maxLen {
        return string(chars[:maxLen]) + "..."
    }
    return trimmed
}

主程序：报告生成

// main parses the command-line flags, scans the target directory, prints a
// per-level summary to stdout and writes a plain-text report to the output
// file. It exits with a non-zero status if the scan fails or the report
// cannot be written.
func main() {
    dir := flag.String("dir", ".", "扫描目录")
    ext := flag.String("ext", ".php,.jsp,.jspx,.phtml", "扫描的文件扩展名")
    output := flag.String("output", "report.txt", "报告输出文件")
    flag.Parse()

    exts := strings.Split(*ext, ",")
    d := detector.NewDetector()

    fmt.Printf("扫描目录: %s\n", *dir)
    results, err := d.ScanDirectory(*dir, exts)
    if err != nil {
        log.Fatal(err)
    }

    // Count flagged files per risk level.
    criticalCount, highCount, mediumCount := 0, 0, 0
    for _, r := range results {
        switch r.RiskLevel {
        case detector.RiskCritical:
            criticalCount++
        case detector.RiskHigh:
            highCount++
        case detector.RiskMedium:
            mediumCount++
        }
    }

    fmt.Printf("扫描完成，发现: 严重=%d, 高危=%d, 中危=%d\n", criticalCount, highCount, mediumCount)

    // Labels used in the report; built once rather than per result.
    levelNames := map[detector.RiskLevel]string{
        detector.RiskCritical: "严重",
        detector.RiskHigh:     "高危",
        detector.RiskMedium:   "中危",
    }

    // Assemble the report in memory and write it in one call, so that a
    // failure to create or write the file is detected and reported instead
    // of being silently ignored.
    var report strings.Builder
    for _, r := range results {
        fmt.Fprintf(&report, "[%s] %s (熵值: %.2f)\n", levelNames[r.RiskLevel], r.FilePath, r.Entropy)
        for _, finding := range r.Findings {
            fmt.Fprintf(&report, "  - 规则: %s [%s]\n", finding.RuleName, finding.RuleID)
            // Findings without a line number carry no line detail to print.
            if finding.LineNo > 0 {
                fmt.Fprintf(&report, "    第 %d 行: %s\n", finding.LineNo, finding.Line)
            }
        }
        fmt.Fprintln(&report, "")
    }

    if err := os.WriteFile(*output, []byte(report.String()), 0666); err != nil {
        log.Fatalf("写入报告文件 %s 失败: %v", *output, err)
    }
}

踩坑实录

踩坑 1：误报太多，正常代码被标记

现象：检测结果里有大量误报，比如 PHPUnit 测试文件里的 eval() 调用被标记为高危。

解法：加白名单机制：

路径白名单：vendor/、tests/、test/ 目录直接跳过
规则级别调整：单独的 eval() 调用降为 Medium，只有 eval(base64_decode(...)) 才是 Critical

踩坑 2：大文件扫描很慢

现象：某些 JS 打包文件（几 MB）扫描特别慢。

原因：对这类大文件也做了逐行扫描。

解法：跳过超过 1MB 的文件，并跳过 .min.js、.bundle.js 等明显的打包文件。

踩坑 3：某些 WebShell 故意拆分函数名绕过检测

现象：某个真实 WebShell 用 $f = 'ev'.'al'; $f(...) 的方式拆分函数名，正则没有匹配到。

解法：加入对字符串拼接再调用的检测模式：

// PHP-008 flags an assignment of the concatenated string literals 'ev' . 'al'
// to a variable, i.e. the function name "eval" assembled at runtime.
// NOTE(review): only this exact split is matched; other splits such as
// 'e' . 'val', or other function names, are not covered by this pattern.
{
    ID:    "PHP-008",
    Name:  "拼接函数名后执行（混淆）",
    Level: RiskHigh,
    Pattern: regexp.MustCompile(
        `(?i)\$\w+\s*=\s*['"]ev['"]\s*\.\s*['"]al['"]\s*;`,
    ),
    FileTypes: []string{".php"},
},

集成到 CI/CD

#!/bin/bash
# Pre-deploy hook: scan the web root for WebShells and abort the deployment
# when the scanner fails or reports at least one critical finding.
REPORT=/tmp/scan-report.txt

# Fail closed: if the scanner itself cannot run, the deployment must not
# proceed as though the scan had passed.
if ! ./webshell-detector -dir /var/www/html -ext .php,.jsp -output "$REPORT"; then
  echo "WebShell检测工具执行失败，部署终止" >&2
  exit 1
fi

# grep -c prints the number of matching lines (0 when there are none).
CRITICAL=$(grep -c "\[严重\]" "$REPORT")
if [ "$CRITICAL" -gt 0 ]; then
  echo "WebShell检测发现 $CRITICAL 个严重威胁，部署终止"
  exit 1
fi