Golang 启动流程-流程分析版

还记得当初学习 TP（ThinkPHP）时，为了搞清楚框架的运行流程，我们打了各种断点、读了大量代码。现在，我想用同样的方式搞清楚 Go 程序的启动流程。

启动流程分析

请注意这篇博客中会有很多汇编代码，你需要提前了解一下这方面的知识（Go 汇编器快速入门请参考这里）。让我们开始吧！

当前的Go版本为：go1.11.5 darwin/amd64

package main

import "fmt"

// main prints a fixed greeting. It is the minimal program whose
// disassembly is examined in the rest of this article.
func main() {
	fmt.Println("Hell asm!")
}

然后，编译并链接：

go build

生成可运行程序

objdump -d 可运行程序 > asm.txt

大概是一个 10w 多行的文件，我们要在这里面寻找程序的入口。

针对不同的平台都有各自的特定的汇编文件，我这里通过rt0_darwin_amd64.s定位到runtime.rt0_go方法。先来看下asm_amd64.s的源码。文件很大，省略部分代码，留下初始化过程的重要步骤。

// 源码 src/runtime/rt0_darwin_amd64.s

__rt0_amd64_darwin:
 1052290:	e9 eb c6 ff ff 	jmp	-14613 <__rt0_amd64>

// 源码 src/runtime/asm_amd64.s
// _rt0_amd64是大多数amd64系统使用时的常用启动代码

 __rt0_amd64:
 ...
 104e989:	e9 02 00 00 00 	jmp	2 <runtime.rt0_go>

runtime.rt0_go:

// 1.查询cpu信息
// 2.如果有cgo，初始化cgo； 调用setg_gcc（g0），然后更新stackguard。
// 3.设置tls 
 104ea42:	e8 09 3c 00 00 	callq	15369 <runtime.settls>
// 4.TLS 自检失败时调用 abort 终止程序 src/runtime/stubs.go
 104ea63:	e8 18 1a 00 00 	callq	6680 <runtime.abort>
// 5.做一些运算检测 src/runtime/runtime1.go
 104ea87:	e8 34 71 fe ff 	callq	-102092 <runtime.check>
// 6.把二进制文件的绝对路径找出来 src/runtime/runtime1.go
 104ea9d:	e8 ce 6b fe ff 	callq	-103474 <runtime.args>
// 7.获取CPU核数与内存页大小 src/runtime/os_darwin.go
 104eaa2:	e8 49 62 fd ff 	callq	-171447 <runtime.osinit>
// 8. 命令行参数、环境变量、gc、栈空间、内存管理、所有P实例、HASH算法等初始化 src/runtime/proc.go
 104eaa7:	e8 34 ae fd ff 	callq	-152012 <runtime.schedinit>
// 9.新建一个goroutine，该goroutine绑定runtime.main，放在P的本地队列，等待调度 src/runtime/proc.go
 104eab6:	e8 65 17 fe ff 	callq	-125083 <runtime.newproc>
// 10.启动M，开始调度goroutine src/runtime/proc.go
 104eabd:	e8 7e cc fd ff 	callq	-144258 <runtime.mstart>

执行流程总结

按顺序总结下runtime.rt0_go里几件重要的事：

1.检查运行平台的CPU，设置好程序运行需要相关标志。
2.TLS的初始化。
3.runtime.args、runtime.osinit、runtime.schedinit 三个方法初始化好程序运行需要的各种变量与调度器。
4.runtime.newproc 创建新的goroutine并绑定runtime.main（runtime.main 最终会调用用户写的main方法）。
5.runtime.mstart 开始goroutine的调度。

具体源码

下面接着针对上面几个runtime函数，粗略探索下干了什么事情。我们也只看一层代码，有兴趣的同学可以顺着这个顺序深入看下

runtime.args

就是把二进制文件的绝对路径找出来，并存在os.executablePath里。

// args records the argument count c and argument vector v in the
// argc and argv variables, then passes both on to sysargs.
func args(c int32, v **byte) {
	argc = c
	argv = v
	sysargs(c, v)
}

// executablePath holds the path of the running binary. It is assigned
// by sysargs and linked to os.executablePath by the directive below.
//
//go:linkname executablePath os.executablePath
var executablePath string

// sysargs finds the executable path stored after the argument and
// environment vectors and saves it in executablePath.
func sysargs(argc int32, argv **byte) {
	// Begin right after argv's terminating nil and walk forward to the
	// next nil (the end of envv); the first string after it is the path.
	idx := argc + 1
	for {
		if argv_index(argv, idx) == nil {
			break
		}
		idx++
	}
	path := gostringnocopy(argv_index(argv, idx+1))

	// The "executable_path=" prefix was added after OS X 10.11;
	// drop it when it is there.
	const marker = "executable_path="
	if len(path) > len(marker) && path[:len(marker)] == marker {
		path = path[len(marker):]
	}
	executablePath = path
}

runtime.osinit

获取CPU核数与内存页大小。本文的测试环境是 darwin/amd64，对应的实现如下：

// osinit stores the CPU count returned by getncpu in ncpu and the
// page size returned by getPageSize in physPageSize.
//
// BSD interface for threading.
func osinit() {
	// pthread_create delayed until end of goenvs so that we
	// can look at the environment first.

	ncpu = getncpu()
	physPageSize = getPageSize()
}

// sysctl name components used by getncpu and getPageSize.
const (
	_CTL_HW      = 6 // first component of both the hw.ncpu and hw.pagesize queries
	_HW_NCPU     = 3 // second component of the hw.ncpu query
	_HW_PAGESIZE = 7 // second component of the hw.pagesize query
)

// getncpu reports the CPU count obtained from the hw.ncpu sysctl.
// It falls back to 1 when the sysctl call fails or yields a
// non-positive value.
func getncpu() int32 {
	// Query hw.ncpu through sysctl.
	var count uint32
	size := unsafe.Sizeof(count)
	name := [2]uint32{_CTL_HW, _HW_NCPU}
	if sysctl(&name[0], 2, (*byte)(unsafe.Pointer(&count)), &size, nil, 0) < 0 {
		return 1
	}
	if n := int32(count); n > 0 {
		return n
	}
	return 1
}

// getPageSize reports the page size obtained from the hw.pagesize
// sysctl. It returns 0 when the sysctl call fails or yields a
// non-positive value.
func getPageSize() uintptr {
	// Query hw.pagesize through sysctl.
	var pageSize uint32
	size := unsafe.Sizeof(pageSize)
	name := [2]uint32{_CTL_HW, _HW_PAGESIZE}
	if sysctl(&name[0], 2, (*byte)(unsafe.Pointer(&pageSize)), &size, nil, 0) < 0 {
		return 0
	}
	if int32(pageSize) <= 0 {
		return 0
	}
	return uintptr(pageSize)
}

runtime.schedinit

初始化程序运行需要环境

// schedinit runs the one-time runtime initialization steps listed in
// its body and finishes by sizing the set of Ps via procresize.
//
// The bootstrap sequence is:
//
//	call osinit
//	call schedinit
//	make & queue new G
//	call runtime·mstart
//
// The new G calls runtime·main.
func schedinit() {
	// raceinit must be the first call to race detector.
	// In particular, it must be done before mallocinit below calls racemapshadow.
	// Fetch the current g.
	_g_ := getg()
	if raceenabled {
		_g_.racectx, raceprocctx0 = raceinit()
    }
    
	sched.maxmcount = 10000 // upper limit on the global thread count

	tracebackinit() // record the PCs of a set of functions, for use by traceback
	moduledataverify() // verify that the linker symbols are correct
	stackinit() // initialize stacks
	mallocinit() // initialize the memory allocator
	mcommoninit(_g_.m)
	cpuinit()       // must run before alginit
	alginit()       // maps must not be used before this call
	modulesinit()   // provides activeModules
	typelinksinit() // uses maps, activeModules
	itabsinit()     // uses activeModules

	msigsave(_g_.m)
	initSigmask = _g_.m.sigmask

	goargs() // collect the command-line arguments
	goenvs() // collect all environment variables
	parsedebugvars() // apply the GODEBUG settings
	gcinit() // initialize the garbage collector

	sched.lastpoll = uint64(nanotime())
	procs := ncpu // number of Ps defaults to the CPU count
	if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 { // a positive GOMAXPROCS value overrides the default
		procs = n
	}
	if procresize(procs) != nil { // initialize all Ps; a non-nil result is fatal here
		throw("unknown runnable goroutine during bootstrap")
	}

	// For cgocheck > 1, we turn on the write barrier at all times
	// and check all pointer writes. We can't do this until after
	// procresize because the write barrier needs a P.
	if debug.cgocheck > 1 {
		writeBarrier.cgo = true
		writeBarrier.enabled = true
		for _, p := range allp {
			p.wbBuf.reset()
		}
	}

	if buildVersion == "" {
		// Condition should never trigger. This code just serves
		// to ensure runtime·buildVersion is kept in the resulting binary.
		buildVersion = "unknown"
	}
}

runtime.newproc

newproc() 比较简单，只是获取参数的起始地址、当前的 g 以及调用方的 PC，然后在系统栈上调用 newproc1()。真正干活的是newproc1()。

runtime.newproc1()

newproc1() 就比较长了，这儿概括下它做了的事情：

从TLS拿到当前运行的G实例，并且使绑定到当前线程的M实例不可抢占。
从M实例上取到P实例，如果P实例本地上有free goroutine就拿过去，没有就到全局调度器那儿偷一些过来。这两个地方都没有，就按照最低栈大小2K new一个G实例（即goroutine）。
然后设置好G实例上的各种寄存器的信息，SP、PC等。
将G实例的状态变更为Grunnable，放到P实例的本地可运行队列里等待调度执行，若队列满了，就把一半的G移到全局调度器下。
释放M实例的不可抢占状态。返回新的G实例。

如果是程序刚启动，经由runtime.rt0_go调用newproc1时，实质干的事情就是创建一个G，把runtime.main(也包含main.main)放进去。在执行mstart时，触发调度。所以main实际是在一个新的G里运行的，而不是g0。

// newproc creates a new g running fn with siz bytes of arguments.
// Put it on the queue of g's waiting to run.
// The compiler turns a go statement into a call to this.
// Cannot split the stack because it assumes that the arguments
// are available sequentially after &fn; they would not be
// copied if a stack split occurred.
//
// The actual work is delegated to newproc1, which is invoked through
// systemstack.
//go:nosplit
func newproc(siz int32, fn *funcval) {
	// The arguments are assumed to start one pointer-size past &fn.
	argp := add(unsafe.Pointer(&fn), sys.PtrSize)
	// Capture the calling g and the caller's PC before handing off,
	// so that newproc1 receives them as callergp and callerpc.
	gp := getg()
	pc := getcallerpc()
	systemstack(func() {
		newproc1(fn, (*uint8)(argp), siz, gp, pc)
	})
}

// Create a new g running fn with narg bytes of arguments starting
// at argp. callerpc is the address of the go statement that created
// this. The new g is put on the queue of g's waiting to run.
func newproc1(fn *funcval, argp *uint8, narg int32, callergp *g, callerpc uintptr)

runtime.mstart

启动M


// mstart is called to start an M. It establishes the stack bounds and
// stack guards of the current g if needed, runs mstart1, and finally
// calls mexit.
//
// This must not split the stack because we may not even have stack
// bounds set up yet.
//
// May run during STW (because it doesn't have a P yet), so write
// barriers are not allowed.
//
//go:nosplit
//go:nowritebarrierrec
func mstart() {
	_g_ := getg()

	// A zero stack.lo means no stack bounds have been recorded yet.
	osStack := _g_.stack.lo == 0
	if osStack {
		// Initialize stack bounds from system stack.
		// Cgo may have left stack size in stack.hi.
		// minit may update the stack bounds.
		size := _g_.stack.hi
		if size == 0 {
			// No size was left in stack.hi; fall back to a default.
			size = 8192 * sys.StackGuardMultiplier
		}
		// Use the address of the local variable size as the upper
		// bound and derive the lower bound from it.
		_g_.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
		_g_.stack.lo = _g_.stack.hi - size + 1024
	}
	// Initialize stack guards so that we can start calling
	// both Go and C functions with stack growth prologues.
	_g_.stackguard0 = _g_.stack.lo + _StackGuard
	_g_.stackguard1 = _g_.stackguard0
	mstart1()

	// Exit this thread.
	if GOOS == "windows" || GOOS == "solaris" || GOOS == "plan9" || GOOS == "darwin" {
		// Windows, Solaris, Darwin and Plan 9 always system-allocate
		// the stack, but put it in _g_.stack before mstart,
		// so the logic above hasn't set osStack yet.
		osStack = true
	}
	mexit(osStack)
}

// mstart1 finishes starting the current M: it checks that it is
// running on the M's g0, saves the caller's PC and SP, initializes the
// M, runs the optional mstartfn, and then calls schedule.
func mstart1() {
	_g_ := getg()

	// mstart1 must run on the M's g0; anything else is fatal.
	if _g_ != _g_.m.g0 {
		throw("bad runtime·mstart")
	}

	// Record the caller for use as the top of stack in mcall and
	// for terminating the thread.
	// We're never coming back to mstart1 after we call schedule,
	// so other calls can reuse the current frame.
	save(getcallerpc(), getcallersp())
	asminit()
	minit() // initialize the new M

	// Install signal handlers; after minit so that minit can
	// prepare the thread to be able to handle the signals.
	if _g_.m == &m0 {
		mstartm0()
	}

	// Run the M's start function, if one was set.
	if fn := _g_.m.mstartfn; fn != nil {
		fn()
	}

	if _g_.m.helpgc != 0 {
		// Clear the helpgc flag and stop this M.
		_g_.m.helpgc = 0
		stopm()
	} else if _g_.m != &m0 {
		// Every M other than m0 acquires the P recorded in nextp
		// and then clears nextp.
		acquirep(_g_.m.nextp.ptr())
		_g_.m.nextp = 0
	}
	schedule()
}