字符串(+拼接)

`+`字符串拼接

在 Go 中使用 `+` 拼接字符串时，编译器会把表达式转换为对运行时 concatstringX() 系列函数的调用（X 为参与拼接的操作数个数，即下文的 concatstring2 ～ concatstring5），这些函数最终都会调用 concatstrings()。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19


// concatstring2 implements the two-operand concatenation a0 + a1.
// It forwards buf and the operands, in order, to concatstrings.
func concatstring2(buf *tmpBuf, a0, a1 string) string {
	operands := []string{a0, a1}
	return concatstrings(buf, operands)
}

// concatstring3 implements the three-operand concatenation a0 + a1 + a2.
// It forwards buf and the operands, in order, to concatstrings.
func concatstring3(buf *tmpBuf, a0, a1, a2 string) string {
	operands := []string{a0, a1, a2}
	return concatstrings(buf, operands)
}

// concatstring4 implements the four-operand concatenation a0 + a1 + a2 + a3.
// It forwards buf and the operands, in order, to concatstrings.
func concatstring4(buf *tmpBuf, a0, a1, a2, a3 string) string {
	operands := []string{a0, a1, a2, a3}
	return concatstrings(buf, operands)
}

// concatstring5 implements the five-operand concatenation
// a0 + a1 + a2 + a3 + a4.
// It forwards buf and the operands, in order, to concatstrings.
func concatstring5(buf *tmpBuf, a0, a1, a2, a3, a4 string) string {
	operands := []string{a0, a1, a2, a3, a4}
	return concatstrings(buf, operands)
}

const

1
2
3
4
5
6


// tmpStringBufSize is the length, in bytes, of the tmpBuf array type.
//
// The constant is known to the compiler.
// There is no fundamental theory behind this number.
const tmpStringBufSize = 32

type

1
2


// tmpBuf is a temporary buffer of tmpStringBufSize (32) bytes. When the
// concatenated result is at most that many bytes long it is stored in this
// buffer; otherwise new memory is allocated for it.
type tmpBuf [tmpStringBufSize]byte	// tmpBuf only names the array type; it is passed around as *tmpBuf, i.e. a *[32]byte buffer

concatstrings()

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52


// concatstrings implements the Go string concatenation x+y+z+... over the
// operands supplied in the slice a.
//
// A non-nil buf means the compiler has determined that the result does not
// escape the calling function, so a small enough result can have its bytes
// stored in buf instead of in newly allocated memory.
//
// Empty operands are skipped. The result is "" when every operand is empty.
// If the summed length of the operands overflows int, throw is called.
func concatstrings(buf *tmpBuf, a []string) string {
	var (
		last     int // index of the last non-empty operand
		total    int // combined length, in bytes, of all operands
		nonEmpty int // number of non-empty operands
	)
	for i := range a {
		size := len(a[i])
		if size == 0 {
			continue
		}
		// A sum smaller than the running total means the int addition
		// wrapped around, i.e. the result would be too long.
		if total+size < total {
			throw("string concatenation too long")
		}
		total += size
		nonEmpty++
		last = i
	}

	switch nonEmpty {
	case 0:
		return ""
	case 1:
		// With a single non-empty operand, last is its index. It can be
		// returned as is when the result does not escape the calling
		// frame (buf != nil) or when its data is not on the stack.
		// Otherwise fall through to the copying path below.
		if buf != nil || !stringDataOnStack(a[last]) {
			return a[last]
		}
	}

	// s and b refer to the same total-byte storage. Strings are immutable,
	// so the bytes are written through b and then observed through s.
	s, b := rawstringtmp(buf, total)
	off := 0
	for _, x := range a {
		// b holds exactly total bytes, so every operand is copied whole
		// and copy reports len(x).
		off += copy(b[off:], x)
	}
	return s
}

stringDataOnStack()

1
2
3
4
5
6
7
8
9


// stringDataOnStack reports whether the data of s is stored on the
// current goroutine's stack.
func stringDataOnStack(s string) bool {
	// Address held in the str field of the string header.
	addr := uintptr(stringStructOf(&s).str)
	// Stack bounds recorded on the g returned by getg.
	bounds := getg().stack
	// The data is on the stack when addr lies in the half-open range
	// [bounds.lo, bounds.hi).
	if addr < bounds.lo {
		return false
	}
	return addr < bounds.hi
}

stringStructOf()

1
2
3
4
5
6
7
8
9


// stringStructOf reinterprets the string that sp points to as a
// *stringStruct, so the string header can be read and written in place.
// Nothing is copied; the result aliases *sp.
//
// For reference, stringStruct is laid out as:
//
//	type stringStruct struct {
//		str unsafe.Pointer
//		len int
//	}
func stringStructOf(sp *string) *stringStruct {
	header := unsafe.Pointer(sp)
	return (*stringStruct)(header)
}

rawstringtmp()

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11


// rawstringtmp returns a string s and a byte slice b, both of length l,
// that refer to the same storage.
//
// When buf is non-nil and l fits in it, the storage is the first l bytes
// of buf. Otherwise the storage comes from rawstring.
//
// NOTE: on the buf path the first element of b is addressed, so l must be
// at least 1 there.
func rawstringtmp(buf *tmpBuf, l int) (s string, b []byte) {
	// No buffer supplied, or the result does not fit: allocate instead.
	if buf == nil || l > len(buf) {
		return rawstring(l)
	}
	// buf is a pointer to an array, so slicing it yields a slice that
	// refers to buf's own bytes.
	b = buf[:l]
	// Make s point at the same bytes as b.
	s = slicebytetostringtmp(&b[0], len(b))
	return s, b
}

slicebytetostringtmp()

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32


// slicebytetostringtmp returns a "string" that refers directly to the n
// bytes starting at ptr, without copying them.
//
// Callers must make sure the returned string is no longer used once the
// calling goroutine modifies the original slice or synchronizes with
// another goroutine.
//
// The function is only called when instrumenting; otherwise it is
// intrinsified by the compiler.
//
// Some internal compiler optimizations rely on it:
//   - m[T1{... Tn{..., string(k), ...} ...}] and m[string(k)], where k is
//     a []byte and T1 to Tn is a nesting of struct and array literals.
//   - "<"+string(b)+">" concatenation, where b is a []byte.
//   - string(b)=="foo" comparison, where b is a []byte.
func slicebytetostringtmp(ptr *byte, n int) (str string) {
	// The race, msan and asan hooks are only told about non-empty reads.
	if n > 0 {
		data := unsafe.Pointer(ptr)
		size := uintptr(n)
		if raceenabled {
			racereadrangepc(data,
				size,
				getcallerpc(),
				abi.FuncPCABIInternal(slicebytetostringtmp))
		}
		if msanenabled {
			msanread(data, size)
		}
		if asanenabled {
			asanread(data, size)
		}
	}
	// Point str at ptr with length n.
	hdr := stringStructOf(&str)
	hdr.str = unsafe.Pointer(ptr)
	hdr.len = n
	return str
}

rawstring()

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15


// rawstring allocates storage for a new string of size bytes. The returned
// string s and byte slice b both refer to that same storage.
// The storage is not zeroed. Callers should set the string contents
// through b and then drop b.
func rawstring(size int) (s string, b []byte) {
	// Obtain size bytes of new memory.
	mem := mallocgc(uintptr(size), nil, false)

	// Point the string header at the new memory.
	hdr := stringStructOf(&s)
	hdr.str = mem
	hdr.len = size

	// Overwrite b's header in place so that it uses the same memory.
	// The two size values are presumably len and cap; the slice type is
	// declared outside this excerpt.
	*(*slice)(unsafe.Pointer(&b)) = slice{mem, size, size}

	return s, b
}

+字符串拼接#

const#

type#

concatstrings()#

stringDataOnStack()#

stringStructOf()#

rawstringtmp()#

slicebytetostringtmp()#

rawstring()#

`+`字符串拼接

const

type

concatstrings()

stringDataOnStack()

stringStructOf()

rawstringtmp()

slicebytetostringtmp()

rawstring()