一.String源码
1.Swift中String在内存中是如何存储的
这里我们定义了一个空字符串,想要通过内存信息来查看关于String的内存信息
var empty = ""
print(empty)
/*
(lldb) po withUnsafePointer(to: &empty){print($0)}
0x00000001000102c8
0 elements
(lldb) x/8g 0x00000001000102c8
0x1000102c8: 0x0000000000000000 0xe000000000000000
0x1000102d8: 0x0000000000000000 0x0000000000000000
0x1000102e8: 0x0000000000000000 0x0000000000000000
0x1000102f8: 0x0000000000000000 0x0000000000000000
(lldb)
*/
发现只有一个0xe000000000000000
信息,其它的都不清楚
因此,我们只能通过源码来分析
找到标准库里面的String.swift
@frozen
public struct String {
public // @SPI(Foundation)
var _guts: _StringGuts
@inlinable @inline(__always)
internal init(_ _guts: _StringGuts) {
self._guts = _guts
_invariantCheck()
}
// This is intentionally a static function and not an initializer, because
// an initializer would conflict with the Int-parsing initializer, when used
// as function name, e.g.
// [1, 2, 3].map(String.init)
@_alwaysEmitIntoClient
@_semantics("string.init_empty_with_capacity")
@_semantics("inline_late")
@inlinable
internal static func _createEmpty(withInitialCapacity: Int) -> String {
return String(_StringGuts(_initialCapacity: withInitialCapacity))
}
/// Creates an empty string.
///
/// Using this initializer is equivalent to initializing a string with an
/// empty string literal.
///
/// let empty = ""
/// let alsoEmpty = String()
@inlinable @inline(__always)
@_semantics("string.init_empty")
public init() { self.init(_StringGuts()) }
}
-
String
是一个结构体类型 - 里面有一个成员变量
_StringGuts
探究_StringGuts
,打开StringGuts.swift
@frozen
public // SPI(corelibs-foundation)
struct _StringGuts: UnsafeSendable {
@usableFromInline
internal var _object: _StringObject
@inlinable @inline(__always)
internal init(_ object: _StringObject) {
self._object = object
_invariantCheck()
}
// Empty string
@inlinable @inline(__always)
init() {
self.init(_StringObject(empty: ()))
}
}
-
_StringGuts
是一个结构体类型 - 在
_StringGuts
的初始化函数中又传入了_StringObject
探究_StringObject
,进入StringObject.swift
@inlinable @inline(__always)
internal init(empty:()) {
// Canonical empty pattern: small zero-length string
#if arch(i386) || arch(arm) || arch(arm64_32) || arch(wasm32)
//32位平台(i386、arm、arm64_32、wasm32)执行的逻辑
self.init(
count: 0,
variant: .immortal(0),
discriminator: Nibbles.emptyString,
flags: 0)
#else
self._countAndFlagsBits = 0
self._object = Builtin.valueToBridgeObject(Nibbles.emptyString._value)
#endif
_internalInvariant(self.smallCount == 0)
_invariantCheck()
}
- 如果是
i386、arm、arm64_32、wasm32
这类32位架构,执行前一个逻辑,调用自身的init函数;
64位架构(如x86_64、arm64)执行#else分支,直接给_countAndFlagsBits和_object赋值
探究self.init(count: 0, variant: .immortal(0), discriminator: Nibbles.emptyString, flags: 0)
internal struct _StringObject{
...
enum Nibbles {}
@usableFromInline
internal var _count: Int
@usableFromInline
internal var _variant: Variant
@usableFromInline
internal var _discriminator: UInt8
@usableFromInline
internal var _flags: UInt16
@inlinable @inline(__always)
init(count: Int, variant: Variant, discriminator: UInt64, flags: UInt16) {
_internalInvariant(discriminator & 0xFF00_0000_0000_0000 == discriminator,
"only the top byte can carry the discriminator and small count")
//大小,代表当前字符串的大小
self._count = count
//枚举,来判断字符串类型
self._variant = variant
//识别器,区分当前字符串的存储类型(small/large等),对于small string还区分是否为ASCII码
self._discriminator = UInt8(truncatingIfNeeded: discriminator &>> 56)
//标志
self._flags = flags
self._invariantCheck()
}
...
}
-
_StringObject
也是一个结构体类型,存放的一些变量。 - 从目前来看
String
就是值类型
关于discriminator: Nibbles.emptyString
extension _StringObject.Nibbles {
// The canonical empty string is an empty small string
@inlinable @inline(__always)
internal static var emptyString: UInt64 {
//是否是ascii
return _StringObject.Nibbles.small(isASCII: true)
}
}
@inlinable @inline(__always)
internal static func small(isASCII: Bool) -> UInt64 {
return isASCII ? 0xE000_0000_0000_0000 : 0xA000_0000_0000_0000
}
// Discriminator for large, immortal, swift-native strings
// 代表原生的大的字符串
@inlinable @inline(__always)
internal static func largeImmortal() -> UInt64 {
return 0x8000_0000_0000_0000
}
- 这也就是我们刚开始时,观察
empty
字符串内存空间里有一个0xe000000000000000
- 如果我们将字符改为中文字符串,例如"我",那么此时这里就会返回
0xA000_0000_0000_0000
-
large string
会返回0x8000_0000_0000_0000
总结:识别器(discriminator)就是用来区分当前字符串的存储类型(例如small string是否为ASCII、是否为large string)
2.小字符串
源码中small string
的注释
On 64-bit platforms, small strings have the following per-byte layout. When
stored in memory (little-endian), their first character ('a') is in the lowest
address and their top-nibble and count is in the highest address.
┌───────────────────────────────┬─────────────────────────────────────────────┐
│ _countAndFlags │ _object │
├───┬───┬───┬───┬───┬───┬───┬───┼───┬───┬────┬────┬────┬────┬────┬────────────┤
│ 0 │ 1 │ 2 │ 3 │ 4 │ 5 │ 6 │ 7 │ 8 │ 9 │ 10 │ 11 │ 12 │ 13 │ 14 │ 15 │
├───┼───┼───┼───┼───┼───┼───┼───┼───┼───┼────┼────┼────┼────┼────┼────────────┤
│ a │ b │ c │ d │ e │ f │ g │ h │ i │ j │ k │ l │ m │ n │ o │ 1x10 count │
└───┴───┴───┴───┴───┴───┴───┴───┴───┴───┴────┴────┴────┴────┴────┴────────────┘
On 32-bit platforms, we have less space to store code units, and it isn't
contiguous. However, we still use the above layout for the RawBitPattern
representation.
┌───────────────┬───────────────────┬────────┬─────────┐
│ _count │_variant .immortal │ _discr │ _flags │
├───┬───┬───┬───┼───┬───┬───┬───┬───┼────────┼────┬────┤
│ 0 │ 1 │ 2 │ 3 │ 4 │ 5 │ 6 │ 7 │ 8 │ 9 │ 10 │ 11 │
├───┼───┼───┼───┼───┴───┴───┴───┴───┼────────┼────┼────┤
│ a │ b │ c │ d │ e f g h │1x10 cnt│ i │ j │
└───┴───┴───┴───┴───────────────────┴────────┴────┴────┘
- 对于64位来说,从低地址到高地址依次存放字符串数据。最高字节(第15字节)的高4位存放识别器,低4位存放
count
(UTF-8码元个数,最多15个) - 小字符串指的是UTF-8编码不超过15字节、可以直接内联存放在这16字节空间里的字符串
那么我们现在来尝试验证一下
var str = "ab"
print(str)
/*
(lldb) po withUnsafePointer(to: &str){print($0)}
0x00000001000102c8
0 elements
(lldb) x/8g 0x00000001000102c8
0x1000102c8: 0x0000000000006261 0xe200000000000000
0x1000102d8: 0x0000000000000000 0x0000000000000000
0x1000102e8: 0x0000000000000000 0x0000000000000000
0x1000102f8: 0x0000000000000000 0x0000000000000000
(lldb)
*/
- 低位存放
6261
也就是ab字符串的值。类似于NSString
的内存方式 - 高位存放的是识别器数据和字符串count
3.大字符串
var str = "abcdefghijklmnopq"
print(str)
/*
(lldb) x/8g 0x00000001000102c8
0x1000102c8: 0xd000000000000011 0x800000010000a570
0x1000102d8: 0x0000000000000000 0x0000000000000000
0x1000102e8: 0x0000000000000000 0x0000000000000000
0x1000102f8: 0x0000000000000000 0x0000000000000000
(lldb)
*/
此时的内存布局就变了,第二个8字节的最高4位(0x8)我们应该清楚表示的是一个large string
源码中large string
的注释
Large strings can either be "native", "shared", or "foreign".
Native strings have tail-allocated storage, which begins at an offset of
`nativeBias` from the storage object's address. String literals, which reside
in the constant section, are encoded as their start address minus `nativeBias`,
unifying code paths for both literals ("immortal native") and native strings.
Native Strings are always managed by the Swift runtime.
Shared strings do not have tail-allocated storage, but can provide access
upon query to contiguous UTF-8 code units. Lazily-bridged NSStrings capable of
providing access to contiguous ASCII/UTF-8 set the ObjC bit. Accessing shared
string's pointer should always be behind a resilience barrier, permitting
future evolution.
Foreign strings cannot provide access to contiguous UTF-8. Currently, this only
encompasses lazily-bridged NSStrings that cannot be treated as "shared". Such
strings may provide access to contiguous UTF-16, or may be discontiguous in
storage. Accessing foreign strings should remain behind a resilience barrier
for future evolution. Other foreign forms are reserved for the future.
Shared and foreign strings are always created and accessed behind a resilience
barrier, providing flexibility for the future.
┌────────────┐
│ nativeBias │
├────────────┤
│ 32 │
└────────────┘
┌───────────────┬────────────┐
│ b63:b60 │ b60:b0 │
├───────────────┼────────────┤
│ discriminator │ objectAddr │
└───────────────┴────────────┘
discriminator: See comment for _StringObject.Discriminator
objectAddr: The address of the beginning of the potentially-managed object.
TODO(Future): For Foreign strings, consider allocating a bit for whether they
can provide contiguous UTF-16 code units, which would allow us to avoid doing
the full call for non-contiguous NSString.
- 大字符串的内容不再内联存放在这16字节里,而是存放在另一块内存中(原生字符串由Swift运行时管理;字符串字面量位于常量区),第二个8字节中除去最高4位识别器之后的低位存放的是该内存的地址信息。
- 真正的字符串内存地址是需要将存放的
内存地址 + nativeBias(32),例如这里的0x10000a570 + 0x20 = 0x10000a590
那么我们去获取一下
(lldb) x/8g 0x00000001000102c8
0x1000102c8: 0xd000000000000011 0x800000010000a570
0x1000102d8: 0x0000000000000000 0x0000000000000000
0x1000102e8: 0x0000000000000000 0x0000000000000000
0x1000102f8: 0x0000000000000000 0x0000000000000000
(lldb) x/8g 0x10000a590
0x10000a590: 0x6867666564636261 0x706f6e6d6c6b6a69
0x10000a5a0: 0x00000020000a0071 0x0000000000000000
0x10000a5b0: 0x7365547466697773 0x7465677261542f74
0x10000a5c0: 0x74654d7373616c43 0x77732e6174616461
(lldb)
- 从该内存中已经能够获取到字符串信息了
那么之前第一个8字节0xd000000000000011
又是存放的什么信息呢?
首先想一个问题,之前小字符串的时候,高地址上存放的是识别器并且识别器的类型为ASCII
,此时的大字符串存放的是0x8000_0000_0000_0000
。导致了字符串类型并不清楚了。并且count存放在哪里也不清楚了(当然通过观察很明显的看出0x11就是count)。
通过源码中的注释来解释这个8字节的位域到底存放了哪些信息
// TODO(String docs): Combine this with Nibbles table, and perhaps small string
// table, into something that describes the higher-level structure of
// _StringObject.
All non-small forms share the same structure for the other half of the bits
(i.e. non-object bits) as a word containing code unit count and various
performance flags. The top 16 bits are for performance flags, which are not
semantically relevant but communicate that some operations can be done more
efficiently on this particular string, and the lower 48 are the code unit
count (aka endIndex).
┌─────────┬───────┬──────────────────┬─────────────────┬────────┬───────┐
│ b63 │ b62 │ b61 │ b60 │ b59:48 │ b47:0 │
├─────────┼───────┼──────────────────┼─────────────────┼────────┼───────┤
│ isASCII │ isNFC │ isNativelyStored │ isTailAllocated │ TBD │ count │
└─────────┴───────┴──────────────────┴─────────────────┴────────┴───────┘
isASCII: set when all code units are known to be ASCII, enabling:
- Trivial Unicode scalars, they're just the code units
- Trivial UTF-16 transcoding (just bit-extend)
- Also, isASCII always implies isNFC
isNFC: set when the contents are in normal form C
- Enables trivial lexicographical comparisons: just memcmp
- `isASCII` always implies `isNFC`, but not vice versa
isNativelyStored: set for native stored strings
- `largeAddressBits` holds an instance of `_StringStorage`.
- I.e. the start of the code units is at the stored address + `nativeBias`
isTailAllocated: contiguous UTF-8 code units starts at address + `nativeBias`
- `isNativelyStored` always implies `isTailAllocated`, but not vice versa
(e.g. literals)
- `isTailAllocated` always implies `isFastUTF8`
TBD: Reserved for future usage
- Setting a TBD bit to 1 must be semantically equivalent to 0
- I.e. it can only be used to "cache" fast-path information in the future
count: stores the number of code units, corresponds to `endIndex`.
NOTE: isNativelyStored is *specifically* allocated to b61 to align with the
bit-position of isSmall on the BridgeObject. This allows us to check for
native storage without an extra branch guarding against smallness. See
`_StringObject.hasNativeStorage` for this usage.
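-
b63 -> isASCII
最高位存放的是否为ASCII
-
b62 -> isNFC
当内容符合Unicode规范化形式C(Normalization Form C,简称NFC)时设置。如果为ASCII
总是为1 -
b61 -> isNativelyStored
是否为原生存储(存放的地址处是一个_StringStorage实例) -
b60 -> isTailAllocated
连续的UTF-8码元是否从“存储地址 + nativeBias”处开始;字符串字面量虽然不是原生存储,该位也为1 -
b59:48 -> TBD
占位,供以后使用 -
b47:0 -> count
,码元(code unit)个数,对应endIndex
0xd000000000000011
-> 最高4位为1101,即isASCII = 1、isNFC = 1、isNativelyStored = 0、isTailAllocated = 1;低48位为0x11,即count = 17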
总结:前8个字节其实就是存储的flag + count
4.String.Index
let str = "Hello World"
/*
当前字符串不支持下标的存取方式
*/
//str[1]
//必须要使用String.Index来取字符串
print(str[str.index(str.startIndex, offsetBy: 1)]) // e
1.为什么String要这么繁琐?
聊到这个问题我们就必须要明白Swift String
代表的是什么?一系列的Characters(字符)
,字符的表示方式有很多,比如我们最熟悉的ASCII码
,ASCII码
一共规定了128个字符,对于英文字符来说128个字符已经够用了,但是相对于其它语言来说,这是远远不够的。
这就意味着不同国家的语言都需要有自己的编码格式,这个时候同一个二进制文件就有可能翻译成不同的字符,有没有一种编码能够把所有的符号都纳入其中,这就是我们熟悉的Unicode
,但是Unicode
只是规定了符号对应的二进制代码,并没有详细明确这个二进制代码应该如何存储。
这里举一个例子:假如我们有一个字符串"我是Kody",其中对应的Unicode、二进制信息分别为
我 -> 6212 -> 0110 0010 0001 0010
是 -> 662F -> 0110 0110 0010 1111
K -> 004B -> 0000 0000 0100 1011
o -> 006F -> 0000 0000 0110 1111
d -> 0064 -> 0000 0000 0110 0100
y -> 0079 -> 0000 0000 0111 1001
此时的ASCII码
对应的Unicode
只占了1字节,如果统一使用2字节的形式存储的话,会造成很大的资源浪费
为了解决资源浪费的问题,就引入了UTF-8
编码方式。
UTF-8
最大的一个特点,就是它是一种变长的编码方式。它可以使用1~4字节表示一个符号,根据不同的符号而变化字节长度。这里简单介绍一下UTF-8
的规则:
单字节的字符,字节的第一位设为0,对于英语文本,
UTF-8码
只要占用一个字节,和ASCII码完全相同。(例如:“K”的UTF-8码
与ASCII码
一致都是0x4B
)n个字节的字符(n>1),第一个字节的前n位设为1,第n+1位设为0,后面字节的前两位都设为10,这n个字节的其余空位填充该字符的
unicode码
,高位用0补足
我 -> 11100110 10001000 10010010
是 -> 11100110 10011000 10101111
K -> 0100 1011
o -> 0110 1111
d -> 0110 0100
y -> 0111 1001
比如存放“我”,“我”的Unicode码
为2个字节,UTF-8
使用3个字节存储。第一个字节的前3位存放1,第4位存放0,其余2个字节的前2位存放10。这些规则位一共占用8位(4 + 2 + 2),刚好1字节,剩下的16位(2字节)刚好存放Unicode码
-
Unicode
规定了字符对应的二进制 -
UTF-8
规定了Unicode
在内存中存储方式
当我们了解了Unicode
和UTF-8
后,此时我们再来分析为什么Swift中的字符串不能通过下标(Int)来取字符?
对于Swift来说,String是一系列字符的集合,也就意味着String中的每一个元素是不等长的。那也就意味着我们在进行内存移动的时候步长是不一样的(每个字符的UTF-8
可能都不是一样长的,1~4个字节)。
比如说我要取str[1],那我是不是要把我
这个字符遍历完之后才能确定是
的偏移量,也就是说每次按下标取字符都要从头遍历(时间复杂度为O(n),而不是数组下标的O(1)),这个时候无疑增加了很多的时间消耗,也就是这个原因我们不能通过Int
作为下标去访问String
。
2.String.Index的本质到底是什么?
之前了解了因为编码的问题,导致的步长不同,因而不能使用Int
下标去访问字符串。
那么String.Index
是怎么去访问的呢?
进入StringIndex.swift
String's Index has the following layout:
┌──────────┬───────────────────╥────────────────┬──────────╥────────────────┐
│ b63:b16 │ b15:b14 ║ b13:b8 │ b7:b1 ║ b0 │
├──────────┼───────────────────╫────────────────┼──────────╫────────────────┤
│ position │ transcoded offset ║ grapheme cache │ reserved ║ scalar aligned │
└──────────┴───────────────────╨────────────────┴──────────╨────────────────┘
└──────── resilient ────────┘
Position, transcoded offset, and scalar aligned are fully exposed in the ABI.
Grapheme cache and reserved are partially resilient: the fact that there are 13
bits with a default value of `0` is ABI, but not the layout, construction, or
interpretation of those bits. All use of grapheme cache should be behind
non-inlinable function calls. Inlinable code should not set a non-zero value to
grapheme cache bits: doing so breaks back deployment as they will be interpreted
as a set cache.
- position aka `encodedOffset`: A 48-bit offset into the string's code units
- transcoded offset: a 2-bit sub-scalar offset, derived from transcoding
<resilience barrier>
- grapheme cache: A 6-bit value remembering the distance to the next grapheme
boundary.
- reserved: 7-bit for future use.
<resilience barrier>
- scalar aligned, whether this index is known to be scalar-aligned (see below)
-
position aka encodedOffset
一个48bit值,用来记录码位偏移量 -
transcoded offset
一个2bit的值,转码时产生的标量内部偏移(sub-scalar offset),例如以UTF-16视图访问UTF-8存储的字符串时,用来表示索引落在同一个标量内部的第几个码元 -
grapheme cache
一个6bit的值,用来缓存到下一个字符(字形簇)边界的距离 -
reserved
7bit的预留字段 -
scalar aligned
一个1bit的值,用来记录标量是否已经对齐过
比如说,访问当前的“我”,当前的encodedOffset(码位偏移量)
为0,当前的transcoded offset(标量内部偏移)
为0
“是”的encodedOffset
为3(“我”占3个UTF-8码元,即3字节),transcoded offset
为0
“K”的encodedOffset
为6,transcoded offset
为0(encodedOffset以码元为单位而不是bit;transcoded offset只有2bit,取值范围是0~3,对于落在标量边界上的索引它是0)
所以当我们使用了String.Index
后,我们的字符串通过encodedOffset
和transcoded offset
定位到对应码元的位置,再从该位置解码出对应的字符。
String.Index
的本质:
- 当我们在构建
String.Index
的时候,其实就是把encodedOffset
和transcoded offset
计算出来存入String.Index
中。 - 当我们去取字符的时候,会根据
String.Index
的encodedOffset
和transcoded offset
信息去取出对应的字符。
// Index
extension _StringGuts {
@usableFromInline
internal typealias Index = String.Index
//源码中的startIndex通过_encodedOffset传入0创建,本身startIndex就是在字符串开始地方
@inlinable @inline(__always)
internal var startIndex: String.Index {
return Index(_encodedOffset: 0)._scalarAligned
}
//源码中的endIndex传入count生成
@inlinable @inline(__always)
internal var endIndex: String.Index {
return Index(_encodedOffset: self.count)._scalarAligned
}
}
因此我们该怎么去取字符串的最后一个元素呢?
let str = "Hello World"
//数组越界
//print(str[str.endIndex]) //Swift/StringRangeReplaceableCollection.swift:302: Fatal error: String index is out of bounds
print(str[str.index(str.endIndex, offsetBy: -1)]) // d
那是因为源码中的endIndex
并没有取count -1
而是直接存入的count
导致的。也就是说endIndex指向的是最后一个元素之后的位置(past-the-end),并不对应任何字符,所以需要向前偏移一个位置才能取到最后一个元素
二.Array源码
1.Array的数据结构
var arr = [1, 2, 3, 4, 5]
对应的SIL代码
// main
sil @main : $@convention(c) (Int32, UnsafeMutablePointer<Optional<UnsafeMutablePointer<Int8>>>) -> Int32 {
bb0(%0 : $Int32, %1 : $UnsafeMutablePointer<Optional<UnsafeMutablePointer<Int8>>>):
alloc_global @$s4main3arrSaySiGvp // id: %2
%3 = global_addr @$s4main3arrSaySiGvp : $*Array<Int> // user: %35
%4 = integer_literal $Builtin.Word, 5 // user: %6
// function_ref _allocateUninitializedArray<A>(_:)
//创建一个未初始化的Array的函数
%5 = function_ref @$ss27_allocateUninitializedArrayySayxG_BptBwlF : $@convention(thin) <τ_0_0> (Builtin.Word) -> (@owned Array<τ_0_0>, Builtin.RawPointer) // user: %6
//函数传入%4,也就是数组的大小。返回了一个元组%6
%6 = apply %5<Int>(%4) : $@convention(thin) <τ_0_0> (Builtin.Word) -> (@owned Array<τ_0_0>, Builtin.RawPointer) // users: %8, %7
%7 = tuple_extract %6 : $(Array<Int>, Builtin.RawPointer), 0 // user: %34
//获取元素开始的首地址
%8 = tuple_extract %6 : $(Array<Int>, Builtin.RawPointer), 1 // user: %9
%9 = pointer_to_address %8 : $Builtin.RawPointer to [strict] $*Int // users: %12, %29, %24, %19, %14
//整型1
%10 = integer_literal $Builtin.Int64, 1 // user: %11
//将1构建成结构体{1}
%11 = struct $Int (%10 : $Builtin.Int64) // user: %12
//将%11整型结构体{1}存入%9
store %11 to %9 : $*Int // id: %12
%13 = integer_literal $Builtin.Word, 1 // user: %14
//%14就是%9偏移%13个元素后的地址(index_addr按元素步长偏移,这里是1个Int,即8字节,而不是1个字节)
%14 = index_addr %9 : $*Int, %13 : $Builtin.Word // user: %17
%15 = integer_literal $Builtin.Int64, 2 // user: %16
%16 = struct $Int (%15 : $Builtin.Int64) // user: %17
//将%16整型结构体{2}存入%14
store %16 to %14 : $*Int // id: %17
%18 = integer_literal $Builtin.Word, 2 // user: %19
//%19就是%9偏移%18个元素后的地址(2个Int,即16字节,而不是2个字节)
%19 = index_addr %9 : $*Int, %18 : $Builtin.Word // user: %22
%20 = integer_literal $Builtin.Int64, 3 // user: %21
%21 = struct $Int (%20 : $Builtin.Int64) // user: %22
//将%21整型结构体{3}存入%19
store %21 to %19 : $*Int // id: %22
/*
下面的逻辑就和上面一样依次往数组存入数据
*/
...
return %37 : $Int32 // id: %38
} // end sil function 'main'
源码查找关于allocateUninitializedArray
,在ArrayShared.swift
中
/// Returns an Array of `_count` uninitialized elements using the
/// given `storage`, and a pointer to uninitialized memory for the
/// first element.
///
/// This function is referenced by the compiler to allocate array literals.
///
/// - Precondition: `storage` is `_ContiguousArrayStorage`.
@inlinable // FIXME(inline-always)
@inline(__always)
@_semantics("array.uninitialized_intrinsic")
public // COMPILER_INTRINSIC
func _allocateUninitializedArray<Element>(_ builtinCount: Builtin.Word)
-> (Array<Element>, Builtin.RawPointer) {
let count = Int(builtinCount)
//如果count>0,创建内存空间
if count > 0 {
// Doing the actual buffer allocation outside of the array.uninitialized
// semantics function enables stack propagation of the buffer.
//allocWithTailElems_1最终会调用allocObject在堆区分配内存空间,来去存储数组的元素
let bufferObject = Builtin.allocWithTailElems_1(
_ContiguousArrayStorage<Element>.self, builtinCount, Element.self)
//创建Array,返回array和第一个元素的首地址
let (array, ptr) = Array<Element>._adoptStorage(bufferObject, count: count)
return (array, ptr._rawValue)
}
// For an empty array no buffer allocation is needed.
//count=0,创建空类型的数组
let (array, ptr) = Array<Element>._allocateUninitialized(count)
return (array, ptr._rawValue)
}
Array.swift
中的_adoptStorage
/// Returns an Array of `count` uninitialized elements using the
/// given `storage`, and a pointer to uninitialized memory for the
/// first element.
///
/// - Precondition: `storage is _ContiguousArrayStorage`.
@inlinable
@_semantics("array.uninitialized")
internal static func _adoptStorage(
_ storage: __owned _ContiguousArrayStorage<Element>, count: Int
) -> (Array, UnsafeMutablePointer<Element>) {
let innerBuffer = _ContiguousArrayBuffer<Element>(
count: count,
storage: storage)
//返回了array和array第一个元素的首地址
return (
Array(
_buffer: _Buffer(_buffer: innerBuffer, shiftedToStartIndex: 0)),
innerBuffer.firstElementAddress)
}
-
_adoptStorage
返回了array和第一个元素的首地址
此时为什么要返回第一个元素的首地址呢?
那肯定是数组还存放有其它信息,因此需要把第一个元素的地址返回回去,完成其它操作(比如说赋值)。
通过源码总结的Array数据结构
-
count
存放的是数组元素的个数 -
_capacityAndFlags
容量和标志位
通过LLDB验证
//Array:Struct{class}
//Array本身是值类型(struct),内部持有一个指向堆区存储对象(class)的引用,元素存放在堆区
var arr = [1, 2, 3, 4, 5]
print("end")
/*
(lldb) po withUnsafePointer(to: &arr) {print($0)}
0x00000001000102b0
0 elements
(lldb) x/8g 0x00000001000102b0
0x1000102b0: 0x0000000100720c10 0x0000000000000000
0x1000102c0: 0x0000000000000000 0x0000000000000000
0x1000102d0: 0x0000000000000000 0x0000000000000000
0x1000102e0: 0x0000000000000000 0x0000000000000000
(lldb) x/8g 0x0000000100720c10
0x100720c10: 0x00007ff84333e568 0x0000000000000003
0x100720c20: 0x0000000000000005 0x000000000000000a
0x100720c30: 0x0000000000000001 0x0000000000000002
0x100720c40: 0x0000000000000003 0x0000000000000004
(lldb) cat 0x00007ff84333e568
invalid command 'cat 0x00007ff84333e568'.
(lldb) cat address 0x00007ff84333e568
address:0x00007ff84333e568, e368full type metadata for Swift._ContiguousArrayStorage<Swift.Int> <+16> , ($ss23_ContiguousArrayStorageCySiGMf), External: NO libswiftCore.dylib.__DATA.__data +e368
(lldb)
*/
关于truncatingIfNeeded
//truncatingIfNeeded:大数转小数时会截掉多余的二进制位
let x: Int16 = 0b0000_0011_0101_1100
let x1 = Int8(truncatingIfNeeded: x) // 0b0101_1100
/*
truncatingIfNeeded:小数转大数
正数:用0占位
负数:用1占位。因为负数在计算机存的是补码(因此用1占位)
*/
2.Array扩容
var arr = [1, 2, 3, 4, 5]
arr.append(6)
我们在创建数组的时候,_storage(ContiguousStorage)
中的count
和容量(capacity)就已经固定为5了,没有多余的空间。那么我们在执行append()
的时候,数组就需要扩容了。
探究Array扩容
,进入Array.swift
找到append
@inlinable
@_semantics("array.append_element")
public mutating func append(_ newElement: __owned Element) {
// Separating uniqueness check and capacity check allows hoisting the
// uniqueness check out of a loop.
//检查当前_buffer是否存在其它引用
//如果存在其它引用的话,保证Array的值类型特性。创建新的buffer来存储老的数据,同时新buffer的最小容量为count + 1
_makeUniqueAndReserveCapacityIfNotUnique()
let oldCount = _buffer.mutableCount
_reserveCapacityAssumingUniqueBuffer(oldCount: oldCount)
_appendElementAssumeUniqueAndCapacity(oldCount, newElement: newElement)
_endMutation()
}
@inlinable
@_semantics("array.make_mutable")
internal mutating func _makeUniqueAndReserveCapacityIfNotUnique() {
//是否开辟的这块buffer是唯一引用,类似于写时复制。
//如果不是唯一引用,那就开辟新的内存空间来存放新的数据
if _slowPath(!_buffer.beginCOWMutation()) {
//此时不是唯一引用,创建新的内存空间来存储oldBuffer
//bufferIsUnique -> 缓存区是否唯一
//minimumCapacity -> 最小容量count + 1
//growForAppend -> 是否添加时需要扩容
_createNewBuffer(bufferIsUnique: false,
minimumCapacity: count + 1,
growForAppend: true)
}
}
/// Creates a new buffer, replacing the current buffer.
///
/// If `bufferIsUnique` is true, the buffer is assumed to be uniquely
/// referenced by this array and the elements are moved - instead of copied -
/// to the new buffer.
/// The `minimumCapacity` is the lower bound for the new capacity.
/// If `growForAppend` is true, the new capacity is calculated using
/// `_growArrayCapacity`, but at least kept at `minimumCapacity`.
@_alwaysEmitIntoClient
internal mutating func _createNewBuffer(
bufferIsUnique: Bool, minimumCapacity: Int, growForAppend: Bool
) {
_internalInvariant(!bufferIsUnique || _buffer.isUniquelyReferenced())
//扩容
_buffer = _buffer._consumeAndCreateNew(bufferIsUnique: bufferIsUnique,
minimumCapacity: minimumCapacity,
growForAppend: growForAppend)
}
/*
ArrayBuffer.swift中的beginCOWMutation
*/
/// Returns `true` and puts the buffer in a mutable state iff the buffer's
/// storage is uniquely-referenced.
///
/// - Precondition: The buffer must be immutable.
///
/// - Warning: It's a requirement to call `beginCOWMutation` before the buffer
/// is mutated.
@_alwaysEmitIntoClient
internal mutating func beginCOWMutation() -> Bool {
let isUnique: Bool
if !_isClassOrObjCExistential(Element.self) {
isUnique = _storage.beginCOWMutationUnflaggedNative()
} else if !_storage.beginCOWMutationNative() {
return false
} else {
isUnique = _isNative
}
#if INTERNAL_CHECKS_ENABLED
if isUnique {
_native.isImmutable = false
}
#endif
return isUnique
}
上面我们探究了_makeUniqueAndReserveCapacityIfNotUnique()
,因为Array
表现为值类型,为了不影响其它数据,也会采用写时复制
的策略来判断是否需要创建一个新的buffer
来存数据。
接下来继续分析append
中的_reserveCapacityAssumingUniqueBuffer(oldCount: oldCount)
@inlinable
@_semantics("array.mutate_unknown")
internal mutating func _reserveCapacityAssumingUniqueBuffer(oldCount: Int) {
// Due to make_mutable hoisting the situation can arise where we hoist
// _makeMutableAndUnique out of loop and use it to replace
// _makeUniqueAndReserveCapacityIfNotUnique that precedes this call. If the
// array was empty _makeMutableAndUnique does not replace the empty array
// buffer by a unique buffer (it just replaces it by the empty array
// singleton).
// This specific case is okay because we will make the buffer unique in this
// function because we request a capacity > 0 and therefore _copyToNewBuffer
// will be called creating a new buffer.
let capacity = _buffer.mutableCapacity
_internalInvariant(capacity == 0 || _buffer.isMutableAndUniquelyReferenced())
//如果添加元素后的count大于容量,此时就需要创建新的内存空间来存数据,也就是扩容
if _slowPath(oldCount + 1 > capacity) {
_createNewBuffer(bufferIsUnique: capacity > 0,
minimumCapacity: oldCount + 1,
growForAppend: true)
}
}
- 如果oldCount + 1大于容量,此时的数组就需要扩容,也就是执行
_createNewBuffer
通过_createNewBuffer
研究扩容原理,也就是_buffer._consumeAndCreateNew函数
/// Creates and returns a new uniquely referenced buffer which is a copy of
/// this buffer.
///
/// This buffer is consumed, i.e. it's released.
@_alwaysEmitIntoClient
@inline(never)
@_semantics("optimize.sil.specialize.owned2guarantee.never")
internal __consuming func _consumeAndCreateNew() -> _ArrayBuffer {
return _consumeAndCreateNew(bufferIsUnique: false,
minimumCapacity: count,
growForAppend: false)
}
/// Creates and returns a new uniquely referenced buffer which is a copy of
/// this buffer.
///
/// If `bufferIsUnique` is true, the buffer is assumed to be uniquely
/// referenced and the elements are moved - instead of copied - to the new
/// buffer.
/// The `minimumCapacity` is the lower bound for the new capacity.
/// If `growForAppend` is true, the new capacity is calculated using
/// `_growArrayCapacity`, but at least kept at `minimumCapacity`.
///
/// This buffer is consumed, i.e. it's released.
@_alwaysEmitIntoClient
@inline(never)
@_semantics("optimize.sil.specialize.owned2guarantee.never")
internal __consuming func _consumeAndCreateNew(
bufferIsUnique: Bool, minimumCapacity: Int, growForAppend: Bool
) -> _ArrayBuffer {
//通过_growArrayCapacity,获取新的容量
let newCapacity = _growArrayCapacity(oldCapacity: capacity,
minimumCapacity: minimumCapacity,
growForAppend: growForAppend)
let c = count
_internalInvariant(newCapacity >= c)
let newBuffer = _ContiguousArrayBuffer<Element>(
_uninitializedCount: c, minimumCapacity: newCapacity)
if bufferIsUnique {
// As an optimization, if the original buffer is unique, we can just move
// the elements instead of copying.
let dest = newBuffer.firstElementAddress
dest.moveInitialize(from: mutableFirstElementAddress,
count: c)
_native.mutableCount = 0
} else {
_copyContents(
subRange: 0..<c,
initializing: newBuffer.mutableFirstElementAddress)
}
return _ArrayBuffer(_buffer: newBuffer, shiftedToStartIndex: 0)
}
关于_growArrayCapacity
@inlinable
internal func _growArrayCapacity(_ capacity: Int) -> Int {
//扩容方式为2倍
return capacity * 2
}
@_alwaysEmitIntoClient
internal func _growArrayCapacity(
oldCapacity: Int, minimumCapacity: Int, growForAppend: Bool
) -> Int {
if growForAppend {
if oldCapacity < minimumCapacity {
// When appending to an array, grow exponentially.
return Swift.max(minimumCapacity, _growArrayCapacity(oldCapacity))
}
return oldCapacity
}
// If not for append, just use the specified capacity, ignoring oldCapacity.
// This means that we "shrink" the buffer in case minimumCapacity is less
// than oldCapacity.
return minimumCapacity
}
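数组的扩容方式:2倍扩容
比如说var arr = [1, 2, 3, 4, 5]
,字面量创建时allocWithTailElems_1按元素个数分配,这里的初始容量就等于count = 5(前面LLDB里看到的0x000000000000000a是_capacityAndFlags,应该是capacity左移1位再加上标志位,右移1位正好是5)
。当append第6个元素(oldCount + 1 > capacity)的时候,此时就需要扩容。新的数组容量为max(minimumCapacity, oldCapacity * 2) = max(6, 10) = 10;之后元素个数超过10时,再扩容为20(实际容量可能因内存分配对齐而略大)
。然后依次类推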
至于,数组在添加元素的时候采用的写时复制
原理,有兴趣的也可以通过代码及LLDB去验证一下。
三.Moya源码
直接借用Moya
官网上的一张图,我们日常都会和网络打交道不管是使用AFN
还是Alamofire
,其实都是封装了URLSession
,不用让我们使用官方繁琐的API。
久而久之我们就会发现项目中散落着和AFN
、Alamofire
相关代码,不便于统一管理,而且很多代码内容都是重复的,于是我们就会新建一个中间层Network layer
来统一管理我们的AFN
、Alamofire
。
我们仅仅希望我们的App只和我们的Network layer
打交道,不用关心底层使用的是哪个三方网络库,即使进行迁移,也应该对我们的上层业务逻辑毫无影响,因此我们都是通过Network layer
来耦合业务逻辑的。
但是因为抽象的颗粒度不够,我们往往写着写着就会出现越过Network layer
,直接和我们的三方网络库打交道,这样就违背了我们的设计原则。
Moya
就是对网络业务逻辑的抽象,我们只需遵循相关协议,就可以发起网络请求,不用关心底层细节
1.Moya的模块
-
Request模块
请求的模块 -
Provider模块
发起请求的模块 -
Response模块
请求响应的模块
2.Moya流程图
TargetType
/// The protocol used to define the specifications necessary for a `MoyaProvider`.
public protocol TargetType {
/// The target's base `URL`.
var baseURL: URL { get }
/// The path to be appended to `baseURL` to form the full `URL`.
var path: String { get }
/// The HTTP method used in the request.
var method: Moya.Method { get }
/// Provides stub data for use in testing. Default is `Data()`.
var sampleData: Data { get }
/// The type of HTTP task to be performed.
var task: Task { get }
/// The type of validation to perform on the request. Default is `.none`.
var validationType: ValidationType { get }
/// The headers to be used in the request.
var headers: [String: String]? { get }
}
public extension TargetType {
/// The type of validation to perform on the request. Default is `.none`.
var validationType: ValidationType { .none }
/// Provides stub data for use in testing. Default is `Data()`.
var sampleData: Data { Data() }
}
第一步创建一个遵守TargetType
协议的枚举,这个过程中我们完成网络请求的基本配置
EndPoint
/// Class for reifying a target of the `Target` enum unto a concrete `Endpoint`.
/// - Note: As of Moya 11.0.0 Endpoint is no longer generic.
/// Existing code should work as is after removing the generic.
/// See #1529 and #1524 for the discussion.
open class Endpoint {
public typealias SampleResponseClosure = () -> EndpointSampleResponse
/// A string representation of the URL for the request.
public let url: String
/// A closure responsible for returning an `EndpointSampleResponse`.
public let sampleResponseClosure: SampleResponseClosure
/// The HTTP method for the request.
public let method: Moya.Method
/// The `Task` for the request.
public let task: Task
/// The HTTP header fields for the request.
public let httpHeaderFields: [String: String]?
public init(url: String,
sampleResponseClosure: @escaping SampleResponseClosure,
method: Moya.Method,
task: Task,
httpHeaderFields: [String: String]?) {
self.url = url
self.sampleResponseClosure = sampleResponseClosure
self.method = method
self.task = task
self.httpHeaderFields = httpHeaderFields
}
...
}
public typealias EndpointClosure = (Target) -> Endpoint
@escaping EndpointClosure = MoyaProvider.defaultEndpointMapping
//将Target转化为EndPoint
final class func defaultEndpointMapping(for target: Target) -> Endpoint {
Endpoint(
url: URL(target: target).absoluteString,
sampleResponseClosure: { .networkResponse(200, target.sampleData) },
method: target.method,
task: target.task,
httpHeaderFields: target.headers
)
}
第二步通过endpointClosure
生成一个EndPoint
。实际上EndPoint
就是TargetType
的再一次包装
RequestClosure
public typealias RequestResultClosure = (Result<URLRequest, MoyaError>) -> Void
//通过Endpoint构建RequestClosure
//也就是在判断URLRequest是否构建成功
final class func defaultRequestMapping(for endpoint: Endpoint, closure: RequestResultClosure) {
do {
let urlRequest = try endpoint.urlRequest()
closure(.success(urlRequest))
} catch MoyaError.requestMapping(let url) {
closure(.failure(MoyaError.requestMapping(url)))
} catch MoyaError.parameterEncoding(let error) {
closure(.failure(MoyaError.parameterEncoding(error)))
} catch {
closure(.failure(MoyaError.underlying(error, nil)))
}
}
根据Endpoint
构建URLRequest
,如果发生错误,返回错误的数据MoyaError
Provider
/// Designated request-making method. Returns a `Cancellable` token to cancel the request later.
@discardableResult
open func request(_ target: Target,
callbackQueue: DispatchQueue? = .none,
progress: ProgressBlock? = .none,
completion: @escaping Completion) -> Cancellable {
let callbackQueue = callbackQueue ?? self.callbackQueue
return requestNormal(target, callbackQueue: callbackQueue, progress: progress, completion: completion)
}
/// Performs the request for `target` and returns a token that can cancel it.
///
/// Steps, in order:
/// 1. Resolve the endpoint and stub behaviour for the target.
/// 2. Wrap `completion` so every plugin can process the result first.
/// 3. When `trackInflights` is on and completions are already registered for the
///    same endpoint, append this completion and return without starting a new request.
/// 4. Otherwise hand `endpoint` and `performNetworking` to `requestClosure`.
///
/// - Parameters:
///   - target: The target to request.
///   - callbackQueue: Queue forwarded to `performRequest`.
///   - progress: Optional progress block forwarded to `performRequest`.
///   - completion: Called with the plugin-processed result.
/// - Returns: The `CancellableWrapper` token created for this call.
func requestNormal(_ target: Target, callbackQueue: DispatchQueue?, progress: Moya.ProgressBlock?, completion: @escaping Moya.Completion) -> Cancellable {
let endpoint = self.endpoint(target)
let stubBehavior = self.stubClosure(target)
// Cancellation token handed back to the caller
let cancellableToken = CancellableWrapper()
// Allow plugins to modify response
let pluginsWithCompletion: Moya.Completion = { result in
let processedResult = self.plugins.reduce(result) { $1.process($0, target: target) }
completion(processedResult)
}
// In-flight tracking: completions for the same endpoint are collected in one list.
// NOTE(review): reads go through `inflightRequests`, writes through
// `internalInflightRequests`; presumably the former is a view over the latter — confirm.
if trackInflights {
var inflightCompletionBlocks = self.inflightRequests[endpoint]
inflightCompletionBlocks?.append(pluginsWithCompletion)
self.internalInflightRequests[endpoint] = inflightCompletionBlocks
if inflightCompletionBlocks != nil {
// An entry already existed for this endpoint: this completion was appended to it,
// so return without starting another request.
return cancellableToken
} else {
// No entry yet: register this completion as the first one for the endpoint.
self.internalInflightRequests[endpoint] = [pluginsWithCompletion]
}
}
let performNetworking = { (requestResult: Result<URLRequest, MoyaError>) in
// If the token has already been cancelled, invoke the cancellation completion and stop.
if cancellableToken.isCancelled {
self.cancelCompletion(pluginsWithCompletion, target: target)
return
}
// Assigned in the `.success` case below; the `.failure` case returns before it is used.
var request: URLRequest!
switch requestResult {
case .success(let urlRequest):
request = urlRequest
case .failure(let error):
pluginsWithCompletion(.failure(error))
return
}
// Network response callback -> Response
//public typealias Completion = (_ result: Result<Moya.Response, MoyaError>) -> Void
let networkCompletion: Moya.Completion = { result in
if self.trackInflights {
// Deliver the result to every completion registered for this endpoint, then drop the entry.
self.inflightRequests[endpoint]?.forEach { $0(result) }
self.internalInflightRequests.removeValue(forKey: endpoint)
} else {
pluginsWithCompletion(result)
}
}
// Store what `performRequest` returns in the token's `innerCancellable`.
cancellableToken.innerCancellable = self.performRequest(target, request: request, callbackQueue: callbackQueue, progress: progress, completion: networkCompletion, endpoint: endpoint, stubBehavior: stubBehavior)
}
// Run the request closure with the endpoint and the networking step.
// Presumably it builds the URLRequest and then calls `performNetworking` with the outcome — confirm.
requestClosure(endpoint, performNetworking)
// Return the cancellation token; the caller can use it to cancel the task
return cancellableToken
}
通过之前创建的URLRequest
发起网络请求request
。回调Response
信息,完成网络请求
四.高阶函数
高阶函数的本质也是函数,只是它至少满足下面两个特点之一:
- 接受函数或者是闭包作为参数
- 返回值是一个函数或者是闭包
1.map
map函数
作用于Collection
中的每一个元素,然后返回一个由转换结果组成的新数组(Array)
let arr = ["APPLE", "PEAR", "BANANA"]
// Goal: convert every upper-case element to lower case.
let lowercasedFruits = arr.map { $0.lowercased() }
print(lowercasedFruits) // ["apple", "pear", "banana"]
2.flatMap
与map
的区别在于会将Sequence
中的元素压平
一层。例如对一个二维数组调用flatMap,会返回一个压平
后的Sequence
,也就是一个一维数组
let numArr = [[1, 2, 3, 4], [5, 6]]
// map with an identity closure keeps the nesting; flatMap joins the inner arrays.
let nested = numArr.map { $0 }
let flattened = numArr.flatMap { $0 }
print(nested) // [[1, 2, 3, 4], [5, 6]]
print(flattened) // [1, 2, 3, 4, 5, 6]
3.compactMap
当转化闭包返回可选值并且你期望得到的结果为非可选值的序列时,使用compactMap
// The nil entries make this an array of optional integers.
let numArr: [Int?] = [1, 2, nil, 3, 4, 5, 6, nil]
// compactMap drops the nil values and unwraps the rest.
let nonNilNumbers = numArr.compactMap { $0 }
print(nonNilNumbers) // [1, 2, 3, 4, 5, 6]
4.filter
filter
就是Sequence
中默认提供的方法,允许调用者传入一个闭包来过滤集合中的元素
let numArr = [1, 2, 3, 4, 5, 6]
// Keep only the elements smaller than 3.
let smallNumbers = numArr.filter { $0 < 3 }
print(smallNumbers) // [1, 2]
5.reduce
// Declaration of `reduce`: takes an initial `Result` value and a closure that
// combines a `Result` with an `Element` into a new `Result`; returns a `Result`.
@inlinable public func reduce<Result>(_ initialResult: Result, _ nextPartialResult: (Result, Element) throws -> Result) rethrows -> Result
传入一个Result
类型的初始值和一个闭包表达式,最终
返回Result
。闭包表达式
的返回值类型与Result
一致
let numArr = [1, 2, 3, 4, 5, 6]
// Start from 10 and add every element of the array to it.
let result = numArr.reduce(10) { $0 + $1 } // 31
进入源码SequenceAlgorithms.swift
找到reduce
/// Combines the elements of the sequence into a single value.
///
/// - Parameters:
///   - initialResult: The value the accumulation starts from.
///   - nextPartialResult: Called once per element with the value accumulated so
///     far and that element; returns the next accumulated value.
/// - Returns: The accumulated value after the last element, or `initialResult`
///   when the sequence has no elements.
@inlinable
public func reduce<Result>(
    _ initialResult: Result,
    _ nextPartialResult:
        (_ partialResult: Result, Element) throws -> Result
) rethrows -> Result {
    // Running value, seeded with the caller's initial result.
    var runningResult = initialResult
    // Walk the sequence one element at a time.
    var iterator = makeIterator()
    while let element = iterator.next() {
        // Feed the running value and the current element to the closure and
        // keep what it returns as the new running value.
        runningResult = try nextPartialResult(runningResult, element)
    }
    // Hand back the final accumulated value.
    return runningResult
}
使用reduce实现map
// 1. Implementing map on top of reduce
extension Sequence {
    /// Returns an array with the results of applying `transform` to every element, in order.
    ///
    /// The result type `T` is independent of `Element`, so the elements can be
    /// converted to another type. Errors thrown by `transform` are rethrown.
    ///
    /// - Parameter transform: Converts one element of the sequence into a `T`.
    /// - Returns: The transformed elements; empty when the sequence is empty.
    func customMap<T>(_ transform: (Element) throws -> T) rethrows -> [T] {
        // The same thing written with the copying form of reduce, which takes a
        // copy of the accumulator on every step:
        //     return try reduce([T]()) {
        //         var arr = $0
        //         arr.append(try transform($1))
        //         return arr
        //     }
        // `reduce(into:)` passes the accumulator to the closure as `inout`
        // instead, so the array is appended to in place.
        return try reduce(into: [T]()) { collected, element in
            collected.append(try transform(element))
        }
    }
}
let arr = ["APPLE", "PEAR", "BANANA"]
// Lower-case every element through the reduce-based map.
let lowercasedFruits = arr.customMap { $0.lowercased() }
print(lowercasedFruits) // ["apple", "pear", "banana"]
使用reduce找到最大值
// Finding the largest element of a collection with reduce
extension Collection {
    /// Returns the largest element of the collection.
    ///
    /// - Precondition: The collection must not be empty.
    /// - Returns: The greatest element according to `>`. When several elements
    ///   compare equal to the maximum, the last of them is returned.
    func findMaxValue() -> Element where Element: Comparable {
        // `self[startIndex]` is out of bounds for an empty collection, so fail
        // with a message that names the real problem.
        precondition(!isEmpty, "findMaxValue() requires a non-empty collection")
        // Seed the fold with the first element and compare only the remaining
        // ones against it; the first element need not be compared with itself.
        return dropFirst().reduce(into: self[startIndex]) { best, candidate in
            best = best > candidate ? best : candidate
        }
    }
}
let numArr = [1, 2, 3, 4, 5, 6]
// Largest element found through the reduce-based helper.
let largest = numArr.findMaxValue()
print(largest) // 6
reduce实现逆序
// Reversing a sequence with reduce
extension Sequence {
    /// Returns the elements of the sequence in reverse order.
    ///
    /// - Returns: A new array holding the elements from last to first; empty
    ///   when the sequence is empty.
    func customReverse() -> [Element] {
        // Prepending on every step (`$0 = [$1] + $0`, or `$0.insert($1, at: 0)`)
        // moves all elements collected so far each time, which makes the whole
        // fold quadratic. Collect by appending instead and reverse once at the end.
        var reversedElements = reduce(into: [Element]()) { $0.append($1) }
        reversedElements.reverse()
        return reversedElements
    }
}
let numArr = [1, 2, 3, 4, 5, 6]
// Reverse the array through the reduce-based helper.
let reversedNumbers = numArr.customReverse()
print(reversedNumbers) // [6, 5, 4, 3, 2, 1]
使用reduce求出数组中奇数的和、以及偶数的乘积
// Use reduce to compute the sum of the odd elements and the product of the even elements.
// The accumulator is a tuple: `.0` holds the odd sum, `.1` the even product.
let result = numArr.reduce(into: (0, 1)) { totals, number in
    if number % 2 != 0 {
        totals.0 += number
    } else {
        totals.1 *= number
    }
}
print(result) // (9, 48)
使用reduce求一个数组中偶数的平方和
// Use reduce to compute the sum of the squares of the even elements.
let result = numArr.reduce(into: 0) { sum, number in
    // Odd elements contribute nothing to the sum.
    let contribution = number % 2 == 0 ? number * number : 0
    sum += contribution
}
print(result) // 56