1. LinkedList数据结构
链表是基础数据结构,本质上是用离散的内存,用指针将数据串在一起。一个链表数据结构,从数据结构上,有LinkedList和Node两个抽象数据对象,操作一般有push_front、push_back、is_empty、len等。
写rust之前,先写一个python的LInkedList作为对比
class Node:
def __init__(self, data=None):
self.data = data
self.next = None
class LinkedList:
def __init__(self):
self.head = None
def insert_at_beginning(self, data):
new_node = Node(data)
new_node.next = self.head
self.head = new_node
def insert_at_end(self, data):
if not self.head:
self.head = Node(data)
return
cur = self.head
while cur.next:
cur = cur.next
cur.next = Node(data)
def delete_node(self, key):
cur = self.head
if cur and cur.data == key:
self.head = cur.next
cur = None
return
prev = None
while cur and cur.data != key:
prev = cur
cur = cur.next
if cur is None:
return
prev.next = cur.next
cur = None
def search(self, key):
cur = self.head
while cur:
if cur.data == key:
return True
cur = cur.next
return False
def display(self):
cur = self.head
while cur:
print(cur.data, end=' ')
cur = cur.next
print()
# 测试代码
linked_list = LinkedList()
linked_list.insert_at_beginning(1)
linked_list.insert_at_beginning(2)
linked_list.insert_at_end(3)
linked_list.display() # 输出:2 1 3
linked_list.delete_node(1)
linked_list.display() # 输出:2 3
print(linked_list.search(3)) # 输出:True
print(linked_list.search(4)) # 输出:False
这里一般使用abstract api思想,来封装LinkedList数据结构,python写是比较直观简单的。
2. rust写LinkedList
2.1 简化版LinkedList
如下是个rust源代码中扒拉出的简单版本的LinkedList
use core::ptr::NonNull;
use core::marker::PhantomData;
use core::mem;
use core::fmt;
struct MyNode<T> {
next: Option<NonNull<MyNode<T>>>,
prev: Option<NonNull<MyNode<T>>>,
element: T,
}
pub struct MyLinkedList<T> {
head: Option<NonNull<MyNode<T>>>,
tail: Option<NonNull<MyNode<T>>>,
len: usize,
marker: PhantomData<Box<MyNode<T>>>,
}
pub struct MyIter<'a, T: 'a> {
head: Option<NonNull<MyNode<T>>>,
tail: Option<NonNull<MyNode<T>>>,
len: usize,
marker: PhantomData<&'a MyNode<T>>,
}
impl<'a, T> Iterator for MyIter<'a, T> {
type Item = &'a T;
#[inline]
fn next(&mut self) -> Option<&'a T> {
if self.len == 0 {
None
} else {
self.head.map(|node| unsafe {
// Need an unbound lifetime to get 'a
let node = &*node.as_ptr();
self.len -= 1;
self.head = node.next;
&node.element
})
}
}
}
impl<T> Clone for MyIter<'_, T> {
fn clone(&self) -> Self {
MyIter { ..*self }
}
}
impl<T> MyNode<T> {
fn new(element: T) -> Self {
MyNode { next: None, prev: None, element }
}
fn into_element(self: Box<Self>) -> T {
self.element
}
}
impl<T: fmt::Debug> fmt::Debug for MyLinkedList<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_list().entries(self).finish()
}
}
pub struct IntoIter<T> {
list: MyLinkedList<T>,
}
impl<T: fmt::Debug> fmt::Debug for IntoIter<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_tuple("IntoIter").field(&self.list).finish()
}
}
impl<T> IntoIterator for MyLinkedList<T> {
type Item = T;
type IntoIter = IntoIter<T>;
/// Consumes the list into an iterator yielding elements by value.
#[inline]
fn into_iter(self) -> IntoIter<T> {
IntoIter { list: self }
}
}
impl<'a, T> IntoIterator for &'a MyLinkedList<T> {
type Item = &'a T;
type IntoIter = MyIter<'a, T>;
fn into_iter(self) -> MyIter<'a, T> {
self.iter()
}
}
impl<T> Iterator for IntoIter<T> {
type Item = T;
#[inline]
fn next(&mut self) -> Option<T> {
self.list.pop_front()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(self.list.len, Some(self.list.len))
}
}
impl<T> Default for MyLinkedList<T> {
/// Creates an empty `LinkedList<T>`.
#[inline]
fn default() -> Self {
Self::new()
}
}
impl<T> MyLinkedList<T> {
#[inline]
#[must_use]
pub const fn new() -> Self {
MyLinkedList { head: None, tail: None, len: 0, marker: PhantomData }
}
#[inline]
fn pop_front_node(&mut self) -> Option<Box<MyNode<T>>> {
// This method takes care not to create mutable references to whole nodes,
// to maintain validity of aliasing pointers into `element`.
self.head.map(|node| unsafe {
let node = Box::from_raw(node.as_ptr());
self.head = node.next;
match self.head {
None => self.tail = None,
// Not creating new mutable (unique!) references overlapping `element`.
Some(head) => (*head.as_ptr()).prev = None,
}
self.len -= 1;
node
})
}
/// Adds the given node to the back of the list.
#[inline]
fn push_back_node(&mut self, mut node: Box<MyNode<T>>) {
// This method takes care not to create mutable references to whole nodes,
// to maintain validity of aliasing pointers into `element`.
unsafe {
node.next = None;
node.prev = self.tail;
let node = Some(Box::leak(node).into());
match self.tail {
None => self.head = node,
// Not creating new mutable (unique!) references overlapping `element`.
Some(tail) => (*tail.as_ptr()).next = node,
}
self.tail = node;
self.len += 1;
}
}
pub fn pop_front(&mut self) -> Option<T> {
self.pop_front_node().map(MyNode::into_element)
}
#[inline]
pub fn iter(&self) -> MyIter<'_, T> {
MyIter { head: self.head, tail: self.tail, len: self.len, marker: PhantomData }
}
#[inline]
#[must_use]
pub fn is_empty(&self) -> bool {
self.head.is_none()
}
#[inline]
#[must_use]
pub fn len(&self) -> usize {
self.len
}
#[inline]
pub fn clear(&mut self) {
*self = Self::new();
}
pub fn contains(&self, x: &T) -> bool
where
T: PartialEq<T>,
{
self.iter().any(|e| e == x)
}
#[inline]
#[must_use]
pub fn front(&self) -> Option<&T> {
unsafe { self.head.as_ref().map(|node| &node.as_ref().element) }
}
#[inline]
#[must_use]
pub fn back(&self) -> Option<&T> {
unsafe { self.tail.as_ref().map(|node| &node.as_ref().element) }
}
pub fn push_back(&mut self, elt: T) {
self.push_back_node(Box::new(MyNode::new(elt)));
}
}
impl<T> Drop for MyLinkedList<T> {
fn drop(&mut self) {
struct DropGuard<'a, T>(&'a mut MyLinkedList<T>);
impl<'a, T> Drop for DropGuard<'a, T> {
fn drop(&mut self) {
// Continue the same loop we do below. This only runs when a destructor has
// panicked. If another one panics this will abort.
while self.0.pop_front_node().is_some() {}
}
}
while let Some(node) = self.pop_front_node() {
let guard = DropGuard(self);
drop(node);
mem::forget(guard);
}
}
}
fn main() {
let mut mylist = MyLinkedList::new();
mylist.push_back(1);
mylist.push_back(2);
mylist.push_back(3);
for element in &mylist {
println!("{}", element);
}
for value in mylist.iter() {
println!("{}", value); // 输出LinkedList中的每个元素
}
println!("{}", mylist.contains(&2));
for value in mylist.iter() {
println!("{}", value); // 输出LinkedList中的每个元素
}
}
发现比手写版的python LinkedList复杂很多。
2.2 数据结构定义
rust将abstruct api中的过程对象都具化了,抽象出2个数据结构
- MyLinkedList,保存头结点、尾节点、长度,另外增加了一个优化点,使用marker保存当前节点。
- MyNode,保存前向节点,后向节点,以及当前节点的值
struct MyNode<T> {
next: Option<NonNull<MyNode<T>>>,
prev: Option<NonNull<MyNode<T>>>,
element: T,
}
pub struct MyLinkedList<T> {
head: Option<NonNull<MyNode<T>>>,
tail: Option<NonNull<MyNode<T>>>,
len: usize,
marker: PhantomData<Box<MyNode<T>>>,
}
通过定义,可以看到:
-
Option<NonNull<MyNode<T>>>表示一个指针引用,虽然复杂,但是语义明确且安全
a> NonNull语义,表示如果有值,不可能为None;
b> Option表示指向None或者Some - 写链表指针相关的代码,记住Option<NonNull<T>>这个语义表达。
2.3 Iterator、IntoIterator
和python一样,如果要用迭代器访问,则需要实现Iterator这个trait
trait Iterator {
type Item;
fn next(&mut self) -> Option<Self::Item>;
…… // 很多默认方法
}
pub struct MyIter<'a, T: 'a> {
head: Option<NonNull<MyNode<T>>>,
tail: Option<NonNull<MyNode<T>>>,
len: usize,
marker: PhantomData<&'a MyNode<T>>,
}
impl<'a, T> Iterator for MyIter<'a, T> {
type Item = &'a T;
#[inline]
fn next(&mut self) -> Option<&'a T> {
if self.len == 0 {
None
} else {
self.head.map(|node| unsafe {
// Need an unbound lifetime to get 'a
let node = &*node.as_ptr();
self.len -= 1;
self.head = node.next;
&node.element
})
}
}
}
如上,定义MyIter,实现MyIter的Iterator,并且在LinkedList中要实现一个iter()方法,返回MyIter。
由于rust的Move语义,我们还要定义一个MyIterMut,实现MyIterMut的Iterator,同时在LinkedList中增加一个iter_mut()方法(代码中未实现)。
为了实现 for x in list这种语义,我们需要实现IntoIterator这种trait
trait IntoIterator where Self::IntoIter: Iterator<Item=Self::Item> {
type Item;
type IntoIter: Iterator;
fn into_iter(self) -> Self::IntoIter;
}
pub struct IntoIter<T> {
list: MyLinkedList<T>,
}
impl<T> IntoIterator for MyLinkedList<T> {
type Item = T;
type IntoIter = IntoIter<T>;
/// Consumes the list into an iterator yielding elements by value.
#[inline]
fn into_iter(self) -> IntoIter<T> {
IntoIter { list: self }
}
}
impl<'a, T> IntoIterator for &'a MyLinkedList<T> {
type Item = &'a T;
type IntoIter = MyIter<'a, T>;
fn into_iter(self) -> MyIter<'a, T> {
self.iter()
}
}
impl<T> Iterator for IntoIter<T> {
type Item = T;
#[inline]
fn next(&mut self) -> Option<T> {
self.list.pop_front()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(self.list.len, Some(self.list.len))
}
}
for x in list按照rust表达有三种情况:
- for x in list,对应impl<T> IntoIterator for MyLinkedList<T>
- for x in &list,对应impl<'a, T> IntoIterator for &'a MyLinkedList<T>
- for x in &mut list,对应impl<'a, T> IntoIterator for &mut 'a MyLinkedList<T>(代码中未实现)
2.4 Drop
LinkedList要实现智能指针,要实现Drop的trait
impl<T> Drop for MyLinkedList<T> {
fn drop(&mut self) {
struct DropGuard<'a, T>(&'a mut MyLinkedList<T>);
impl<'a, T> Drop for DropGuard<'a, T> {
fn drop(&mut self) {
while self.0.pop_front_node().is_some() {}
}
}
while let Some(node) = self.pop_front_node() {
let guard = DropGuard(self);
drop(node);
mem::forget(guard);
}
}
}
2.5 手写小结
rust是一种强类型强语义语言,相比于python,要完成LinkedList的编写,除了通性上明白数据结构的本质及原理,还需要熟悉rust的特性
- 要熟悉rust的内存管理(move、copy语义),时刻要考虑到要同时实现mut和非mut类型的操作
- 要熟悉rust的语法糖,这些语法糖被包裹在编译器中,例如:
a> 表达for x in list, list要实现IntoIterator,并且对于list、&list、&mut list有三种实现
b> 表达智能指针,list要实现Drop
c> 表达println!(“{:?}”, list),要实现Debug,并且list<T>的T也要实现Debug
d> 表达..list,要实现Default - 要熟悉rust的语义表达及操作
a> Optional,表达可选
b> Result,表达结果
c> NonNull,表达非空
...
rust新手写python代码信手拈来,但是写rust代码确困难重重,不熟悉rust的语言特性,就无法写rust代码。这就是不把rust的语法学完,就写不出来一个rust程序的原因。