Rust中有两种主要的字符串类型:`String`(拥有所有权、可增长、在堆上分配的UTF-8字符串)和`&str`(字符串切片,对一段UTF-8数据的借用)。下面是创建`String`的几种方式:
// Ways to create a `String`. Each `let s` shadows the previous binding.
fn main() {
    // Empty string.
    let mut s = String::new();

    // From a string literal.
    let s = String::from("hello");
    let s = "hello".to_string();

    // From other values, via formatting.
    let s = format!("{}-{}-{:?}", "hello", 42, true);
}
追加字符串:
// Appending to a `String` in place.
fn main() {
    // push_str appends a string slice.
    let mut s = String::from("foo");
    s.push_str("bar"); // s is now "foobar"

    // push appends a single char.
    let mut s = String::from("lo");
    s.push('l'); // s is now "lol"
}
连接字符串:
// Concatenating strings with `+` and with `format!`.
fn main() {
    // `+` consumes the left operand and borrows the right one.
    let s1 = String::from("Hello, ");
    let s2 = String::from("world!");
    let s3 = s1 + &s2; // s1 is moved here; s2 is only borrowed

    // format! does not take ownership of its arguments.
    let s1 = String::from("tic");
    let s2 = String::from("tac");
    let s3 = String::from("toe");
    let s = format!("{}-{}-{}", s1, s2, s3); // s1, s2 and s3 are all still valid
}
插入和删除:
// Inserting into and removing from a `String`. All positions are byte indices.
fn main() {
    let mut s = String::from("Hello");

    // Insert a char.
    s.insert(5, ','); // "Hello,"

    // Insert a string slice.
    s.insert_str(6, " world"); // "Hello, world"

    // Remove the char that starts at the given byte index.
    s.remove(0); // removes the first character

    // Remove everything.
    s.clear(); // s is now empty
}
Rust不支持用整数索引(如`s[0]`)访问字符串中的单个字符:
// Integer indexing into a `String` is rejected; the offending line is left commented out.
let s = String::from("hello");
// let h = s[0]; // error!
原因:`String`内部采用UTF-8编码,一个字符可能占1到4个字节,因此字节索引不一定落在字符边界上:
// Shows that `len()` counts bytes, not characters.
fn main() {
    let hello = "中国人"; // each of these Chinese characters takes 3 bytes in UTF-8
    println!("字节数:{}", hello.len()); // 9

    // Walk the raw bytes.
    for b in hello.bytes() {
        println!("{}", b);
    }
}
使用chars()遍历Unicode标量值:
// Visit the string one `char` at a time and print each on its own line.
for c in "中国人".chars() {
    println!("{}", c);
}
// Output: 中, 国, 人
使用char_indices():
// char_indices() pairs each char with the byte offset at which it starts.
for (i, c) in "中国人".char_indices() {
    println!("索引{}:字符'{}'", i, c);
}
// Output:
// 索引0:字符'中'
// 索引3:字符'国'
// 索引6:字符'人'
创建Slice:
// Take two sub-slices of `s` by byte range.
let s = String::from("hello world");
let hello = &s[0..5];
let world = &s[6..11];
注意字节边界:
// Slice ranges must fall on character boundaries.
let s = "中国人";
let zhong = &s[0..3]; // "中" - correct
// let guo = &s[0..2]; // error! byte 2 is not a valid UTF-8 boundary
// Iterate by char and print each one.
for c in "hello 中国人".chars() {
    println!("{}", c);
}
// Iterate by byte and print each value.
for b in "hello".bytes() {
    println!("{}", b);
}
// Iterate line by line.
let text = "line1\nline2\nline3";
for line in text.lines() {
    println!("{}", line);
}
// Iterate over whitespace-separated words.
let text = "hello world from Rust";
for word in text.split_whitespace() {
    println!("{}", word);
}
// Searching and replacing within a string.
fn main() {
    let s = String::from("hello world");

    // Find the byte offset of a substring.
    let pos = s.find("world"); // Some(6)
    let pos = s.find("xyz"); // None

    // Replace every occurrence.
    let new_s = s.replace("world", "Rust"); // "hello Rust"

    // Replace at most the given number of occurrences.
    let new_s = s.replacen("l", "L", 1); // "heLlo world"
}
// Splitting strings into borrowed pieces.
fn main() {
    let s = "a,b,c,d";
    // Split on a single char.
    let parts: Vec<&str> = s.split(',').collect();

    // Split on any run of whitespace.
    let s = "a b\tc\nd";
    let parts: Vec<&str> = s.split_whitespace().collect();

    // Split on a string pattern.
    let s = "hello::world::Rust";
    let parts: Vec<&str> = s.split("::").collect();

    // Limit the number of pieces returned.
    let s = "a,b,c,d";
    let parts: Vec<&str> = s.splitn(2, ',').collect(); // ["a", "b,c,d"]
}
// Trimming whitespace or specific characters. Every call returns a borrowed sub-slice.
fn main() {
    let s = " hello world ";

    // Trim both ends.
    let trimmed = s.trim(); // "hello world"

    // Trim the start only.
    let trimmed = s.trim_start();

    // Trim the end only.
    let trimmed = s.trim_end();

    // Trim a specific char from both ends.
    let s = "xxxhello worldxxx";
    let trimmed = s.trim_matches('x'); // "hello world"
}
// Case conversion. Both calls return a new `String`.
fn main() {
    let s = "Hello";
    println!("{}", s.to_uppercase()); // "HELLO"
    println!("{}", s.to_lowercase()); // "hello"
}
// Boolean tests on string contents.
fn main() {
    let s = "hello world";

    // Starts with a prefix?
    assert!(s.starts_with("hello"));

    // Ends with a suffix?
    assert!(s.ends_with("world"));

    // Contains a substring?
    assert!(s.contains("lo wo"));

    // Count the matches of a pattern.
    assert!(s.matches("l").count() == 3);
}
// Getting a `&str` view of a `String`.
fn main() {
    let s = String::from("hello");

    // Borrow with `&`.
    let slice: &str = &s;

    // Explicit conversion with as_str().
    let slice = s.as_str();

    // Automatic dereferencing at a call site.
    fn takes_str(s: &str) {}
    takes_str(&s); // converted automatically
}
// Three equivalent ways to turn a `&str` into an owned `String`.
fn main() {
    let slice = "hello";

    // to_string()
    let s = slice.to_string();

    // String::from()
    let s = String::from(slice);

    // into(), which needs the target type spelled out.
    let s: String = slice.into();
}
// Byte length versus character count.
fn main() {
    let s = "中国人";

    // Number of bytes.
    println!("字节数:{}", s.len()); // 9

    // Number of chars (Unicode scalar values).
    println!("字符数:{}", s.chars().count()); // 3
}
/// Returns the sub-slice `s[start..end]`, or `None` if that range is not valid for `s`.
///
/// `start` and `end` are byte offsets. The range is rejected when either offset
/// is past the end of `s`, when either one does not fall on a UTF-8 character
/// boundary, or when `start > end`. This function never panics.
fn safe_slice(s: &str, start: usize, end: usize) -> Option<&str> {
    // `str::get` does the bounds, ordering and char-boundary checks in one step.
    // Checking the two boundaries by hand and then indexing would still panic
    // on a reversed range such as (2, 1).
    s.get(start..end)
}

fn main() {
    let s = "中国人";
    println!("{:?}", safe_slice(s, 0, 3)); // Some("中")
    println!("{:?}", safe_slice(s, 0, 2)); // None
}
// Converting a `String` into its underlying bytes.
fn main() {
    let s = String::from("hello");

    // into_bytes() consumes the String.
    let bytes = s.into_bytes();
    // `s` is no longer valid from this point on.
}
// Borrow instead of cloning when a read-only view is all that is needed.
fn main() {
    // Avoid: cloning just to take a slice.
    let s1 = String::from("hello");
    let s2 = s1.clone(); // deep copy
    let slice = &s2[0..2];

    // Prefer: slice the original directly.
    let s1 = String::from("hello");
    let slice = &s1[0..2]; // borrow
}
/// Builds a new `String` holding the `char`s of `s` in reverse order.
///
/// Reversal is done per `char`, so multi-byte characters stay intact.
fn reverse(s: &str) -> String {
    // The reversed text has the same byte length as the input.
    let mut reversed = String::with_capacity(s.len());
    for ch in s.chars().rev() {
        reversed.push(ch);
    }
    reversed
}

fn main() {
    let original = "hello 中国";
    let flipped = reverse(original);
    println!("原字符串:{}", original);
    println!("反转后:{}", flipped);
}
/// Reports whether `s` reads the same forwards and backwards.
///
/// Only alphanumeric `char`s take part in the comparison, and each is folded to
/// the first `char` of its lowercase mapping, so punctuation, whitespace and
/// letter case are ignored. A string with no alphanumeric characters
/// (including the empty string) counts as a palindrome.
fn is_palindrome(s: &str) -> bool {
    let normalized: Vec<char> = s
        .chars()
        .filter(|c| c.is_alphanumeric())
        // Fall back to the original char instead of unwrapping, so there is no panic path.
        .map(|c| c.to_lowercase().next().unwrap_or(c))
        .collect();
    // Compare front-to-back with back-to-front without building a second String.
    normalized.iter().eq(normalized.iter().rev())
}

fn main() {
    println!("{}", is_palindrome("A man, a plan, a canal: Panama")); // true
    println!("{}", is_palindrome("race a car")); // false
    println!("{}", is_palindrome("中国人中国")); // false: reversed it reads "国中人国中"
    println!("{}", is_palindrome("中国人国中")); // true
}
/// Prints size statistics for `s`, then a tally of its characters by category.
///
/// Each char is counted once, in the first category it matches:
/// alphabetic, then numeric, then whitespace, then everything else.
fn analyze(s: &str) {
    let byte_len = s.len();
    let char_count = s.chars().count();
    let word_count = s.split_whitespace().count();
    let line_count = s.lines().count();

    println!("字符串:{}", s);
    println!("字节数:{}", byte_len);
    println!("字符数:{}", char_count);
    println!("单词数:{}", word_count);
    println!("行数:{}", line_count);

    // Tally characters by category; the arm order fixes the precedence.
    let (mut letters, mut digits, mut spaces, mut others) = (0, 0, 0, 0);
    for ch in s.chars() {
        match ch {
            c if c.is_alphabetic() => letters += 1,
            c if c.is_numeric() => digits += 1,
            c if c.is_whitespace() => spaces += 1,
            _ => others += 1,
        }
    }

    println!("字母:{},数字:{},空白:{},其他:{}", letters, digits, spaces, others);
}

fn main() {
    analyze("Hello, Rust 2024!\n这是第2行。");
}
/// Splits `line` on commas and trims surrounding whitespace from every field.
///
/// The returned slices borrow from `line`. Quoting is not interpreted: a comma
/// always separates fields.
fn parse_csv_line(line: &str) -> Vec<&str> {
    line.split(',').map(str::trim).collect()
}

fn main() {
    let csv = "name, age, city\nAlice, 30, New York\nBob, 25, London";
    // Parse each row and print its fields in Debug form.
    csv.lines()
        .map(parse_csv_line)
        .for_each(|fields| println!("{:?}", fields));
}
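本章学习了Rust的字符串处理:`String`的创建、追加、连接、插入与删除;UTF-8编码下的字节与字符遍历,以及切片时的字节边界;查找、替换、分割、修剪和大小写转换等常用方法;以及`String`与`&str`之间的相互转换。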
理解String和&str的区别,以及UTF-8编码特性,是Rust字符串处理的关键。