目录
首先，导入正则表达式模块：
import re
1.简单的匹配
# Search the sentence for the first run of one or more digits.
text = "There are 123 apples"
pattern = r'\d+'  # \d+ means "one or more digits"
match = re.search(pattern, string=text)
print(match.group(0))  # Output: 123
2.匹配所有出现
# Collect every match of the pattern in the text as a list of strings.
matches = re.findall(pattern, string=text)
print(matches)  # Output: ['123']
3.替换文本
# Replace each match of the pattern in the text with the literal '456'.
new_text = re.sub(pattern, repl='456', string=text)
print(new_text)  # Output: There are 456 apples
4.拆分字符串
# Break the text into pieces wherever a run of whitespace occurs.
split_text = re.split(pattern=r'\s+', string=text)
print(split_text)  # Output: ['There', 'are', '123', 'apples']
5.从字符串开头匹配
# re.match only tries the pattern at the very beginning of the text.
match = re.match(pattern=r'There', string=text)
print(match.group(0))  # Output: There
6.使用组
# The parentheses form group 1, which captures only the digits that are
# followed by whitespace and the word "apples".
pattern = r'(\d+)\s+apples'
match = re.search(pattern, string=text)
print(match.group(1))  # Output: 123
7.非贪婪匹配
# The lazy quantifier *? consumes as little as possible, so the match ends
# at the first '>' rather than the last one.
html = "<div><span>Test</span></div>"
pattern = r'<.*?>'
match = re.search(pattern, string=html)
print(match.group(0))  # Output: <div>
8.匹配数字
# Pull every run of digits out of a sentence; results are strings, not ints.
pattern = r'\d+'
numbers = re.findall(pattern, string="There are 3 cats and 4 dogs")
print(numbers)  # Output: ['3', '4']
9.匹配单词边界
# \b anchors on word boundaries, so the standalone word "cat" matches but
# the "cat" inside "catwalk" does not.
text = "The cat is on the catwalk"
pattern = r'\bcat\b'
match = re.search(pattern, string=text)
print(match.group(0))  # Output: cat
10.忽略大小写匹配
# With re.IGNORECASE the lowercase pattern also matches "Cat"; the second
# hit is the "cat" at the start of "catwalk".
text = "The Cat is on the catwalk"
pattern = r'cat'
matches = re.findall(pattern, text, flags=re.IGNORECASE)
print(matches)  # Output: ['Cat', 'cat']
11.多行模式匹配（^ 匹配每一行的开头）
# With re.MULTILINE, ^ matches at the start of every line, so both lines
# that begin with "cat" are found; "Dog" is skipped.
text = "cat\nDog\ncat"
pattern = r'^cat'
matches = re.findall(pattern, text, flags=re.MULTILINE)
print(matches)  # Output: ['cat', 'cat']