Python 文件和正则--with文件读取,深浅拷贝,正则表达式

1.with语句

open 方式打开文件

# Drawback of opening a file with a bare open() call.
f = open('1.txt','w')
f.write('hello world')
f.flush()
# f.close() is deliberately left out: the handle is never released, so
# (per the original note) others cannot operate on the file while it is held.

# ------ An exception prevents the resource from being released ------
# The file below is opened read-only, so write() raises and the close() call
# on the last line is never reached.
# f = open('1.txt','r')
# f.write('hello world')
# f.flush()
# f.close()

# This version does release the resource, but the try/except/finally
# boilerplate is verbose and tedious to write.
f = None
try:
    f = open('1.txt','r')
    f.write('hello world')  # the file is opened read-only, so this write raises
    f.flush()
except Exception as e:
    print('出错误了')
finally:
    # f is still None when open() itself failed; only close a file that was opened.
    if f is not None:
        f.close()
        

# Endless busy loop: execution never gets past this point until the process
# is interrupted.
# NOTE(review): presumably this keeps the process alive so the state of 1.txt
# can be inspected from outside -- confirm the intent.
while True:
    pass

with语句操作文件

# The with statement closes the file automatically when the block exits,
# so no explicit f.close() call is needed.
with open("1.txt", 'w') as f:
    f.writelines(('hello\n', 'world\n', 'python\n'))

# Endless busy loop: execution never gets past this point until the process
# is interrupted.
# NOTE(review): presumably this keeps the process alive so it can be checked
# that 1.txt is no longer held after the with block -- confirm the intent.
while True:
    pass

2.深拷贝和浅拷贝

浅拷贝

# 语法: 1.导入模块 copy
import copy

# 不可变类型有: 数字、字符串、元组
# a1 = 123
# b1 = copy.copy(a1)
# print(id(a1))#2697886781688
# print(id(b1))#2697886781688
#
# b2 = a1
# print(id(a1))#2697886781688
# print(id(b2))#2697886781688

# a2 = 'abc'
# b2 = copy.copy(a2)
# print(id(a2))#2697886781168
# print(id(b2))#2697886781168

# a3 = (1,2,[3,4])
# b3 = copy.copy(a3)
# print(id(a3))#2165171410048
# print(id(b3))#2165171410048
# print(id(a3[2]))#2272505786496
# print(id(b3[2]))#2272505786496


# 可变类型有: 列表、字典、集合
# a4 = [1,2,3]
# b4 = copy.copy(a4)
# print(id(a4))#2281864200256
# print(id(b4))#2281863566720

# a5 = {"name":"tom","age":20}
# b5 = copy.copy(a5)
# print(id(a5))#1807425866112
# print(id(b5))#1807425866176

# a6 = {1,2,3}
# b6 = copy.copy(a6)
# print(id(a6))#2784928707520
# print(id(b6))#2784928707744

a7 = [1, 2, [3, 4]]
b7 = copy.copy(a7)

# The outer list is duplicated, so these two ids differ.
print(id(a7), id(b7), sep='\n')

# A shallow copy does not re-create mutable elements: both outer lists refer
# to the very same inner list, so these two ids are equal.
print(id(a7[2]), id(b7[2]), sep='\n')

# Mutating the shared inner list through a7 is therefore visible through b7.
a7[2][0] = 9

# Prints [1, 2, [9, 4]] twice.
print(a7, b7, sep='\n')

深拷贝

# 语法: 1.导入模块 copy
import copy



a7 = [1, 2, [3, 4]]
b7 = copy.deepcopy(a7)

# The outer list is duplicated, so these two ids differ.
print(id(a7), id(b7), sep='\n')

# Unlike a shallow copy, a deep copy also re-creates nested mutable elements:
# each outer list owns its own inner list, so these two ids differ as well.
print(id(a7[2]), id(b7[2]), sep='\n')

# Mutating a7's inner list leaves the deep copy untouched.
a7[2][0] = 9

# Prints [1, 2, [9, 4]] and then [1, 2, [3, 4]].
print(a7, b7, sep='\n')

3.正则表达式入门

# 需求: 校验密码格式是否合法: 格式:6-10位自然数.

#定义函数.去判断密码是否合法.
import re


def check(pwd):
    """Validate a password and print the verdict.

    A valid password consists of 6 to 10 ASCII digits (0-9). Exactly one
    message is printed: the first rule that fails, or the success message.

    Args:
        pwd: The candidate password as a string.

    Returns:
        None. The verdict is reported on standard output only.
    """
    # Guard clauses: reject as early as possible.
    # str.isdigit() also accepts non-ASCII digits (full-width digits,
    # superscripts, ...), which the [0-9] rule does not allow, so every
    # character is compared against the ASCII digit range instead.
    if not pwd or not all('0' <= ch <= '9' for ch in pwd):
        print('密码不是纯数字')
        return

    if len(pwd) < 6:
        print('密码长度最低6位')
        return

    if len(pwd) > 10:
        print('密码长度最高10位')
        return

    print('密码合法')

if __name__ == '__main__':
    pwd = input('请输入密码')
    # check(pwd) is the hand-written alternative to the regular expression
    # used below.
    res = re.match("^[0-9]{6,10}$", pwd)
    # re.match returns None on failure, so the match object doubles as a flag.
    print('密码合格' if res else '密码不合格')

4.正则匹配单个字符

. 匹配任意1个字符（除了\n）
[] 匹配[ ]中列举的字符
\d 匹配数字，即0-9
\D 匹配非数字，即不是数字
\s 匹配空白字符，即空格、tab键、换行符等
\S 匹配非空白
\w 匹配非特殊字符，即a-z、A-Z、0-9、_、汉字
\W 匹配特殊字符，即非字母、非数字、非下划线、非汉字


import re

# res = re.match(".","\n")
# Patterns are written as raw strings so that sequences such as \d reach the
# regex engine unchanged; in a normal string literal they are invalid escape
# sequences and trigger a warning at compile time.

# "." matches any single character except a newline.
res = re.match(r".", "abc")
print(res)  # <re.Match object; span=(0, 1), match='a'>
print(type(res))  # <class 're.Match'>
print(res.group())  # a

# The text starts with a newline, which "." does not match -> failure.
res = re.match(r'.', '\nbc')
# re.match returns None on failure, so the result can be tested directly.
print(f'匹配成功:{res.group()}' if res else '匹配失败!')

# [abc] matches exactly one of the listed characters -> matches 'c'.
res = re.match(r'[abc]', 'cbc')
print(f'匹配成功:{res.group()}' if res else '匹配失败!')

# \d matches one digit (like [0-9]); 'cbc' starts with a letter -> failure.
res = re.match(r'\d', 'cbc')
print(f'匹配成功:{res.group()}' if res else '匹配失败!')

# \D matches one non-digit; '0cbc' starts with a digit -> failure.
res = re.match(r'\D', '0cbc')
print(f'匹配成功:{res.group()}' if res else '匹配失败!')

# \s matches one whitespace character (space, tab, newline, ...) -> matches ' '.
res = re.match(r'\s', ' cbc')
print(f'匹配成功:{res.group()}' if res else '匹配失败!')

# \S matches one non-whitespace character -> matches '你'.
res = re.match(r'\S', '你好bc')
print(f'匹配成功:{res.group()}' if res else '匹配失败!')

# \w matches one word character (a-z, A-Z, 0-9, _, Chinese characters);
# '&' is not a word character -> failure.
res = re.match(r'\w', '&bc')
print(f'匹配成功:{res.group()}' if res else '匹配失败!')

# \W matches one non-word character; '&#' match \W\W, but 'f' is not in
# [abc] -> failure.
res = re.match(r'\W\W[abc]', '&#fc')
print(f'匹配成功:{res.group()}' if res else '匹配失败!')

5.正则匹配多个字符

* 匹配前一个字符出现0次或者无限次，即可有可无
+    匹配前一个字符出现1次或者无限次，即至少有1次
?    匹配前一个字符出现1次或者0次，即要么有1次，要么没有
{m} 匹配前一个字符出现m次
{m,n}    匹配前一个字符出现从m到n次


import re
# Patterns are raw strings: '\d' in a normal string literal is an invalid
# escape sequence and triggers a warning at compile time.
print(re.match(r'[abc]{3}', 'abc'))  # match='abc'

# *  the previous item occurs zero or more times (optional, unbounded)
print(re.match(r'\d*', 'abc'))  # match=''
print(re.match(r'\d*', '1abc'))  # match='1'
print(re.match(r'\d*', '123abc'))  # match='123'
# +  the previous item occurs one or more times (at least once)
print(re.match(r'\d+', 'abc'))  # None
print(re.match(r'\d+', '1abc'))  # match='1'
print(re.match(r'\d+', '123abc'))  # match='123'
# ?  the previous item occurs zero times or once
print(re.match(r'\d?', 'abc'))  # match=''
print(re.match(r'\d?', '1abc'))  # match='1'
print(re.match(r'\d?', '12abc'))  # match='1'

# {m}  the previous item occurs exactly m times
print(re.match(r'\d{3}', '12abc'))  # None
print(re.match(r'\d{3}', '123abc'))  # match='123'
print(re.match(r'\d{3}', '1234bc'))  # match='123'
# {m,n}  the previous item occurs between m and n times (as many as possible)
print(re.match(r'\d{3,5}', '12abc'))  # None
print(re.match(r'\d{3,5}', '123abc'))  # match='123'
print(re.match(r'\d{3,5}', '1234abc'))  # match='1234'
print(re.match(r'\d{3,5}', '12345abc'))  # match='12345'
print(re.match(r'\d{3,5}', '123456abc'))  # match='12345'

6.正则匹配开头和结尾

^ 匹配字符串开头
$ 匹配字符串结尾


# 1.匹配以数字开头的数据
import re

# Patterns are raw strings: "\d" in a normal string literal is an invalid
# escape sequence and triggers a warning at compile time.
print(re.match(r"^\d.*", "6hello"))  # match='6hello'
print(re.match(r"^\d[a-z]*", "6hello"))  # match='6hello'

# 2. Match text that ends with a digit.
print(re.match(r"\d$", "3"))  # match='3'
# re.match always starts at position 0, so 'h' must match \d -> None.
print(re.match(r"\d$", "hello3"))  # None
print(re.match(r".*\d$", "hello123"))  # match='hello123'


# 3. Match text that starts and ends with a digit; anything in between.
print(re.match(r"^\d.*\d$", "6hello8"))  # match='6hello8'
print(re.match(r"^\d.*\d$", "61238"))  # match='61238'
print(re.match(r"^\d.*\d$", "68"))  # match='68'
print(re.match(r"^\d.*\d$", "6 8"))  # match='6 8'


# 4. Special syntax [^chars]: negation, matches any character NOT listed.
# Requirement: the first character may be anything except a, e, i, o, u.
print(re.match(r"[^aeiou]", "bcd"))  # match='b'
print(re.match(r"[^aeiou]", "123"))  # match='1'
print(re.match(r"[^aeiou]", "acd"))  # None
print(re.match(r"[^aeiou]", "ecd"))  # None
print(re.match(r"[^aeiou]", "ucd"))  # None

7.匹配分组

| 匹配左右任意一个表达式
(ab) 将括号中字符作为一个分组
\num 引用分组num匹配到的字符串
(?P<name>) 分组起别名
(?P=name) 引用别名为name分组匹配到的字符串


# 1.需求: 在列表中["apple", "banana", "orange", "pear"]，匹配apple和pear
import re

list_str = ["apple", "banana", "orange", "pear"]
for i in list_str:
    # "|" matches either the expression on its left or the one on its right.
    res = re.match("apple|pear", i)
    # A match object is truthy; None (no match) is falsy.
    print(res.group() if res else f'匹配失败:{i}')


# 2. Requirement: match 163, 126 and qq e-mail addresses.
email1 = 'hanbaobao@163.com'
email2 = 'hanbaobao@126.com'
email3 = 'hanbaobao@qq.cn'
# Raw string, so \w and \. reach the regex engine unchanged.
# re.match only anchors the start of the text; the trailing "$" makes sure
# nothing follows the domain suffix (e.g. 'hanbaobao@qq.cnn' is rejected).
email_pattern = r'\w{8,20}@(163|126|qq)\.(com|cn)$'
res = re.match(email_pattern, email3)
if res:
    print(f"匹配成功:{res.group()}")
else:
    print('匹配失败!')

# 3. Requirement: match data such as "qq:10567" and extract both the "qq"
# label and the qq number.
qq = "qq:10567"
# Raw string: '\d' in a normal string literal is an invalid escape sequence.
res = re.match(r'(qq):(\d{5})', qq)
print(res.group())  # qq:10567 (the whole match)
print(res.group(1))  # qq (first parenthesised group)
print(res.group(2))  # 10567 (second parenthesised group)
# print(res.group(3)) would raise IndexError: no such group


# 4. Requirement: match <html>hh</html>.
# \num refers back to the text captured by group number num, so the closing
# tag must repeat the opening tag name.
my_html = "<html>hh</html>"
res = re.match(r'<([a-zA-Z]+)>.*</\1>', my_html)
print(res.group())


# 5. Requirement: match <html><h6>www.itcast.cn</h6></html>.
# Two numbered groups: \2 closes the inner tag, \1 closes the outer tag.
my_html = "<html><h6>www.itcast.cn</h6></html>"
res = re.match(r'<([a-zA-Z]+)><([a-zA-Z0-9]+)>.*</\2></\1>', my_html)
print(res.group())

# 6. Requirement: the same match as above, using named groups.
# (?P<name>...) gives a group an alias.
# (?P=name) refers back to the text captured by the group with that alias.
my_html = "<html><h6>www.itcast.cn</h6></html>"
res = re.match(
    r'<(?P<name1>[a-zA-Z]+)>'
    r'<(?P<name2>[a-zA-Z0-9]+)>'
    r'.*'
    r'</(?P=name2)></(?P=name1)>',
    my_html,
)
print(res.group())

扩展: re.search() re.findall()

# 查询匹配方式.
import re
my_str = "见风使舵#美国#国会众议院通过了所谓#中国#不是发展#中国#家#法国#法案"
country_tag = '#(美国|中国)#'

# re.match only tries the pattern at the very start of the text; the text
# does not begin with "#", so the result is None.
res = re.match(country_tag, my_str)
print(res)  # None

# re.search scans through the text and returns the first location where the
# pattern matches, or None when there is no match at all.
res = re.search(country_tag, my_str)
print(res)  # <re.Match object; span=(4, 8), match='#美国#'>

# re.findall returns every non-overlapping match, scanning left to right.
# Quantifiers are greedy by default (they take as much text as possible).
# *? and +? are the lazy forms: they stop at the first position where the
# rest of the pattern matches, and the scan then continues after that match.
res = re.findall('#.*?#', my_str)
print(res)  # ['#美国#', '#中国#', '#中国#', '#法国#']