Python - regex 模块_python的regex_tdev的博客-程序员宅基地

技术标签: python  regex  



PYPI:https://pypi.org/project/regex/

代码:https://bitbucket.org/mrabarnett/mrab-regex/src/hg/


安装:

(base) $ pip install regex

regex支持Python 2.5+和Python 3.1+


Examples

>>> regex.match(r'(?(?=\d)\d+|\w+)', '123abc')
<regex.Match object; span=(0, 3), match='123'>

>>> regex.match(r'(?(?=\d)\d+|\w+)', 'abc123')
<regex.Match object; span=(0, 6), match='abc123'>

>>> print(regex.match(r'(?:(?=\d)\d+\b|\w+)', '123abc'))
<regex.Match object; span=(0, 6), match='123abc'>

>>> print(regex.match(r'(?(?=\d)\d+\b|\w+)', '123abc'))
None


Added POSIX matching (leftmost longest)


>>> # Normal matching.
>>> regex.search(r'Mr|Mrs', 'Mrs')
<regex.Match object; span=(0, 2), match='Mr'>

>>> regex.search(r'one(self)?(selfsufficient)?', 'oneselfsufficient')
<regex.Match object; span=(0, 7), match='oneself'>

>>> # POSIX matching.
>>> regex.search(r'(?p)Mr|Mrs', 'Mrs')
<regex.Match object; span=(0, 3), match='Mrs'>

>>> regex.search(r'(?p)one(self)?(selfsufficient)?', 'oneselfsufficient')
<regex.Match object; span=(0, 17), match='oneselfsufficient'>

>>> m = regex.search(r'(\w\w\K\w\w\w)', 'abcdef')
>>> m[0]
'cde'
>>> m[1]
'abcde'
>>>
>>> m = regex.search(r'(?r)(\w\w\K\w\w\w)', 'abcdef')
>>> m[0]
'bc'
>>> m[1]
'bcdef'

>>> m = regex.match(r"(\w)+", "abc")
>>> m.expandf("{1}")
'c'
>>> m.expandf("{1[0]} {1[1]} {1[2]}")
'a b c'
>>> m.expandf("{1[-1]} {1[-2]} {1[-3]}")
'c b a'
>>>
>>> m = regex.match(r"(?P<letter>\w)+", "abc")
>>> m.expandf("{letter}")
'c'
>>> m.expandf("{letter[0]} {letter[1]} {letter[2]}")
'a b c'
>>> m.expandf("{letter[-1]} {letter[-2]} {letter[-3]}")
'c b a'


Added partial matches


>>> pattern = regex.compile(r'\d{4}')

>>> # Initially, nothing has been entered:
>>> print(pattern.fullmatch('', partial=True))
<regex.Match object; span=(0, 0), match='', partial=True>

>>> # An empty string is OK, but it's only a partial match.
>>> # The user enters a letter:
>>> print(pattern.fullmatch('a', partial=True))
None
>>> # It'll never match.


>>> # The user deletes that and enters a digit:
>>> print(pattern.fullmatch('1', partial=True))
<regex.Match object; span=(0, 1), match='1', partial=True>
>>> # It matches this far, but it's only a partial match.


>>> # The user enters 2 more digits:
>>> print(pattern.fullmatch('123', partial=True))
<regex.Match object; span=(0, 3), match='123', partial=True>
>>> # It matches this far, but it's only a partial match.

>>> # The user enters another digit:
>>> print(pattern.fullmatch('1234', partial=True))
<regex.Match object; span=(0, 4), match='1234'>
>>> # It's a complete match.


>>> # If the user enters another digit:
>>> print(pattern.fullmatch('12345', partial=True))
None
>>> # It's no longer a match.


>>> # This is a partial match:
>>> pattern.match('123', partial=True).partial
True


>>> # This is a complete match:
>>> pattern.match('1233', partial=True).partial
False


regex.sub

# Python 3.7 and later
>>> regex.sub('.*', 'x', 'test')
'xx'

>>> regex.sub('.*?', '|', 'test')
'|||||||||'


# Python 3.6 and earlier
>>> regex.sub('(?V0).*', 'x', 'test')
'x'

>>> regex.sub('(?V1).*', 'x', 'test')
'xx'

>>> regex.sub('(?V0).*?', '|', 'test')
'|t|e|s|t|'

>>> regex.sub('(?V1).*?', '|', 'test')
'|||||||||'


match

>>> m = regex.match(r"(?:(?P<word>\w+) (?P<digits>\d+)\n)+", "one 1\ntwo 2\nthree 3\n")
>>> m.groupdict()
{'word': 'three', 'digits': '3'}

>>> m.captures("word")
['one', 'two', 'three']

>>> m.captures("digits")
['1', '2', '3']

>>> m.capturesdict()
{'word': ['one', 'two', 'three'], 'digits': ['1', '2', '3']}



captures

>>> # With optional groups:
>>>
>>> # Both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or second")

>>> m.group("item")
'second'


>>> m.captures("item")
['first', 'second']


>>> # Only the second group captures.
>>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", " or second")

>>> m.group("item")
'second'


>>> m.captures("item")
['second']


>>> # Only the first group captures.
>>> m = regex.match(r"(?P<item>\w+)? or (?P<item>\w+)?", "first or ")

>>> m.group("item")
'first'


>>> m.captures("item")
['first']


>>> # With mandatory groups:
>>> # Both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", "first or second")


>>> m.group("item")
'second'


>>> m.captures("item")
['first', 'second']


>>> # Again, both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", " or second")

>>> m.group("item")
'second'


>>> m.captures("item")
['', 'second']


>>> # And yet again, both groups capture, the second capture 'overwriting' the first.
>>> m = regex.match(r"(?P<item>\w*) or (?P<item>\w*)", "first or ")

>>> m.group("item")
''

>>> m.captures("item")
['first', '']


fullmatch

>>> print(regex.fullmatch(r"abc", "abc").span())
(0, 3)

>>> print(regex.fullmatch(r"abc", "abcx"))
None

>>> print(regex.fullmatch(r"abc", "abcx", endpos=3).span())
(0, 3)

>>> print(regex.fullmatch(r"abc", "xabcy", pos=1, endpos=4).span())
(1, 4)


>>> regex.match(r"a.*?", "abcd").group(0)
'a'


>>> regex.fullmatch(r"a.*?", "abcd").group(0)
'abcd'


subf and subfn

>>> regex.subf(r"(\w+) (\w+)", "{0} => {2} {1}", "foo bar")
'foo bar => bar foo'
>>> regex.subf(r"(?P<word1>\w+) (?P<word2>\w+)", "{word2} {word1}", "foo bar")
'bar foo'

Added expandf to match object

>>> m = regex.match(r"(\w+) (\w+)", "foo bar")
>>> m.expandf("{0} => {2} {1}")
'foo bar => bar foo'
>>>
>>> m = regex.match(r"(?P<word1>\w+) (?P<word2>\w+)", "foo bar")
>>> m.expandf("{word2} {word1}")
'bar foo'

 

>>> m = regex.search(r"\w+", "Hello world")
>>> print(m.group())
Hello

>>> print(m.string)
Hello world

>>> m.detach_string()

>>> print(m.group())
Hello

>>> print(m.string)
None

>>> regex.match(r"(Tarzan|Jane) loves (?1)", "Tarzan loves Jane").groups()
('Tarzan',)

>>> regex.match(r"(Tarzan|Jane) loves (?1)", "Jane loves Tarzan").groups()
('Jane',)

>>> m = regex.search(r"(\w)(?:(?R)|(\w?))\1", "kayak")

>>> m.group(0, 1, 2)
('kayak', 'k', None)


>>> regex.match(r"(?iV1)strasse", "stra\N{LATIN SMALL LETTER SHARP S}e").span()
(0, 6)

>>> regex.match(r"(?iV1)stra\N{LATIN SMALL LETTER SHARP S}e", "STRASSE").span()
(0, 7)



>>> # A 'raw' fuzzy match:
>>> regex.fullmatch(r"(?:cats|cat){e<=1}", "cat").fuzzy_counts
(0, 0, 1)

>>> # 0 substitutions, 0 insertions, 1 deletion.

>>> # A better match might be possible if the ENHANCEMATCH flag is used:
>>> regex.fullmatch(r"(?e)(?:cats|cat){e<=1}", "cat").fuzzy_counts
(0, 0, 0)

>>> # 0 substitutions, 0 insertions, 0 deletions.







>>> m = regex.search('(fuu){i<=2,d<=2,e<=5}', 'anaconda foo bar')

>>> m
<regex.Match object; span=(7, 10), match='a f', fuzzy_counts=(0, 2, 2)>

>>> m.fuzzy_changes
([], [7, 8], [10, 11])
 



>>> p = regex.compile(r"first|second|third|fourth|fifth")


>>> option_set = ["first", "second", "third", "fourth", "fifth"]

>>> p = regex.compile(r"\L<options>", options=option_set)

>>> print(p.named_lists)
# Python 3
{'options': frozenset({'fifth', 'first', 'fourth', 'second', 'third'})}

# Python 2
{'options': frozenset(['fifth', 'fourth', 'second', 'third', 'first'])}

>>> option_set = ["first", "second", "third", "fourth", "fifth"]

>>> p = regex.compile(r"\L<options>", options=option_set, other_options=[])
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "C:\Python37\lib\site-packages\regex\regex.py", line 348, in compile
    return _compile(pattern, flags, ignore_unused, kwargs)
  File "C:\Python37\lib\site-packages\regex\regex.py", line 585, in _compile
    raise ValueError('unused keyword argument {!a}'.format(any_one))
ValueError: unused keyword argument 'other_options'

>>> p = regex.compile(r"\L<options>", options=option_set, other_options=[], ignore_unused=True)




>>> m = regex.search(r"(\w{3})+", "123456789")
>>> m.group(1)
'789'
>>> m.captures(1)
['123', '456', '789']
>>> m.start(1)
6
>>> m.starts(1)
[0, 3, 6]
>>> m.end(1)
9
>>> m.ends(1)
[3, 6, 9]
>>> m.span(1)
(6, 9)
>>> m.spans(1)
[(0, 3), (3, 6), (6, 9)]
 



>>> m = regex.search(r"(?P<before>.*?)(?P<num>\d+)(?P<after>.*)", "pqr123stu")

>>> print(m["before"])
pqr

>>> print(len(m))
4

>>> print(m[:])
('pqr123stu', 'pqr', '123', 'stu')




findall


>>> regex.findall(r".", "abc")
['a', 'b', 'c']

>>> regex.findall(r"(?r).", "abc")
['c', 'b', 'a']


>>> regex.findall(r"..", "abcde")
['ab', 'cd']

>>> regex.findall(r"(?r)..", "abcde")
['de', 'bc']



Branch reset



>>> regex.match(r"(?|(first)|(second))", "first").groups()
('first',)

>>> regex.match(r"(?|(first)|(second))", "second").groups()
('second',)




\p{han} 可以匹配汉字, \p{Latin} 可以匹配拉丁字母


参考


版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/weixin_45390999/article/details/120830583

智能推荐

Android Studio迁移到AndroidX详细教程_android studio 转androidx-程序员宅基地

你导入的项目报android.support.v7.XXX的错误?是时候迁移AndroidX了什么是AndroidX?androidx 是google对 android.support.xxx 包整理后的产物。由于之前的support包过于混乱,所以google推出了androidX。AndroidX具体情况可以参见这篇文章总是听到有人说AndroidX,到底什么是AndroidX?此..._android studio 转androidx

BZOJ2794/POI2012 Cloakroom_每个物品有三个属性 c+-程序员宅基地

Task 有n件物品,每件物品有三个属性a[i], b[i], c[i] .(a[i]<b[i]) 再给出q个询问,每个询问由非负整数m, k, s组成,问是否能够选出某些物品使得: 1. 对于每个选的物品i,满足a[i]<=m且b[i]>m+s。 2. 所有选出物品的c[i]的和正好是k。 n<=1,000, q<=1,000,000. c[i]<=1,000, 1<=a[i]<b[i_每个物品有三个属性 c+

Java并发编程学习笔记(第2章:线程安全性)_java 线程安全的计算器-程序员宅基地

1、线程安全性。当多个线程访问某个类时,这个类始终能表现出正确的行为,那么就称这个类是线程安全的。具体说,就是当多个线程访问某个类时,不管运行时环境采用何种调度方式将这些线程交替进行,并且在主调代码中不需要任何额外的同步或协同,这个类都能表现出正确的行为,那么就称这个类时线程安全的。2、无状态的对象一定是线程安全的。什么叫无状态的对象:无状态就是一次操作,不能保存数据。无状_java 线程安全的计算器

SQl基础_为聚合结果指定条件 HAVING_sql 怎么给聚合函数设置条件格式-程序员宅基地

学习重点使用count() 函数等对表中数据进行聚合操作时,为其指定条件的不是where 子句,而需要使用having子句。聚合函数可以在select子句、having子句和order by子句中使用。having子句要写在group by 子句之后。where子句用来指定数据行的条件,having子句用来指定分组的条件。having子句使用having 子句时select 语句的..._sql 怎么给聚合函数设置条件格式

mybatis简单应用_mybatis 应用-程序员宅基地

1.导包: mybatis-3.4.5.jar 2. mybatis-config.xml 配置文件:

jupyter notebook运行命令显示[*],解决办法_jupyter运行后*号-程序员宅基地

jupyter notebook运行命令,一直显示in[*],解决办法看下文章前我想先说一个问题1、因为notebook未安装正确2、因为jupyter包依赖的prompt-toolkit版本问题3、如果上面的问题无法解决。希望大家可以解决自己遇到的问题看下文章前我想先说一个问题遇到问题不要盲目的搜索答案,先看看终端提示什么错误,下面我总结一下会出现jupyter notebook运行命令,一..._jupyter运行后*号

随便推点

php 搜索图片,关于php图片搜索的10篇文章推荐_LaserComposites的博客-程序员宅基地

这节我们来实现一个在线图片搜索的小功能,听起来是不是很高大上呢?当然并非我们要实现一个图片搜索引擎,而是要站在巨人的肩膀上,借助API实现。我们的目的呢,当然是学习PHP开发啦!首先创建html页面,页面简单的实现输入和提交搜索功能:index.html代码如下:图片搜索1. PHP开发入门-在线图片搜索代码分享简介:这节我们来实现一个在线图片搜索的小功能,听起来是不是很高大上呢?当然并非我们要实..._php实现以图搜图

打印锯齿矩阵(C++)_131072k 锯齿矩阵是指每一行包含的元素个数不相同的矩阵,比如: 1 3 5 2 6 1 2 2_沫忆拾忆的博客-程序员宅基地

锯齿矩阵是指每一行包含的元素个数不相同的矩阵,比如:3 5 2 6 12 3 41 6 2 7读入若干对整数 (x,y),表示在第 x 行的末尾加上一个元素 y。输出最终的锯齿数组。初始时矩阵为空。输入格式第一行输入两个整数n,m(1≤n,m≤10000),其中 n 表示锯齿数组的行数,m 表示插入的元素总数。接下来一共 m 行,每行两个整数 x,y(1≤x≤n,0≤y≤10000),表示在第 x 行的末尾插入一个元素 y。输出格式一共输出 n 行,每行若干个用空格分隔的整数。如果某行没有_131072k 锯齿矩阵是指每一行包含的元素个数不相同的矩阵,比如: 1 3 5 2 6 1 2 2 3

空气电晕放电仿真_comsol放电-程序员宅基地

COMSOL空气电晕放电仿真。采用COMSOL等离子体模块,采用针板电极,施加直流电压,考虑具体的化学反应三十余种,可得到放电过程中电子密度,离子密度,电场强度等参数的时空分布,并可得到特里切尔脉冲。具体应用时,可根据需要自由更改曲率半径,间隙距离,电压类型,气压,温度等参数。该模型结果与SCI文献保持一致,可用于学习参考。部分结果图如下:..._comsol放电

python学习笔记——flask之渲染模板(Jinja2)-特殊变量和方法_IT技术学习的博客-程序员宅基地

在Flask中,有一些特殊的变量和方法是可以在模板文件中直接访问的。一、config 对象:config 对象就是Flask的config对象,也就是 app.config 对象{{ config.SQLALCHEMY_DATABASE_URI }}二、request 对象{{ request.url }}三、url_for 方法{{ url_for('index') }}{{ url_for('post', post_id=1024) }}四、get_flash

TextView,Button 等设置 setCompoundDrawables 无效-程序员宅基地

项目中遇到在代码中设置textview设置图标不显示的问题,后来知道图标必须setBounds viewHolder.idTvTitle.setText(MainStone); Drawable dra = getResources().getDrawable(R.drawable.icon_add2); dra.

【Linux】内核代码阅读 list_entry()_内核链表list_entry()_我要手撕源码的博客-程序员宅基地

内核代码学习list_entry()作用:获取ptr所属结构体的首地址#define list_entry(ptr, type, member) \ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))// ptr:指向list_node类型的指针// type:一个结构体// member:结构type中的一个域使用示例ptr为head.memberptr所属结构体为num._内核链表list_entry()