Lookahead & lookbehind
# Capture an entire line that contains a word
(?=.*WORD).*
# Capture an entire line that contains two words in any order
(?=.*WORD1)(?=.*WORD2).*
# Capture entire lines that contain any of WORD1, WORD2, or WORD3
(?=.*(?:WORD1|WORD2|WORD3)).*
# Capture entire lines that contain WORD[1-3] from left to right
(?=.*?(?:WORD1|WORD2|WORD3)).*
# Capture every word in the string EXCEPT the words in the list
(?>[\w-]+)(?<!WORD1|WORD2|WORD3|WORD4)
# Capture every part of the string that comes before WORD
.*(?=WORD)
# Capture every part of the string that comes after WORD
(?<=WORD).*
# Capture every part of the string that comes between two words
(?<=WORD1).*(?=WORD2)
# Captures the first word after WORD
(?<=WORD)\s+(\w+)
# Captures the words on either side (left and right) of the word WORD
\w+(?=\sWORD)|(?<=WORD\s)\w+
Lookbehind
# Positive lookbehind
# Capture WORD only if it is immediately preceded by MATCH
(?<=MATCH)WORD
# Negative lookbehind
# Capture WORD only if it is NOT immediately preceded by MATCH
(?<!MATCH)WORD
Word Boundaries
# Capture a phrase if it exists in the string and return that phrase
\b(To Be Or Not To Be)\b
# Capture a phrase if it exists in a string (this one does not use word boundaries so it must account for blank spaces)
(To\sBe\sOr\sNot\sTo\sBe)
# Capture any word that appears twice in a string
(\b\w+\b)(?=[\s\S]*\b\1\b)
# Capture any phrase that appears twice in a string
(\b\w+\s+\S+\b)(?=[\s\S]*\b\1\b)
# non-breaking spaces (U+202F narrow no-break space, U+00A0 no-break space)
[\u202F\u00A0]
# all ASCII characters
[\u{0}-\u{7F}]
Subtract
# all letters but Q and W
[\p{L}--[QW]]
# all non-decimal numbers, plus 0-9
[\p{N}--[\p{Nd}--[0-9]]]
# all letters in the ASCII range, by subtracting non-letters
[\u{0}-\u{7F}--\P{letter}]
# Greek letters except alpha
[\p{Greek}--\N{GREEK SMALL LETTER ALPHA}]
# all assigned characters except for hex digits (using a broad definition)
[\p{Assigned}--\p{Decimal Digit Number}--[a-fA-Fａ-ｆＡ-Ｆ]]
# either letter or ascii, but not both
[\p{letter}~~\p{ascii}]
# same as
[[\p{letter}\p{ascii}]--[\p{letter}&&\p{ascii}]]
# matches a single letter that is not a vowel
[a-z&&[^aeiuo]]
# is the same as [4-6]
# useful for emoji groups
[0-9&&[0-6&&[4-9]]]
[0-9-[0-6-[0-3]]]
# => first subtracts 0-3 from 0-6
# => yielding [0-9-[4-6]], or [0-37-9]
# => matches any character in the string 0123789
# subtract two classes "sequentially"-ish by adding those classes together first then subtracting
# To subtract ASCII characters and Greek characters from a class with all Unicode letters
[\p{L}-[\p{IsBasicLatin}\p{IsGreek}]]
# same as \w
[\p{Alphabetic}\p{GC=Mark}\p{GC=Decimal_Number}\p{GC=Connector_Punctuation}]
Remove emojis
# remove emojis
(?s)[\p{Basic_Emoji}&&\p{any}]
(?s)[\p{Basic_Emoji}&&\p{any}](?!.*\|)(?=[^\w\d[:punct:]])
# remove emojis that aren't used in a markdown table - needs work
(?s)[\p{Basic_Emoji}&&\p{any}](?!.*\|)
# html equivalent
[\p{Basic_Emoji}&&\p{any}](?!.*</t[dhr]>)
Sources:
- reddit: r/shortcuts
- regular-expressions.info: subtract