Upgrade to Pro — share decks privately, control downloads, hide ads and more …

PCRE With PHP

PCRE With PHP

PHP Benelux 2015

Avatar for Thomas Weinert

Thomas Weinert

January 24, 2015
Tweet

More Decks by Thomas Weinert

Other Decks in Programming

Transcript

  1. OFFSET $subject = 'aa ab ac ad'; $offset = 0;

    $length = strlen($subject); while ($offset < $length) { if (preg_match('(a.)', $subject, $match, PREG_OFFSET_CAPTURE, $offset)) { $offset = $match[0][1] + strlen($match[0][0]); var_dump($match[0][0]); } else { break; } } string(2) "aa" string(2) "ab" string(2) "ac" string(2) "ad"
  2. PATTERN String Escaping $pattern = '(\\\n)'; $text = <<<'TEXT' foo\nbar

    TEXT; preg_match($pattern, $text, $match); var_dump($pattern, $text, $match); string(5) "(\\n)" string(8) "foo\nbar" array(1) { [0]=> string(2) "\n" }
  3. MODIFIERS x - PCRE_EXTENDED u - PCRE_UTF8 D - PCRE_DOLLAR_ENDONLY

    s - PCRE_DOTALL m - PCRE_MULTILINE i - PCRE_CASELESS ...
  4. PCRE_EXTENDED $pattern = <<<'REGEX' (^ (d-)? # optional country prefix

    (\d{5}) # german zip code $)Dix REGEX; var_dump((bool)preg_match($pattern, 'D-50670')); bool(true)
  5. PCRE_DOLLAR_ENDONLY $examples = [ ["(^\\d+$)", "123"], ["(^\\d+$)", "123\n"], ["(^\\d+$)D", "123\n"],

    ["(\\A\\d+\\z)", "123\n"] ]; foreach ($examples as $example) { var_dump((bool)preg_match($example[0], $example[1], $match)); } bool(true) bool(true) bool(false) bool(false)
  6. PCRE_DOTALL $examples = [ ["(^.+$)", "123"], ["(^.+$)", "123\n456"], ["(^.+$)s", "123\n456"]

    ]; foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(1) { [0]=> string(3) "123" } array(0) { } array(1) { [0]=> string(7) "123 456" }
  7. PCRE_MULTILINE $examples = [ ["(^.+$)", "123"], ["(^.+$)", "123\n456"], ["(^.+$)m", "123\n456"]

    ]; foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(1) { [0]=> string(3) "123" } array(0) { } array(1) { [0]=> string(3) "123" }
  8. PCRE_CASELESS $examples = [ ["(foo)", "foo"], ["(foo)", "FOO"], ["(foo)i", "FOO"]

    ]; foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(1) { [0]=> string(3) "foo" } array(0) { } array(1) { [0]=> string(3) "FOO" }
  9. PREG_MATCH_ALL() $subject = 'aa ab ac ad'; preg_match_all('(a.)', $subject, $match);

    var_dump($match); array(1) { [0]=> array(4) { [0]=> string(2) "aa" [1]=> string(2) "ab" [2]=> string(2) "ac" [3]=> string(2) "ad" } }
  10. PREG_PATTERN_ORDER $subject = 'ab ac'; preg_match_all('(a(.))', $subject, $match); var_dump($match); array(2)

    { [0]=> array(2) { [0]=> string(2) "ab" [1]=> string(2) "ac" } [1]=> array(2) { [0]=> string(1) "b" [1]=> string(1) "c" } }
  11. PREG_SET_ORDER $subject = 'ab ac'; preg_match_all('(a(.))', $subject, $match, PREG_SET_ORDER); var_dump($match);

    array(2) { [0]=> array(2) { [0]=> string(2) "ab" [1]=> string(1) "b" } [1]=> array(2) { [0]=> string(2) "ac" [1]=> string(1) "c" } }
  12. PREG_REPLACE_CALLBACK() No need for modifier "e" (PREG_REPLACE_EVAL) var_dump( preg_replace_callback( '(a(.))',

    function ($match) { return strtoupper($match[1]); }, 'ab ac' ) ); string(3) "B C"
  13. FUNCTOR class Replacer { public function __invoke($match) { return strtoupper($match[1]);

    } } var_dump( preg_replace_callback( '(a(.))', new Replacer(), 'ab ac' ) );
  14. PREG_SPLIT() $pattern = '(\\R)u'; $subject = "one\rtwo\n\nthree\r\nfour"; $match = preg_split($pattern,

    $subject); var_dump($match); array(5) { [0]=> string(3) "one" [1]=> string(3) "two" [2]=> string(0) "" [3]=> string(5) "three" [4]=> string(4) "four" }
  15. PREG_SPLIT_NO_EMPTY $pattern = '(\\R)u'; $subject = "one\rtwo\n\nthree\r\nfour"; $match = preg_split($pattern,

    $subject, -1, PREG_SPLIT_NO_EMPTY); var_dump($match); array(4) { [0]=> string(3) "one" [1]=> string(3) "two" [2]=> string(5) "three" [3]=> string(4) "four" }
  16. PREG_SPLIT_OFFSET_CAPTURE $pattern = '(\\R)u'; $subject = "one\rtwo\n\nthree"; $flags = PREG_SPLIT_NO_EMPTY

    | PREG_SPLIT_OFFSET_CAPTURE; $match = preg_split($pattern, $subject, -1, $flags); var_dump($match); array(3) { [0]=> array(2) { [0]=> string(3) "one" [1]=> int(0) } [1]=> array(2) { [0]=> string(3) "two" [1]=> int(4) } [2]=> array(2) { [0]=> string(5) "three" [1]=> int(9) } }
  17. PREG_SPLIT_DELIM_CAPTURE $highlights = ['small' => '*', 'short' => '_']; $pattern

    = '((small|short))u'; $subject = "A small, short example"; $match = preg_split($pattern, $subject, -1, PREG_SPLIT_DELIM_CAPTURE); foreach ($match as $part) { if (isset($highlights[$part])) { echo $highlights[$part], $part, $highlights[$part]; } else { echo $part; } } A *small*, _short_ example
  18. REGEXITERATOR $data = new ArrayIterator(['aa', 'ab']); $iterator = new RegexIterator(

    $data, '(.(.))', RegexIterator::REPLACE ); $iterator->replacement = '$1'; var_dump(iterator_to_array($iterator)); array(2) { [0] => string(1) "a" [1] => string(1) "b" }
  19. UNICODE Modifier u All: \X Token: \x{A9} Category: \p{L} Negation:

    \P{L}, \p{^L} Scripts: \p{Hangul} Blocks: \p{Arrows}
  20. UNICODE EXAMPLE $data = <<<'DATA' English German 한국어 日本語 DATA;

    preg_match_all('(\\pL+)u', $data, $match); var_dump($match[0]); array(4) { [0] => string(7) "English" [1] => string(6) "German" [2] => string(9) "한국어" [3] => string(9) "日本語" }
  21. SUBPATTERN MODIFIERS (?i-sm) $examples = [ ["((?i)foo)", "FOO"], ["((?-i)foo)i", "FOO"]

    ]; foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(1) { [0]=> string(3) "FOO" } array(0) { }
  22. NAMED SUBPATTERNS $pattern = "(^ (?P<year>\d{4}) (?:-(?<month>\d{1,2}))? (?:-(?'day'\d{1,2}))? )x"; preg_match($pattern,

    "2015-01-24", $match); var_dump($match); array(7) { [0]=> string(10) "2015-01-24" ["year"]=> string(4) "2015" [1]=> string(4) "2015" ["month"]=> string(2) "01" [2]=> string(2) "01" ["day"]=> string(2) "24" [3]=> string(2) "24" }
  23. PRE-DEFINED SUBROUTINES $pattern = "( ^ (?&number) (?:\\.(?&number)){3} $ (?(DEFINE)

    (?'number'25[0-5]|2[0-4]\d|1\d{2}|\d{1,2}) ) )x"; var_dump((bool)preg_match($pattern, "127.0.0.1", $match)); var_dump((bool)preg_match($pattern, "355.0.0.1", $match)); bool(true) bool(false)
  24. LOOK AHEAD $examples = [ ["(h(?=e))", "hello"], ["(h(?=e)llo)", "hello"], ["(h(?=e).llo)",

    "hello"] ]; foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(1) { [0]=> string(1) "h" } array(0) { } array(1) { [0]=> string(5) "hello" }
  25. LOOK AHEAD - NEGATION $examples = [ ["(h(?!e))", "hello"], ["(h(?!e))",

    "hallo"] ]; foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(0) { } array(1) { [0]=> string(1) "h" }
  26. LOOK BEHIND $examples = [ ["((?<=h).)", "hello"], ["((?<!h).)", "hallo"] ];

    foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(1) { [0]=> string(1) "e" } array(1) { [0]=> string(1) "h" }
  27. LOOK BEHIND - ALTERNATIVES $examples = [ ["((?<=e|ha|.{2})l)", "hello"], ["((?<=e|ha)l)",

    "hallo"], ["((?<=e|.{2})l)", "hallo"] ]; foreach ($examples as $example) { preg_match($example[0], $example[1], $match); var_dump($match); } array(1) { [0]=> string(1) "l" } array(1) { [0]=> string(1) "l" } array(1) { [0]=> string(1) "l" }
  28. LOOK BEHIND - UNKNOWN LENGTH preg_match("((?<=.{2,})l)", 'hello', $match); Warning: preg_match():

    Compilation failed: lookbehind assertion is not fixed length at offset 9 in /tmp... on line 2
  29. CONDITIONALS $pattern = '((?<quote>[\'"])?(?(quote).*?\\k<quote>|\\w+))'; $data = ['foo', '"foo"', "'foo'", 'foo

    bar', '"foo bar"']; foreach ($data as $subject) { if (preg_match($pattern, $subject, $match)) { echo $match[0], "\n"; } } foo "foo" 'foo' foo "foo bar"
  30. RECURSIONS $pattern = <<<'PCRE' ( \( ( (?>[^()]+) | (?R)

    )* \) )Ux PCRE; preg_match_all($pattern, '(ab(cd)ef)(gh)', $match); var_dump($match); array(2) { [0] => array(2) { [0] => string(10) "(ab(cd)ef)" [1] => string(4) "(gh)" } [1] => array(2) { [0] => string(1) "f" [1] => string(1) "h" } }
  31. START OF PATTERN MODIFIERS (*UTF), (*UTF8), (*UTF16), (*UTF32) (*UTF)(*UCP) =

    u (*CR), (*LF), (*CRLF), (*ANYCRLF), (*ANY) (*BSR_ANYCRLF), (*BSR_UNICODE) - \R (*LIMIT_MATCH=x), (*LIMIT_RECURSION=d) (*NO_AUTO_POSSESS), (*NO_START_OPT) (*NOTEMPTY), (*NOTEMPTY_ATSTART)
  32. VERSIONS PCRE2 10.0 2015-01-05 PCRE 8.36 2014-09-26 3V4L.ORG PHP7, HHVM

    >= 3.3: 8.35 2014-04-04 PHP >= 5.5.10: 8.34 2013-12-15