﻿!!chain;
# Character classes, one per Unicode Word_Break property value.
$CR           = [\p{Word_Break = CR}];
$LF           = [\p{Word_Break = LF}];
$Newline      = [\p{Word_Break = Newline}];
$Extend       = [\p{Word_Break = Extend}];
$Format       = [\p{Word_Break = Format}];
$Katakana     = [\p{Word_Break = Katakana}];
$ALetter      = [\p{Word_Break = ALetter}];
$MidNumLet    = [\p{Word_Break = MidNumLet}];
$MidLetter    = [\p{Word_Break = MidLetter}];
$MidNum       = [\p{Word_Break = MidNum}];
$Numeric      = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
# Characters whose LineBreak property is Complex_Context.
$dictionary   = [:LineBreak = Complex_Context:];
# Grapheme_Cluster_Break = Control; used below only to trim the dictionary set.
$Control      = [\p{Grapheme_Cluster_Break = Control}];
# ALetter widened with the dictionary characters that are neither Extend nor Control.
$ALetterPlus  = [$ALetter [$dictionary-$Extend-$Control]];
# The Ex forms: one base character followed by zero or more Extend or Format characters.
# Note that the ALetter form is built on the widened ALetterPlus set.
$KatakanaEx     = $Katakana     ($Extend |  $Format)*;
$ALetterEx      = $ALetterPlus  ($Extend |  $Format)*;
$MidNumLetEx    = $MidNumLet    ($Extend |  $Format)*;
$MidLetterEx    = $MidLetter    ($Extend |  $Format)*;
$MidNumEx       = $MidNum       ($Extend |  $Format)*;
$NumericEx      = $Numeric      ($Extend |  $Format)*;
$ExtendNumLetEx = $ExtendNumLet ($Extend |  $Format)*;
# Hiragana (selected by script) and Ideographic (selected by binary property),
# each with the same optional Extend/Format tail as the Ex forms above.
$Hiragana       = [\p{script=Hiragana}];
$Ideographic    = [\p{Ideographic}];
$HiraganaEx     = $Hiragana     ($Extend |  $Format)*;
$IdeographicEx  = $Ideographic  ($Extend |  $Format)*;
# ============= Custom Rules ================
# Each definition below only names a pattern; it affects tokenization only
# where a rule in one of the rule sections references it.
# XML markup: a run that begins with < and ends at the first following >,
# with at least one character in between.
$XmlMarkup = \<[^\>]+\>;
# Abbreviation: default abbreviation pattern or language-specific list-based pattern.
# The right-hand side on the next line is a template placeholder rather than rule
# syntax; presumably it is substituted before the rules are compiled (TODO confirm).
$Abbreviation = %abbreviations%;
# Hyphenated word: a run of ASCII letters or digits followed by one or more groups,
# each made of a single joiner from [-+&_] and another run of letters or digits.
# A slash is not accepted as a joiner.
$HyphenatedWord = [A-Za-z0-9]+([\-\+\&_][A-Za-z0-9]+)+;
# Contraction: ASCII letters, an apostrophe, then ASCII letters or digits.
$Contraction = [A-Za-z]+\'[A-Za-z0-9]+;
# Dollar amount: a dollar sign followed by one or more digits, commas or periods.
# NOTE(review): no rule visible in this file references this variable, so it
# currently has no effect - confirm whether a forward rule is missing.
$DollarAmount = \$[0-9\,\.]+;
# Email address: one or more letters, digits, underscores, hyphens or periods,
# then @, then a label that starts with a letter and has at least two characters,
# then a period and one or more lower-case letters.
$EmailAddress = [A-Za-z0-9_\-\.]+\@[A-Za-z][A-Za-z0-9_]+\.[a-z]+;
# Internet address: scheme://host, where host has at least two period-separated
# labels, optionally followed by one path segment, e.g. http://www.foo.com or
# http://www.foo.com/bar. The path group is optional so that an address without
# a path is still recognized as a whole.
$InternetAddress = [a-z]+\:\/\/[a-z0-9]+(\.[a-z0-9]+)+(\/[a-z0-9][a-z0-9\.]+)?;
# Emoticon: one character from B 8 : ; { [ followed by one or more characters
# from - = / { } ) (
$Emoticon = [B8\:\;\{\[][-=\/\{\}\)\(]+;
# Internet IP address: four runs of digits separated by periods. The pattern does
# not limit a run to three digits or to the range 0-255.
$InternetIpAddress = [0-9]+\.[0-9]+\.[0-9]+\.[0-9]+;
# Internet site address, such as www.ibm.com: a label starting with a lower-case
# letter followed by one or more groups of a period and a label. Each label after
# a period may be longer than one character, so multi-letter labels match.
$InternetSiteAddress = [a-z][a-z0-9]*(\.[a-z0-9]+)+;

!!forward;
# Forward rules. A number in braces is the status tag attached to the rule.
# The values presumably follow the ICU word-status ranges
# (100 number, 200 letter, 300 kana, 400 ideograph) - confirm against the consumer.
# CR followed by LF is matched as one unit.
$CR $LF;
# A run of Extend/Format characters stays with the single preceding character,
# if there is one and it is not CR, LF or Newline.
[^$CR $LF $Newline]? ($Extend |  $Format)+;
# Single-character matches, one per word category.
$NumericEx {100};
$ALetterEx {200};
$KatakanaEx {300};
$HiraganaEx {300};
$IdeographicEx {400};
# Letter followed by letter.
$ALetterEx $ALetterEx {200};
# Letters on both sides of a single MidLetter or MidNumLet character.
$ALetterEx ($MidLetterEx | $MidNumLetEx) $ALetterEx {200};
# Digit and letter adjacency, in every combination.
$NumericEx $NumericEx {100};
$ALetterEx $NumericEx {200};
$NumericEx $ALetterEx {200};
# Digits on both sides of a single MidNum or MidNumLet character.
$NumericEx ($MidNumEx | $MidNumLetEx) $NumericEx {100};
# Katakana followed by Katakana.
$KatakanaEx  $KatakanaEx {300};
# ExtendNumLet characters after a letter, digit, Katakana or another ExtendNumLet.
$ALetterEx      $ExtendNumLetEx {200};
$NumericEx      $ExtendNumLetEx {100};
$KatakanaEx     $ExtendNumLetEx {300};
$ExtendNumLetEx $ExtendNumLetEx {200};
# ExtendNumLet characters before a letter, digit or Katakana.
$ExtendNumLetEx $ALetterEx  {200};
$ExtendNumLetEx $NumericEx  {100};
$ExtendNumLetEx $KatakanaEx {300};
# =========== Custom Forwards ====================
# One forward rule per custom pattern, each with its own status tag in the
# 500 range so a match can be told apart from the standard word categories.
# The tags are not assigned in the order in which the patterns are defined.
# NOTE(review): the DollarAmount pattern defined among the custom patterns has
# no rule here - confirm whether that omission is intentional.
$Abbreviation {500};
$HyphenatedWord {501};
$EmailAddress {502};
$InternetAddress {503};
$XmlMarkup {504};
$Emoticon {505};
$InternetIpAddress {506};
$InternetSiteAddress {507};
$Contraction {508};

!!reverse;
# Reverse rules. The Back forms mirror the forward Ex forms: the optional run of
# Format/Extend characters is written first and the base character last.
$BackALetterEx     = ($Format | $Extend)* $ALetterPlus;
$BackMidNumLetEx   = ($Format | $Extend)* $MidNumLet;
$BackNumericEx     = ($Format | $Extend)* $Numeric;
$BackMidNumEx      = ($Format | $Extend)* $MidNum;
$BackMidLetterEx   = ($Format | $Extend)* $MidLetter;
$BackKatakanaEx    = ($Format | $Extend)* $Katakana;
$BackExtendNumLetEx= ($Format | $Extend)* $ExtendNumLet;
# LF then CR: the CR LF pair as seen when scanning backwards.
$LF $CR;
# Optional Format/Extend run followed by at most one character that is not
# CR, LF or Newline.
($Format | $Extend)*  [^$CR $LF $Newline]?;
# Mirrors of the standard forward rules for letters, digits, Katakana and
# ExtendNumLet. None of the rules in this section carries a status tag, and
# none of them refers to the custom patterns or to Hiragana/Ideographic.
$BackALetterEx $BackALetterEx;
$BackALetterEx ($BackMidLetterEx | $BackMidNumLetEx) $BackALetterEx;
$BackNumericEx $BackNumericEx;
$BackNumericEx $BackALetterEx;
$BackALetterEx $BackNumericEx;
$BackNumericEx ($BackMidNumEx | $BackMidNumLetEx) $BackNumericEx;
$BackKatakanaEx $BackKatakanaEx;
$BackExtendNumLetEx ($BackALetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx);
($BackALetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx;

!!safe_reverse;
# Safe-reverse rules. Presumably used by the break engine to back up from an
# arbitrary offset to a position where forward matching can start - confirm
# against the ICU break-rule documentation.
# A run of Extend/Format characters plus at most one further character.
($Extend | $Format)+ .?;
# A mid-word punctuation character followed by a Back-form letter or digit
# (the Back forms are defined in the reverse section).
($MidLetter | $MidNumLet) $BackALetterEx;
($MidNum | $MidNumLet) $BackNumericEx;
# Two adjacent dictionary characters.
$dictionary $dictionary;

!!safe_forward;
# Safe-forward rules: the forward-direction counterparts of the safe-reverse
# rules, written with the Ex forms instead of the Back forms.
# A run of Extend/Format characters plus at most one further character.
($Extend | $Format)+ .?;
# A mid-word punctuation character followed by a letter or digit.
($MidLetterEx | $MidNumLetEx) $ALetterEx;
($MidNumEx | $MidNumLetEx) $NumericEx;
# Two adjacent dictionary characters.
$dictionary $dictionary;
