// Copyright 2015 Google Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//! CommonMark punctuation set based on spec and Unicode properties.
// Autogenerated by mk_puncttable.py
/// Punctuation bitmasks for the ASCII range, one `u16` per row of 16 code
/// points (row `r` covers `U+00r0...U+00rF`).
///
/// Bit `n` of a row (bit 0 is the least significant) is set when code point
/// `row * 16 + n` is punctuation; `is_ascii_punctuation` indexes this table
/// with `c / 16` and tests bit `c & 15`.
const PUNCT_MASKS_ASCII: [u16; 8] = [
0x0000, // U+0000...U+000F
0x0000, // U+0010...U+001F
0xfffe, // U+0020...U+002F
0xfc00, // U+0030...U+003F
0x0001, // U+0040...U+004F
0xf800, // U+0050...U+005F
0x0001, // U+0060...U+006F
0x7800, // U+0070...U+007F
];
/// Row keys for the non-ASCII punctuation table.
///
/// Each entry is a code point divided by 16, i.e. it identifies one row of 16
/// consecutive code points (the trailing comment on each line spells out the
/// range). Only the rows listed here have an entry in `PUNCT_MASKS`; the mask
/// for `PUNCT_TAB[i]` is `PUNCT_MASKS[i]`.
///
/// `is_punctuation` looks rows up with `binary_search`, so the entries must
/// stay in strictly ascending order and the two tables must stay the same
/// length.
const PUNCT_TAB: [u16; 132] = [
10, // U+00A0...U+00AF
11, // U+00B0...U+00BF
55, // U+0370...U+037F
56, // U+0380...U+038F
85, // U+0550...U+055F
88, // U+0580...U+058F
91, // U+05B0...U+05BF
92, // U+05C0...U+05CF
95, // U+05F0...U+05FF
96, // U+0600...U+060F
97, // U+0610...U+061F
102, // U+0660...U+066F
109, // U+06D0...U+06DF
112, // U+0700...U+070F
127, // U+07F0...U+07FF
131, // U+0830...U+083F
133, // U+0850...U+085F
150, // U+0960...U+096F
151, // U+0970...U+097F
175, // U+0AF0...U+0AFF
223, // U+0DF0...U+0DFF
228, // U+0E40...U+0E4F
229, // U+0E50...U+0E5F
240, // U+0F00...U+0F0F
241, // U+0F10...U+0F1F
243, // U+0F30...U+0F3F
248, // U+0F80...U+0F8F
253, // U+0FD0...U+0FDF
260, // U+1040...U+104F
271, // U+10F0...U+10FF
310, // U+1360...U+136F
320, // U+1400...U+140F
358, // U+1660...U+166F
361, // U+1690...U+169F
366, // U+16E0...U+16EF
371, // U+1730...U+173F
381, // U+17D0...U+17DF
384, // U+1800...U+180F
404, // U+1940...U+194F
417, // U+1A10...U+1A1F
426, // U+1AA0...U+1AAF
437, // U+1B50...U+1B5F
438, // U+1B60...U+1B6F
447, // U+1BF0...U+1BFF
451, // U+1C30...U+1C3F
455, // U+1C70...U+1C7F
460, // U+1CC0...U+1CCF
461, // U+1CD0...U+1CDF
513, // U+2010...U+201F
514, // U+2020...U+202F
515, // U+2030...U+203F
516, // U+2040...U+204F
517, // U+2050...U+205F
519, // U+2070...U+207F
520, // U+2080...U+208F
560, // U+2300...U+230F
562, // U+2320...U+232F
630, // U+2760...U+276F
631, // U+2770...U+277F
636, // U+27C0...U+27CF
638, // U+27E0...U+27EF
664, // U+2980...U+298F
665, // U+2990...U+299F
669, // U+29D0...U+29DF
671, // U+29F0...U+29FF
719, // U+2CF0...U+2CFF
727, // U+2D70...U+2D7F
736, // U+2E00...U+2E0F
737, // U+2E10...U+2E1F
738, // U+2E20...U+2E2F
739, // U+2E30...U+2E3F
740, // U+2E40...U+2E4F
768, // U+3000...U+300F
769, // U+3010...U+301F
771, // U+3030...U+303F
778, // U+30A0...U+30AF
783, // U+30F0...U+30FF
2639, // U+A4F0...U+A4FF
2656, // U+A600...U+A60F
2663, // U+A670...U+A67F
2671, // U+A6F0...U+A6FF
2695, // U+A870...U+A87F
2700, // U+A8C0...U+A8CF
2703, // U+A8F0...U+A8FF
2706, // U+A920...U+A92F
2709, // U+A950...U+A95F
2716, // U+A9C0...U+A9CF
2717, // U+A9D0...U+A9DF
2725, // U+AA50...U+AA5F
2733, // U+AAD0...U+AADF
2735, // U+AAF0...U+AAFF
2750, // U+ABE0...U+ABEF
4051, // U+FD30...U+FD3F
4065, // U+FE10...U+FE1F
4067, // U+FE30...U+FE3F
4068, // U+FE40...U+FE4F
4069, // U+FE50...U+FE5F
4070, // U+FE60...U+FE6F
4080, // U+FF00...U+FF0F
4081, // U+FF10...U+FF1F
4082, // U+FF20...U+FF2F
4083, // U+FF30...U+FF3F
4085, // U+FF50...U+FF5F
4086, // U+FF60...U+FF6F
4112, // U+10100...U+1010F
4153, // U+10390...U+1039F
4157, // U+103D0...U+103DF
4182, // U+10560...U+1056F
4229, // U+10850...U+1085F
4241, // U+10910...U+1091F
4243, // U+10930...U+1093F
4261, // U+10A50...U+10A5F
4263, // U+10A70...U+10A7F
4271, // U+10AF0...U+10AFF
4275, // U+10B30...U+10B3F
4281, // U+10B90...U+10B9F
4356, // U+11040...U+1104F
4363, // U+110B0...U+110BF
4364, // U+110C0...U+110CF
4372, // U+11140...U+1114F
4375, // U+11170...U+1117F
4380, // U+111C0...U+111CF
4387, // U+11230...U+1123F
4428, // U+114C0...U+114CF
4444, // U+115C0...U+115CF
4452, // U+11640...U+1164F
4679, // U+12470...U+1247F
5798, // U+16A60...U+16A6F
5807, // U+16AF0...U+16AFF
5811, // U+16B30...U+16B3F
5812, // U+16B40...U+16B4F
7113, // U+1BC90...U+1BC9F
];
/// Punctuation bitmasks for the non-ASCII rows listed in `PUNCT_TAB`.
///
/// `PUNCT_MASKS[i]` describes the 16 code points of row `PUNCT_TAB[i]` (the
/// trailing comment on each line spells out the range). Bit `n` of a mask
/// (bit 0 is the least significant) is set when code point `row * 16 + n` is
/// punctuation; `is_punctuation` tests bit `cp & 15`.
const PUNCT_MASKS: [u16; 132] = [
0x0882, // U+00A0...U+00AF
0x88c0, // U+00B0...U+00BF
0x4000, // U+0370...U+037F
0x0080, // U+0380...U+038F
0xfc00, // U+0550...U+055F
0x0600, // U+0580...U+058F
0x4000, // U+05B0...U+05BF
0x0049, // U+05C0...U+05CF
0x0018, // U+05F0...U+05FF
0x3600, // U+0600...U+060F
0xc800, // U+0610...U+061F
0x3c00, // U+0660...U+066F
0x0010, // U+06D0...U+06DF
0x3fff, // U+0700...U+070F
0x0380, // U+07F0...U+07FF
0x7fff, // U+0830...U+083F
0x4000, // U+0850...U+085F
0x0030, // U+0960...U+096F
0x0001, // U+0970...U+097F
0x0001, // U+0AF0...U+0AFF
0x0010, // U+0DF0...U+0DFF
0x8000, // U+0E40...U+0E4F
0x0c00, // U+0E50...U+0E5F
0xfff0, // U+0F00...U+0F0F
0x0017, // U+0F10...U+0F1F
0x3c00, // U+0F30...U+0F3F
0x0020, // U+0F80...U+0F8F
0x061f, // U+0FD0...U+0FDF
0xfc00, // U+1040...U+104F
0x0800, // U+10F0...U+10FF
0x01ff, // U+1360...U+136F
0x0001, // U+1400...U+140F
0x6000, // U+1660...U+166F
0x1800, // U+1690...U+169F
0x3800, // U+16E0...U+16EF
0x0060, // U+1730...U+173F
0x0770, // U+17D0...U+17DF
0x07ff, // U+1800...U+180F
0x0030, // U+1940...U+194F
0xc000, // U+1A10...U+1A1F
0x3f7f, // U+1AA0...U+1AAF
0xfc00, // U+1B50...U+1B5F
0x0001, // U+1B60...U+1B6F
0xf000, // U+1BF0...U+1BFF
0xf800, // U+1C30...U+1C3F
0xc000, // U+1C70...U+1C7F
0x00ff, // U+1CC0...U+1CCF
0x0008, // U+1CD0...U+1CDF
0xffff, // U+2010...U+201F
0x00ff, // U+2020...U+202F
0xffff, // U+2030...U+203F
0xffef, // U+2040...U+204F
0x7ffb, // U+2050...U+205F
0x6000, // U+2070...U+207F
0x6000, // U+2080...U+208F
0x0f00, // U+2300...U+230F
0x0600, // U+2320...U+232F
0xff00, // U+2760...U+276F
0x003f, // U+2770...U+277F
0x0060, // U+27C0...U+27CF
0xffc0, // U+27E0...U+27EF
0xfff8, // U+2980...U+298F
0x01ff, // U+2990...U+299F
0x0f00, // U+29D0...U+29DF
0x3000, // U+29F0...U+29FF
0xde00, // U+2CF0...U+2CFF
0x0001, // U+2D70...U+2D7F
0xffff, // U+2E00...U+2E0F
0xffff, // U+2E10...U+2E1F
0x7fff, // U+2E20...U+2E2F
0xffff, // U+2E30...U+2E3F
0x0007, // U+2E40...U+2E4F
0xff0e, // U+3000...U+300F
0xfff3, // U+3010...U+301F
0x2001, // U+3030...U+303F
0x0001, // U+30A0...U+30AF
0x0800, // U+30F0...U+30FF
0xc000, // U+A4F0...U+A4FF
0xe000, // U+A600...U+A60F
0x4008, // U+A670...U+A67F
0x00fc, // U+A6F0...U+A6FF
0x00f0, // U+A870...U+A87F
0xc000, // U+A8C0...U+A8CF
0x0700, // U+A8F0...U+A8FF
0xc000, // U+A920...U+A92F
0x8000, // U+A950...U+A95F
0x3ffe, // U+A9C0...U+A9CF
0xc000, // U+A9D0...U+A9DF
0xf000, // U+AA50...U+AA5F
0xc000, // U+AAD0...U+AADF
0x0003, // U+AAF0...U+AAFF
0x0800, // U+ABE0...U+ABEF
0xc000, // U+FD30...U+FD3F
0x03ff, // U+FE10...U+FE1F
0xffff, // U+FE30...U+FE3F
0xffff, // U+FE40...U+FE4F
0xfff7, // U+FE50...U+FE5F
0x0d0b, // U+FE60...U+FE6F
0xf7ee, // U+FF00...U+FF0F
0x8c00, // U+FF10...U+FF1F
0x0001, // U+FF20...U+FF2F
0xb800, // U+FF30...U+FF3F
0xa800, // U+FF50...U+FF5F
0x003f, // U+FF60...U+FF6F
0x0007, // U+10100...U+1010F
0x8000, // U+10390...U+1039F
0x0001, // U+103D0...U+103DF
0x8000, // U+10560...U+1056F
0x0080, // U+10850...U+1085F
0x8000, // U+10910...U+1091F
0x8000, // U+10930...U+1093F
0x01ff, // U+10A50...U+10A5F
0x8000, // U+10A70...U+10A7F
0x007f, // U+10AF0...U+10AFF
0xfe00, // U+10B30...U+10B3F
0x1e00, // U+10B90...U+10B9F
0x3f80, // U+11040...U+1104F
0xd800, // U+110B0...U+110BF
0x0003, // U+110C0...U+110CF
0x000f, // U+11140...U+1114F
0x0030, // U+11170...U+1117F
0x21e0, // U+111C0...U+111CF
0x3f00, // U+11230...U+1123F
0x0040, // U+114C0...U+114CF
0x03fe, // U+115C0...U+115CF
0x000e, // U+11640...U+1164F
0x001f, // U+12470...U+1247F
0xc000, // U+16A60...U+16A6F
0x0020, // U+16AF0...U+16AFF
0x0f80, // U+16B30...U+16B3F
0x0010, // U+16B40...U+16B4F
0x8000, // U+1BC90...U+1BC9F
];
/// Returns `true` if the byte `c` is flagged as punctuation in
/// `PUNCT_MASKS_ASCII`.
///
/// Bytes outside the ASCII range (`0x80` and above) always yield `false`.
pub(crate) fn is_ascii_punctuation(c: u8) -> bool {
    // The table only has rows for 0x00..=0x7F; reject everything else before
    // indexing so the lookup below can never go out of bounds.
    if c >= 0x80 {
        return false;
    }
    // High nibble selects the 16-character row, low nibble the bit inside it.
    let row_mask = PUNCT_MASKS_ASCII[usize::from(c >> 4)];
    ((row_mask >> (c & 0x0f)) & 1) == 1
}
/// Returns `true` if `c` is flagged as punctuation by this module's tables.
///
/// ASCII characters are delegated to `is_ascii_punctuation`. Every other
/// character is split into a 16-code-point row (`code / 16`) and a bit index
/// (`code & 15`); the row is located in `PUNCT_TAB` by binary search and the
/// bit is tested in the matching `PUNCT_MASKS` entry. Characters whose row is
/// not listed are not punctuation.
pub(crate) fn is_punctuation(c: char) -> bool {
    let code = u32::from(c);
    if code < 0x80 {
        is_ascii_punctuation(code as u8)
    } else if code > 0x1BC9F {
        // Past the last row of `PUNCT_TAB` (U+1BC90...U+1BC9F). Bailing out
        // here also keeps the `u16` cast below lossless.
        false
    } else {
        let row = (code >> 4) as u16;
        let bit = code & 0x0f;
        PUNCT_TAB
            .binary_search(&row)
            .map(|slot| ((PUNCT_MASKS[slot] >> bit) & 1) != 0)
            .unwrap_or(false)
    }
}
#[cfg(test)]
mod tests {
    use super::{is_ascii_punctuation, is_punctuation};

    /// Spot checks on individual bytes, including a non-ASCII byte.
    #[test]
    fn test_ascii() {
        for &b in b"!@~".iter() {
            assert!(is_ascii_punctuation(b), "0x{:02X} should be punctuation", b);
        }
        for &b in [b' ', b'0', b'A', 0xA1].iter() {
            assert!(
                !is_ascii_punctuation(b),
                "0x{:02X} should not be punctuation",
                b
            );
        }
    }

    /// The hand-rolled ASCII table must agree with the standard library's
    /// definition of ASCII punctuation for every possible byte value.
    #[test]
    fn test_ascii_matches_std() {
        for b in 0..=255u8 {
            assert_eq!(
                is_ascii_punctuation(b),
                b.is_ascii_punctuation(),
                "mismatch for byte 0x{:02X}",
                b
            );
        }
    }

    /// Spot checks across the table, including its first and last rows.
    #[test]
    fn test_unicode() {
        let punct = ['~', '\u{00A1}', '\u{060C}', '\u{FF65}', '\u{1BC9F}'];
        for &c in punct.iter() {
            assert!(is_punctuation(c), "U+{:04X} should be punctuation", c as u32);
        }

        let not_punct = [
            ' ',
            // Row is listed in the table but this bit is clear.
            '\u{00A0}',
            // Row is not listed in the table at all.
            '\u{03B1}',
            // Just past, and far past, the last table row.
            '\u{1BCA0}',
            '\u{10FFFF}',
        ];
        for &c in not_punct.iter() {
            assert!(
                !is_punctuation(c),
                "U+{:04X} should not be punctuation",
                c as u32
            );
        }
    }

    /// For ASCII input the `char` entry point must give the same answer as
    /// the byte entry point.
    #[test]
    fn test_unicode_agrees_with_ascii() {
        for b in 0..128u8 {
            assert_eq!(
                is_punctuation(char::from(b)),
                is_ascii_punctuation(b),
                "mismatch for U+{:04X}",
                b
            );
        }
    }
}