100.00% Lines (47/47) 100.00% Functions (14/14)
TLA Baseline Branch
Line Hits Code Line Hits Code
1   // 1   //
2   // Copyright (c) 2021 Vinnie Falco (vinnie dot falco at gmail dot com) 2   // Copyright (c) 2021 Vinnie Falco (vinnie dot falco at gmail dot com)
3   // 3   //
4   // Distributed under the Boost Software License, Version 1.0. (See accompanying 4   // Distributed under the Boost Software License, Version 1.0. (See accompanying
5   // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 5   // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6   // 6   //
7   // Official repository: https://github.com/boostorg/url 7   // Official repository: https://github.com/boostorg/url
8   // 8   //
9   9  
10   #ifndef BOOST_URL_GRAMMAR_LUT_CHARS_HPP 10   #ifndef BOOST_URL_GRAMMAR_LUT_CHARS_HPP
11   #define BOOST_URL_GRAMMAR_LUT_CHARS_HPP 11   #define BOOST_URL_GRAMMAR_LUT_CHARS_HPP
12   12  
13   #include <boost/url/detail/config.hpp> 13   #include <boost/url/detail/config.hpp>
14   #include <boost/url/grammar/detail/charset.hpp> 14   #include <boost/url/grammar/detail/charset.hpp>
15   #include <cstdint> 15   #include <cstdint>
16   #include <type_traits> 16   #include <type_traits>
17   17  
18   // Credit to Peter Dimov for ideas regarding 18   // Credit to Peter Dimov for ideas regarding
19   // SIMD constexpr, and character set masks. 19   // SIMD constexpr, and character set masks.
20   20  
21   namespace boost { 21   namespace boost {
22   namespace urls { 22   namespace urls {
23   namespace grammar { 23   namespace grammar {
24   24  
25   #ifndef BOOST_URL_DOCS 25   #ifndef BOOST_URL_DOCS
26   namespace detail { 26   namespace detail {
27   template<class T, class = void> 27   template<class T, class = void>
28   struct is_pred : std::false_type {}; 28   struct is_pred : std::false_type {};
29   29  
30   template<class T> 30   template<class T>
31   struct is_pred<T, void_t< 31   struct is_pred<T, void_t<
32   decltype( 32   decltype(
33   std::declval<bool&>() = 33   std::declval<bool&>() =
34   std::declval<T const&>().operator()( 34   std::declval<T const&>().operator()(
35   std::declval<char>()) 35   std::declval<char>())
36   ) > > : std::true_type 36   ) > > : std::true_type
37   { 37   {
38   }; 38   };
39   } // detail 39   } // detail
40   #endif 40   #endif
41   41  
42   /** A set of characters 42   /** A set of characters
43   43  
44   The characters defined by instances of 44   The characters defined by instances of
45   this set are provided upon construction. 45   this set are provided upon construction.
46   The `constexpr` implementation allows 46   The `constexpr` implementation allows
47   these to become compile-time constants. 47   these to become compile-time constants.
48   48  
49   @par Example 49   @par Example
50   Character sets are used with rules and the 50   Character sets are used with rules and the
51   functions @ref find_if and @ref find_if_not. 51   functions @ref find_if and @ref find_if_not.
52   @code 52   @code
53   constexpr lut_chars vowel_chars = "AEIOU" "aeiou"; 53   constexpr lut_chars vowel_chars = "AEIOU" "aeiou";
54   54  
55   system::result< core::string_view > rv = parse( "Aiea", token_rule( vowel_chars ) ); 55   system::result< core::string_view > rv = parse( "Aiea", token_rule( vowel_chars ) );
56   @endcode 56   @endcode
57   57  
58   @see 58   @see
59   @ref find_if, 59   @ref find_if,
60   @ref find_if_not, 60   @ref find_if_not,
61   @ref parse, 61   @ref parse,
62   @ref token_rule. 62   @ref token_rule.
63   */ 63   */
64   class lut_chars 64   class lut_chars
65   { 65   {
66   std::uint64_t mask_[4] = {}; 66   std::uint64_t mask_[4] = {};
67   67  
68   constexpr 68   constexpr
69   static 69   static
70   std::uint64_t 70   std::uint64_t
HITCBC 71   2427180 lo(char c) noexcept 71   2427242 lo(char c) noexcept
72   { 72   {
HITCBC 73   2427180 return static_cast< 73   2427242 return static_cast<
HITCBC 74   2427180 unsigned char>(c) & 3; 74   2427242 unsigned char>(c) & 3;
75   } 75   }
76   76  
77   constexpr 77   constexpr
78   static 78   static
79   std::uint64_t 79   std::uint64_t
HITCBC 80   2402409 hi(char c) noexcept 80   2402471 hi(char c) noexcept
81   { 81   {
HITCBC 82   2402409 return 1ULL << (static_cast< 82   2402471 return 1ULL << (static_cast<
HITCBC 83   2402409 unsigned char>(c) >> 2); 83   2402471 unsigned char>(c) >> 2);
84   } 84   }
85   85  
86   constexpr 86   constexpr
87   static 87   static
88   lut_chars 88   lut_chars
89   construct( 89   construct(
90   char const* s) noexcept 90   char const* s) noexcept
91   { 91   {
92   return *s 92   return *s
93   ? lut_chars(*s) + 93   ? lut_chars(*s) +
94   construct(s+1) 94   construct(s+1)
95   : lut_chars(); 95   : lut_chars();
96   } 96   }
97   97  
98   constexpr 98   constexpr
99   static 99   static
100   lut_chars 100   lut_chars
HITCBC 101   44800 construct( 101   44800 construct(
102   unsigned char ch, 102   unsigned char ch,
103   bool b) noexcept 103   bool b) noexcept
104   { 104   {
105   return b 105   return b
HITCBC 106   44800 ? lut_chars(ch) 106   44800 ? lut_chars(ch)
HITCBC 107   157192 : lut_chars(); 107   157192 : lut_chars();
108   } 108   }
109   109  
110   template<class Pred> 110   template<class Pred>
111   constexpr 111   constexpr
112   static 112   static
113   lut_chars 113   lut_chars
HITCBC 114   44800 construct( 114   44800 construct(
115   Pred pred, 115   Pred pred,
116   unsigned char ch) noexcept 116   unsigned char ch) noexcept
117   { 117   {
118   return ch == 255 118   return ch == 255
HITCBC 119   44800 ? construct(ch, pred(static_cast<char>(ch))) 119   44800 ? construct(ch, pred(static_cast<char>(ch)))
HITCBC 120   44625 : construct(ch, pred(static_cast<char>(ch))) + 120   44625 : construct(ch, pred(static_cast<char>(ch))) +
HITCBC 121   89425 construct(pred, ch + 1); 121   89425 construct(pred, ch + 1);
122   } 122   }
123   123  
124   constexpr 124   constexpr
HITCBC 125   37464 lut_chars() = default; 125   37464 lut_chars() = default;
126   126  
127   constexpr 127   constexpr
HITCBC 128   45223 lut_chars( 128   45223 lut_chars(
129   std::uint64_t m0, 129   std::uint64_t m0,
130   std::uint64_t m1, 130   std::uint64_t m1,
131   std::uint64_t m2, 131   std::uint64_t m2,
132   std::uint64_t m3) noexcept 132   std::uint64_t m3) noexcept
HITCBC 133   45223 : mask_{ m0, m1, m2, m3 } 133   45223 : mask_{ m0, m1, m2, m3 }
134   { 134   {
HITCBC 135   45223 } 135   45223 }
136   136  
137   public: 137   public:
138   /** Constructor 138   /** Constructor
139   139  
140   This function constructs a character 140   This function constructs a character
141   set which has as a single member, 141   set which has as a single member,
142   the character `ch`. 142   the character `ch`.
143   143  
144   @par Example 144   @par Example
145   @code 145   @code
146   constexpr lut_chars asterisk( '*' ); 146   constexpr lut_chars asterisk( '*' );
147   @endcode 147   @endcode
148   148  
149   @par Complexity 149   @par Complexity
150   Constant. 150   Constant.
151   151  
152   @par Exception Safety 152   @par Exception Safety
153   Throws nothing. 153   Throws nothing.
154   154  
155   @param ch A character. 155   @param ch A character.
156   */ 156   */
157   constexpr 157   constexpr
HITCBC 158   8257 lut_chars(char ch) noexcept 158   8257 lut_chars(char ch) noexcept
HITCBC 159   41285 : mask_ { 159   41285 : mask_ {
HITCBC 160   8257 lo(ch) == 0 ? hi(ch) : 0, 160   8257 lo(ch) == 0 ? hi(ch) : 0,
HITCBC 161   8257 lo(ch) == 1 ? hi(ch) : 0, 161   8257 lo(ch) == 1 ? hi(ch) : 0,
HITCBC 162   8257 lo(ch) == 2 ? hi(ch) : 0, 162   8257 lo(ch) == 2 ? hi(ch) : 0,
HITCBC 163   8257 lo(ch) == 3 ? hi(ch) : 0 } 163   8257 lo(ch) == 3 ? hi(ch) : 0 }
164   { 164   {
HITCBC 165   8257 } 165   8257 }
166   166  
167   /** Constructor 167   /** Constructor
168   168  
169   This function constructs a character 169   This function constructs a character
170   set which has as members, all of the 170   set which has as members, all of the
171   characters present in the null-terminated 171   characters present in the null-terminated
172   string `s`. 172   string `s`.
173   173  
174   @par Example 174   @par Example
175   @code 175   @code
176   constexpr lut_chars digits = "0123456789"; 176   constexpr lut_chars digits = "0123456789";
177   @endcode 177   @endcode
178   178  
179   @par Complexity 179   @par Complexity
180   Linear in `::strlen(s)`, or constant 180   Linear in `::strlen(s)`, or constant
181   if `s` is a constant expression. 181   if `s` is a constant expression.
182   182  
183   @par Exception Safety 183   @par Exception Safety
184   Throws nothing. 184   Throws nothing.
185   185  
186   @param s A null-terminated string. 186   @param s A null-terminated string.
187   */ 187   */
188   constexpr 188   constexpr
189   lut_chars( 189   lut_chars(
190   char const* s) noexcept 190   char const* s) noexcept
191   : lut_chars(construct(s)) 191   : lut_chars(construct(s))
192   { 192   {
193   } 193   }
194   194  
195   /** Constructor. 195   /** Constructor.
196   196  
197   This function constructs a character 197   This function constructs a character
198   set which has as members, every value 198   set which has as members, every value
199   of `char ch` for which the expression 199   of `char ch` for which the expression
200   `pred(ch)` returns `true`. 200   `pred(ch)` returns `true`.
201   201  
202   @par Example 202   @par Example
203   @code 203   @code
204   struct is_digit 204   struct is_digit
205   { 205   {
206   constexpr bool 206   constexpr bool
207   operator()(char c ) const noexcept 207   operator()(char c ) const noexcept
208   { 208   {
209   return c >= '0' && c <= '9'; 209   return c >= '0' && c <= '9';
210   } 210   }
211   }; 211   };
212   212  
213   constexpr lut_chars digits( is_digit{} ); 213   constexpr lut_chars digits( is_digit{} );
214   @endcode 214   @endcode
215   215  
216   @par Complexity 216   @par Complexity
217   Calls `pred` once for each `unsigned char` value from 0 to 255, 217   Calls `pred` once for each `unsigned char` value from 0 to 255,
218   or constant if `pred(ch)` is a constant expression. 218   or constant if `pred(ch)` is a constant expression.
219   219  
220   @par Exception Safety 220   @par Exception Safety
221   Throws nothing. 221   Throws nothing.
222   222  
223   @param pred The function object to 223   @param pred The function object to
224   use for determining membership in 224   use for determining membership in
225   the character set. 225   the character set.
226   */ 226   */
227   template<class Pred 227   template<class Pred
228   #ifndef BOOST_URL_DOCS 228   #ifndef BOOST_URL_DOCS
229   ,class = typename std::enable_if< 229   ,class = typename std::enable_if<
230   detail::is_pred<Pred>::value && 230   detail::is_pred<Pred>::value &&
231   ! std::is_base_of< 231   ! std::is_base_of<
232   lut_chars, Pred>::value>::type 232   lut_chars, Pred>::value>::type
233   #endif 233   #endif
234   > 234   >
235   constexpr 235   constexpr
HITCBC 236   175 lut_chars(Pred const& pred) noexcept 236   175 lut_chars(Pred const& pred) noexcept
237   : lut_chars( 237   : lut_chars(
HITCBC 238   175 construct(pred, 0)) 238   175 construct(pred, 0))
239   { 239   {
HITCBC 240   175 } 240   175 }
241   241  
242   /** Return true if ch is in the character set. 242   /** Return true if ch is in the character set.
243   243  
244   This function returns true if the 244   This function returns true if the
245   character `ch` is in the set, otherwise 245   character `ch` is in the set, otherwise
246   it returns false. 246   it returns false.
247   247  
248   @par Complexity 248   @par Complexity
249   Constant. 249   Constant.
250   250  
251   @par Exception Safety 251   @par Exception Safety
252   Throws nothing. 252   Throws nothing.
253   253  
254   @param ch The character to test. 254   @param ch The character to test.
255   @return `true` if `ch` is in the set. 255   @return `true` if `ch` is in the set.
256   */ 256   */
257   constexpr 257   constexpr
258   bool 258   bool
HITCBC 259   1280 operator()( 259   1280 operator()(
260   unsigned char ch) const noexcept 260   unsigned char ch) const noexcept
261   { 261   {
HITCBC 262   1280 return operator()(static_cast<char>(ch)); 262   1280 return operator()(static_cast<char>(ch));
263   } 263   }
264   264  
265   /// @copydoc operator()(unsigned char) const 265   /// @copydoc operator()(unsigned char) const
266   constexpr 266   constexpr
267   bool 267   bool
HITCBC 268   2394152 operator()(char ch) const noexcept 268   2394214 operator()(char ch) const noexcept
269   { 269   {
HITCBC 270   2394152 return mask_[lo(ch)] & hi(ch); 270   2394214 return mask_[lo(ch)] & hi(ch);
271   } 271   }
272   272  
273   /** Return the union of two character sets. 273   /** Return the union of two character sets.
274   274  
275   This function returns a new character 275   This function returns a new character
276   set which contains all of the characters 276   set which contains all of the characters
277   in `cs0` as well as all of the characters 277   in `cs0` as well as all of the characters
278   in `cs1`. 278   in `cs1`.
279   279  
280   @par Example 280   @par Example
281   This creates a character set which 281   This creates a character set which
282   includes all letters and numbers 282   includes all letters and numbers
283   @code 283   @code
284   constexpr lut_chars alpha_chars( 284   constexpr lut_chars alpha_chars(
285   "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 285   "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
286   "abcdefghijklmnopqrstuvwxyz"); 286   "abcdefghijklmnopqrstuvwxyz");
287   287  
288   constexpr lut_chars alnum_chars = alpha_chars + "0123456789"; 288   constexpr lut_chars alnum_chars = alpha_chars + "0123456789";
289   @endcode 289   @endcode
290   290  
291   @par Complexity 291   @par Complexity
292   Constant. 292   Constant.
293   293  
294   @return The new character set. 294   @return The new character set.
295   295  
296   @param cs0 A character set to join 296   @param cs0 A character set to join
297   297  
298   @param cs1 A character set to join 298   @param cs1 A character set to join
299   */ 299   */
300   friend 300   friend
301   constexpr 301   constexpr
302   lut_chars 302   lut_chars
HITCBC 303   44629 operator+( 303   44629 operator+(
304   lut_chars const& cs0, 304   lut_chars const& cs0,
305   lut_chars const& cs1) noexcept 305   lut_chars const& cs1) noexcept
306   { 306   {
307   return lut_chars( 307   return lut_chars(
HITCBC 308   44629 cs0.mask_[0] | cs1.mask_[0], 308   44629 cs0.mask_[0] | cs1.mask_[0],
HITCBC 309   44629 cs0.mask_[1] | cs1.mask_[1], 309   44629 cs0.mask_[1] | cs1.mask_[1],
HITCBC 310   44629 cs0.mask_[2] | cs1.mask_[2], 310   44629 cs0.mask_[2] | cs1.mask_[2],
HITCBC 311   44629 cs0.mask_[3] | cs1.mask_[3]); 311   44629 cs0.mask_[3] | cs1.mask_[3]);
312   } 312   }
313   313  
314   /** Return a new character set by subtracting 314   /** Return a new character set by subtracting
315   315  
316   This function returns a new character 316   This function returns a new character
317   set which is formed from all of the 317   set which is formed from all of the
318   characters in `cs0` which are not in `cs1`. 318   characters in `cs0` which are not in `cs1`.
319   319  
320   @par Example 320   @par Example
321   This statement declares a character set 321   This statement declares a character set
322   containing all the lowercase letters 322   containing all the lowercase letters
323   which are not vowels: 323   which are not vowels:
324   @code 324   @code
325   constexpr lut_chars consonants = lut_chars("abcdefghijklmnopqrstuvwxyz") - "aeiou"; 325   constexpr lut_chars consonants = lut_chars("abcdefghijklmnopqrstuvwxyz") - "aeiou";
326   @endcode 326   @endcode
327   327  
328   @par Complexity 328   @par Complexity
329   Constant. 329   Constant.
330   330  
331   @return The new character set. 331   @return The new character set.
332   332  
333   @param cs0 The character set to subtract from. 333   @param cs0 The character set to subtract from.
334   334  
335   @param cs1 The character set whose members are removed. 335   @param cs1 The character set whose members are removed.
336   */ 336   */
337   friend 337   friend
338   constexpr 338   constexpr
339   lut_chars 339   lut_chars
HITCBC 340   594 operator-( 340   594 operator-(
341   lut_chars const& cs0, 341   lut_chars const& cs0,
342   lut_chars const& cs1) noexcept 342   lut_chars const& cs1) noexcept
343   { 343   {
344   return lut_chars( 344   return lut_chars(
HITCBC 345   594 cs0.mask_[0] & ~cs1.mask_[0], 345   594 cs0.mask_[0] & ~cs1.mask_[0],
HITCBC 346   594 cs0.mask_[1] & ~cs1.mask_[1], 346   594 cs0.mask_[1] & ~cs1.mask_[1],
HITCBC 347   594 cs0.mask_[2] & ~cs1.mask_[2], 347   594 cs0.mask_[2] & ~cs1.mask_[2],
HITCBC 348   594 cs0.mask_[3] & ~cs1.mask_[3]); 348   594 cs0.mask_[3] & ~cs1.mask_[3]);
349   } 349   }
350   350  
351   /** Return a new character set which is the complement of another character set. 351   /** Return a new character set which is the complement of another character set.
352   352  
353   This function returns a new character 353   This function returns a new character
354   set which contains all of the characters 354   set which contains all of the characters
355   that are not in `*this`. 355   that are not in `*this`.
356   356  
357   @par Example 357   @par Example
358   This statement declares a character set 358   This statement declares a character set
359   containing everything but vowels: 359   containing everything but vowels:
360   @code 360   @code
361   constexpr lut_chars not_vowels = ~lut_chars( "AEIOU" "aeiou" ); 361   constexpr lut_chars not_vowels = ~lut_chars( "AEIOU" "aeiou" );
362   @endcode 362   @endcode
363   363  
364   @par Complexity 364   @par Complexity
365   Constant. 365   Constant.
366   366  
367   @par Exception Safety 367   @par Exception Safety
368   Throws nothing. 368   Throws nothing.
369   369  
370   @return The new character set. 370   @return The new character set.
371   */ 371   */
372   constexpr 372   constexpr
373   lut_chars 373   lut_chars
374   operator~() const noexcept 374   operator~() const noexcept
375   { 375   {
376   return lut_chars( 376   return lut_chars(
377   ~mask_[0], 377   ~mask_[0],
378   ~mask_[1], 378   ~mask_[1],
379   ~mask_[2], 379   ~mask_[2],
380   ~mask_[3] 380   ~mask_[3]
381   ); 381   );
382   } 382   }
383   383  
384   #ifndef BOOST_URL_DOCS 384   #ifndef BOOST_URL_DOCS
385   #ifdef BOOST_URL_USE_SSE2 385   #ifdef BOOST_URL_USE_SSE2
386   char const* 386   char const*
HITCBC 387   1603 find_if( 387   1603 find_if(
388   char const* first, 388   char const* first,
389   char const* last) const noexcept 389   char const* last) const noexcept
390   { 390   {
HITCBC 391   1603 return detail::find_if_pred( 391   1603 return detail::find_if_pred(
HITCBC 392   1603 *this, first, last); 392   1603 *this, first, last);
393   } 393   }
394   394  
395   char const* 395   char const*
HITCBC 396   75611 find_if_not( 396   75615 find_if_not(
397   char const* first, 397   char const* first,
398   char const* last) const noexcept 398   char const* last) const noexcept
399   { 399   {
HITCBC 400   75611 return detail::find_if_not_pred( 400   75615 return detail::find_if_not_pred(
HITCBC 401   75611 *this, first, last); 401   75615 *this, first, last);
402   } 402   }
403   #endif 403   #endif
404   #endif 404   #endif
405   }; 405   };
406   406  
407   } // grammar 407   } // grammar
408   } // urls 408   } // urls
409   } // boost 409   } // boost
410   410  
411   #endif 411   #endif