99.30% Lines (427/430) 100.00% Functions (21/21)
TLA Baseline Branch
Line Hits Code Line Hits Code
1   // 1   //
2   // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com) 2   // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
3   // Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com) 3   // Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com)
4   // 4   //
5   // Distributed under the Boost Software License, Version 1.0. (See accompanying 5   // Distributed under the Boost Software License, Version 1.0. (See accompanying
6   // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6   // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7   // 7   //
8   // Official repository: https://github.com/boostorg/url 8   // Official repository: https://github.com/boostorg/url
9   // 9   //
10   10  
11   11  
12   #include <boost/url/detail/config.hpp> 12   #include <boost/url/detail/config.hpp>
13   #include <boost/url/decode_view.hpp> 13   #include <boost/url/decode_view.hpp>
14   #include <boost/url/detail/decode.hpp> 14   #include <boost/url/detail/decode.hpp>
15   #include <boost/url/segments_encoded_view.hpp> 15   #include <boost/url/segments_encoded_view.hpp>
16   #include <boost/url/grammar/ci_string.hpp> 16   #include <boost/url/grammar/ci_string.hpp>
17   #include <boost/url/grammar/lut_chars.hpp> 17   #include <boost/url/grammar/lut_chars.hpp>
18   #include <boost/assert.hpp> 18   #include <boost/assert.hpp>
19   #include <boost/core/ignore_unused.hpp> 19   #include <boost/core/ignore_unused.hpp>
20   #include <cstring> 20   #include <cstring>
21   #include <boost/url/detail/normalize.hpp> 21   #include <boost/url/detail/normalize.hpp>
22   22  
23   namespace boost { 23   namespace boost {
24   namespace urls { 24   namespace urls {
25   namespace detail { 25   namespace detail {
26   26  
27   void 27   void
HITCBC 28   7772 pop_encoded_front( 28   7772 pop_encoded_front(
29   core::string_view& s, 29   core::string_view& s,
30   char& c, 30   char& c,
31   std::size_t& n) noexcept 31   std::size_t& n) noexcept
32   { 32   {
HITCBC 33   7772 if(s.front() != '%') 33   7772 if(s.front() != '%')
34   { 34   {
HITCBC 35   7620 c = s.front(); 35   7620 c = s.front();
HITCBC 36   7620 s.remove_prefix(1); 36   7620 s.remove_prefix(1);
37   } 37   }
38   else 38   else
39   { 39   {
HITCBC 40   152 detail::decode_unsafe( 40   152 detail::decode_unsafe(
41   &c, 41   &c,
42   &c + 1, 42   &c + 1,
43   s.substr(0, 3)); 43   s.substr(0, 3));
HITCBC 44   152 s.remove_prefix(3); 44   152 s.remove_prefix(3);
45   } 45   }
HITCBC 46   7772 ++n; 46   7772 ++n;
HITCBC 47   7772 } 47   7772 }
48   48  
49   int 49   int
HITCBC 50   64 compare_encoded( 50   64 compare_encoded(
51   core::string_view lhs, 51   core::string_view lhs,
52   core::string_view rhs) noexcept 52   core::string_view rhs) noexcept
53   { 53   {
HITCBC 54   64 std::size_t n0 = 0; 54   64 std::size_t n0 = 0;
HITCBC 55   64 std::size_t n1 = 0; 55   64 std::size_t n1 = 0;
HITCBC 56   64 char c0 = 0; 56   64 char c0 = 0;
HITCBC 57   64 char c1 = 0; 57   64 char c1 = 0;
HITCBC 58   64 while( 58   64 while(
HITCBC 59   486 !lhs.empty() && 59   486 !lhs.empty() &&
HITCBC 60   228 !rhs.empty()) 60   228 !rhs.empty())
61   { 61   {
HITCBC 62   215 pop_encoded_front(lhs, c0, n0); 62   215 pop_encoded_front(lhs, c0, n0);
HITCBC 63   215 pop_encoded_front(rhs, c1, n1); 63   215 pop_encoded_front(rhs, c1, n1);
HITCBC 64   215 if (c0 < c1) 64   215 if (c0 < c1)
HITCBC 65   18 return -1; 65   18 return -1;
HITCBC 66   197 if (c1 < c0) 66   197 if (c1 < c0)
HITCBC 67   3 return 1; 67   3 return 1;
68   } 68   }
HITCBC 69   43 n0 += detail::decode_bytes_unsafe(lhs); 69   43 n0 += detail::decode_bytes_unsafe(lhs);
HITCBC 70   43 n1 += detail::decode_bytes_unsafe(rhs); 70   43 n1 += detail::decode_bytes_unsafe(rhs);
HITCBC 71   43 if (n0 == n1) 71   43 if (n0 == n1)
HITCBC 72   22 return 0; 72   22 return 0;
HITCBC 73   21 if (n0 < n1) 73   21 if (n0 < n1)
HITCBC 74   8 return -1; 74   8 return -1;
HITCBC 75   13 return 1; 75   13 return 1;
76   } 76   }
77   77  
78   int 78   int
HITCBC 79   28 compare_encoded_query( 79   28 compare_encoded_query(
80   core::string_view lhs, 80   core::string_view lhs,
81   core::string_view rhs) noexcept 81   core::string_view rhs) noexcept
82   { 82   {
83   static constexpr 83   static constexpr
84   grammar::lut_chars 84   grammar::lut_chars
85   query_compare_exception_lut = "&=+"; 85   query_compare_exception_lut = "&=+";
86   86  
HITCBC 87   28 std::size_t n0 = 0; 87   28 std::size_t n0 = 0;
HITCBC 88   28 std::size_t n1 = 0; 88   28 std::size_t n1 = 0;
HITCBC 89   28 char c0 = 0; 89   28 char c0 = 0;
HITCBC 90   28 char c1 = 0; 90   28 char c1 = 0;
HITCBC 91   28 while( 91   28 while(
HITCBC 92   254 !lhs.empty() && 92   254 !lhs.empty() &&
HITCBC 93   122 !rhs.empty()) 93   122 !rhs.empty())
94   { 94   {
HITCBC 95   121 bool const lhs_was_decoded = lhs.front() != '%'; 95   121 bool const lhs_was_decoded = lhs.front() != '%';
HITCBC 96   121 bool const rhs_was_decoded = rhs.front() != '%'; 96   121 bool const rhs_was_decoded = rhs.front() != '%';
HITCBC 97   121 pop_encoded_front(lhs, c0, n0); 97   121 pop_encoded_front(lhs, c0, n0);
HITCBC 98   121 pop_encoded_front(rhs, c1, n1); 98   121 pop_encoded_front(rhs, c1, n1);
HITCBC 99   121 if (c0 < c1) 99   121 if (c0 < c1)
HITCBC 100   2 return -1; 100   2 return -1;
HITCBC 101   119 if (c1 < c0) 101   119 if (c1 < c0)
HITCBC 102   12 return 1; 102   12 return 1;
103   // The decoded chars are the same, but 103   // The decoded chars are the same, but
104   // are these query exceptions that have 104   // are these query exceptions that have
105   // different meanings when decoded? 105   // different meanings when decoded?
HITCBC 106   107 if (query_compare_exception_lut(c0)) 106   107 if (query_compare_exception_lut(c0))
107   { 107   {
108   // If so, we only continue if both 108   // If so, we only continue if both
109   // chars were decoded or encoded 109   // chars were decoded or encoded
110   // the same way. 110   // the same way.
HITCBC 111   40 if (lhs_was_decoded == rhs_was_decoded) 111   40 if (lhs_was_decoded == rhs_was_decoded)
HITCBC 112   37 continue; 112   37 continue;
113   // Otherwise, we return a value != 0 113   // Otherwise, we return a value != 0
114   // because these chars are not equal. 114   // because these chars are not equal.
115   // If rhs was the decoded one, it contains 115   // If rhs was the decoded one, it contains
116   // an ascii char higher than '%' 116   // an ascii char higher than '%'
HITCBC 117   3 if (rhs_was_decoded) 117   3 if (rhs_was_decoded)
HITCBC 118   2 return -1; 118   2 return -1;
119   else 119   else
HITCBC 120   1 return 1; 120   1 return 1;
121   } 121   }
122   } 122   }
HITCBC 123   11 n0 += detail::decode_bytes_unsafe(lhs); 123   11 n0 += detail::decode_bytes_unsafe(lhs);
HITCBC 124   11 n1 += detail::decode_bytes_unsafe(rhs); 124   11 n1 += detail::decode_bytes_unsafe(rhs);
HITCBC 125   11 if (n0 == n1) 125   11 if (n0 == n1)
HITCBC 126   9 return 0; 126   9 return 0;
HITCBC 127   2 if (n0 < n1) 127   2 if (n0 < n1)
HITCBC 128   1 return -1; 128   1 return -1;
HITCBC 129   1 return 1; 129   1 return 1;
130   } 130   }
131   131  
132   void 132   void
HITCBC 133   1216 digest_encoded( 133   1216 digest_encoded(
134   core::string_view s, 134   core::string_view s,
135   fnv_1a& hasher) noexcept 135   fnv_1a& hasher) noexcept
136   { 136   {
HITCBC 137   1216 char c = 0; 137   1216 char c = 0;
HITCBC 138   1216 std::size_t n = 0; 138   1216 std::size_t n = 0;
HITCBC 139   1724 while(!s.empty()) 139   1724 while(!s.empty())
140   { 140   {
HITCBC 141   508 pop_encoded_front(s, c, n); 141   508 pop_encoded_front(s, c, n);
HITCBC 142   508 hasher.put(c); 142   508 hasher.put(c);
143   } 143   }
HITCBC 144   1216 } 144   1216 }
145   145  
146   int 146   int
HITCBC 147   180 ci_compare_encoded( 147   180 ci_compare_encoded(
148   core::string_view lhs, 148   core::string_view lhs,
149   core::string_view rhs) noexcept 149   core::string_view rhs) noexcept
150   { 150   {
HITCBC 151   180 std::size_t n0 = 0; 151   180 std::size_t n0 = 0;
HITCBC 152   180 std::size_t n1 = 0; 152   180 std::size_t n1 = 0;
HITCBC 153   180 char c0 = 0; 153   180 char c0 = 0;
HITCBC 154   180 char c1 = 0; 154   180 char c1 = 0;
HITCBC 155   180 while ( 155   180 while (
HITCBC 156   4704 !lhs.empty() && 156   4704 !lhs.empty() &&
HITCBC 157   2271 !rhs.empty()) 157   2271 !rhs.empty())
158   { 158   {
HITCBC 159   2265 pop_encoded_front(lhs, c0, n0); 159   2265 pop_encoded_front(lhs, c0, n0);
HITCBC 160   2265 pop_encoded_front(rhs, c1, n1); 160   2265 pop_encoded_front(rhs, c1, n1);
HITCBC 161   2265 c0 = grammar::to_lower(c0); 161   2265 c0 = grammar::to_lower(c0);
HITCBC 162   2265 c1 = grammar::to_lower(c1); 162   2265 c1 = grammar::to_lower(c1);
HITCBC 163   2265 if (c0 < c1) 163   2265 if (c0 < c1)
HITCBC 164   10 return -1; 164   10 return -1;
HITCBC 165   2255 if (c1 < c0) 165   2255 if (c1 < c0)
HITCBC 166   2 return 1; 166   2 return 1;
167   } 167   }
HITCBC 168   168 n0 += detail::decode_bytes_unsafe(lhs); 168   168 n0 += detail::decode_bytes_unsafe(lhs);
HITCBC 169   168 n1 += detail::decode_bytes_unsafe(rhs); 169   168 n1 += detail::decode_bytes_unsafe(rhs);
HITCBC 170   168 if (n0 == n1) 170   168 if (n0 == n1)
HITCBC 171   161 return 0; 171   161 return 0;
HITCBC 172   7 if (n0 < n1) 172   7 if (n0 < n1)
HITCBC 173   1 return -1; 173   1 return -1;
HITCBC 174   6 return 1; 174   6 return 1;
175   } 175   }
176   176  
177   void 177   void
HITCBC 178   304 ci_digest_encoded( 178   304 ci_digest_encoded(
179   core::string_view s, 179   core::string_view s,
180   fnv_1a& hasher) noexcept 180   fnv_1a& hasher) noexcept
181   { 181   {
HITCBC 182   304 char c = 0; 182   304 char c = 0;
HITCBC 183   304 std::size_t n = 0; 183   304 std::size_t n = 0;
HITCBC 184   2366 while(!s.empty()) 184   2366 while(!s.empty())
185   { 185   {
HITCBC 186   2062 pop_encoded_front(s, c, n); 186   2062 pop_encoded_front(s, c, n);
HITCBC 187   2062 c = grammar::to_lower(c); 187   2062 c = grammar::to_lower(c);
HITCBC 188   2062 hasher.put(c); 188   2062 hasher.put(c);
189   } 189   }
HITCBC 190   304 } 190   304 }
191   191  
192   int 192   int
HITCBC 193   46 compare( 193   46 compare(
194   core::string_view lhs, 194   core::string_view lhs,
195   core::string_view rhs) noexcept 195   core::string_view rhs) noexcept
196   { 196   {
HITCBC 197   46 auto rlen = (std::min)(lhs.size(), rhs.size()); 197   46 auto rlen = (std::min)(lhs.size(), rhs.size());
HITCBC 198   104 for (std::size_t i = 0; i < rlen; ++i) 198   104 for (std::size_t i = 0; i < rlen; ++i)
199   { 199   {
HITCBC 200   79 char c0 = lhs[i]; 200   79 char c0 = lhs[i];
HITCBC 201   79 char c1 = rhs[i]; 201   79 char c1 = rhs[i];
HITCBC 202   79 if (c0 < c1) 202   79 if (c0 < c1)
HITCBC 203   13 return -1; 203   13 return -1;
HITCBC 204   66 if (c1 < c0) 204   66 if (c1 < c0)
HITCBC 205   8 return 1; 205   8 return 1;
206   } 206   }
HITCBC 207   25 if ( lhs.size() == rhs.size() ) 207   25 if ( lhs.size() == rhs.size() )
HITCBC 208   4 return 0; 208   4 return 0;
HITCBC 209   21 if ( lhs.size() < rhs.size() ) 209   21 if ( lhs.size() < rhs.size() )
HITCBC 210   8 return -1; 210   8 return -1;
HITCBC 211   13 return 1; 211   13 return 1;
212   } 212   }
213   213  
214   int 214   int
HITCBC 215   220 ci_compare( 215   220 ci_compare(
216   core::string_view lhs, 216   core::string_view lhs,
217   core::string_view rhs) noexcept 217   core::string_view rhs) noexcept
218   { 218   {
HITCBC 219   220 auto rlen = (std::min)(lhs.size(), rhs.size()); 219   220 auto rlen = (std::min)(lhs.size(), rhs.size());
HITCBC 220   1125 for (std::size_t i = 0; i < rlen; ++i) 220   1125 for (std::size_t i = 0; i < rlen; ++i)
221   { 221   {
HITCBC 222   912 char c0 = grammar::to_lower(lhs[i]); 222   912 char c0 = grammar::to_lower(lhs[i]);
HITCBC 223   912 char c1 = grammar::to_lower(rhs[i]); 223   912 char c1 = grammar::to_lower(rhs[i]);
HITCBC 224   912 if (c0 < c1) 224   912 if (c0 < c1)
HITCBC 225   6 return -1; 225   6 return -1;
HITCBC 226   906 if (c1 < c0) 226   906 if (c1 < c0)
HITCBC 227   1 return 1; 227   1 return 1;
228   } 228   }
HITCBC 229   213 if ( lhs.size() == rhs.size() ) 229   213 if ( lhs.size() == rhs.size() )
HITCBC 230   205 return 0; 230   205 return 0;
HITCBC 231   8 if ( lhs.size() < rhs.size() ) 231   8 if ( lhs.size() < rhs.size() )
HITCBC 232   6 return -1; 232   6 return -1;
HITCBC 233   2 return 1; 233   2 return 1;
234   } 234   }
235   235  
236   void 236   void
HITCBC 237   304 ci_digest( 237   304 ci_digest(
238   core::string_view s, 238   core::string_view s,
239   fnv_1a& hasher) noexcept 239   fnv_1a& hasher) noexcept
240   { 240   {
HITCBC 241   1034 for (char c: s) 241   1034 for (char c: s)
242   { 242   {
HITCBC 243   730 c = grammar::to_lower(c); 243   730 c = grammar::to_lower(c);
HITCBC 244   730 hasher.put(c); 244   730 hasher.put(c);
245   } 245   }
HITCBC 246   304 } 246   304 }
247   247  
248   /* Check if a string ends with the specified suffix (decoded comparison) 248   /* Check if a string ends with the specified suffix (decoded comparison)
249   249  
250   This function determines if a string ends with the specified suffix 250   This function determines if a string ends with the specified suffix
251   when the string and suffix are compared after percent-decoding. 251   when the string and suffix are compared after percent-decoding.
252   252  
253   @param str The string to check (percent-encoded) 253   @param str The string to check (percent-encoded)
254   @param suffix The suffix to check for (percent-decoded) 254   @param suffix The suffix to check for (percent-decoded)
255   @return The number of encoded chars consumed in the string 255   @return The number of encoded chars consumed in the string
256   */ 256   */
257   std::size_t 257   std::size_t
HITCBC 258   2136 path_ends_with( 258   2136 path_ends_with(
259   core::string_view str, 259   core::string_view str,
260   core::string_view suffix) noexcept 260   core::string_view suffix) noexcept
261   { 261   {
HITCBC 262   2136 BOOST_ASSERT(!str.empty()); 262   2136 BOOST_ASSERT(!str.empty());
HITCBC 263   2136 BOOST_ASSERT(!suffix.empty()); 263   2136 BOOST_ASSERT(!suffix.empty());
HITCBC 264   2136 BOOST_ASSERT(!suffix.contains("%2F")); 264   2136 BOOST_ASSERT(!suffix.contains("%2F"));
HITCBC 265   2136 BOOST_ASSERT(!suffix.contains("%2f")); 265   2136 BOOST_ASSERT(!suffix.contains("%2f"));
HITCBC 266   5848 auto consume_last = []( 266   5848 auto consume_last = [](
267   core::string_view::iterator& it, 267   core::string_view::iterator& it,
268   core::string_view::iterator& end, 268   core::string_view::iterator& end,
269   char& c) 269   char& c)
270   { 270   {
HITCBC 271   5848 BOOST_ASSERT(end > it); 271   5848 BOOST_ASSERT(end > it);
HITCBC 272   5848 BOOST_ASSERT(it != end); 272   5848 BOOST_ASSERT(it != end);
HITCBC 273   9808 if ((end - it) < 3 || 273   9808 if ((end - it) < 3 ||
HITCBC 274   7920 *(std::prev(end, 3)) != '%') 274   7920 *(std::prev(end, 3)) != '%')
275   { 275   {
HITCBC 276   5800 c = *--end; 276   5800 c = *--end;
HITCBC 277   5800 return false; 277   5800 return false;
278   } 278   }
HITCBC 279   96 detail::decode_unsafe( 279   96 detail::decode_unsafe(
280   &c, 280   &c,
281   &c + 1, 281   &c + 1,
282   core::string_view(std::prev( 282   core::string_view(std::prev(
283   end, 3), 3)); 283   end, 3), 3));
HITCBC 284   48 end -= 3; 284   48 end -= 3;
HITCBC 285   48 return true; 285   48 return true;
286   }; 286   };
287   287  
HITCBC 288   2136 auto it0 = str.begin(); 288   2136 auto it0 = str.begin();
HITCBC 289   2136 auto end0 = str.end(); 289   2136 auto end0 = str.end();
HITCBC 290   2136 auto it1 = suffix.begin(); 290   2136 auto it1 = suffix.begin();
HITCBC 291   2136 auto end1 = suffix.end(); 291   2136 auto end1 = suffix.end();
HITCBC 292   2136 char c0 = 0; 292   2136 char c0 = 0;
HITCBC 293   2136 char c1 = 0; 293   2136 char c1 = 0;
HITCBC 294   2136 while( 294   2136 while(
HITCBC 295   3248 it0 < end0 && 295   3248 it0 < end0 &&
HITCBC 296   3006 it1 < end1) 296   3006 it1 < end1)
297   { 297   {
HITCBC 298   2932 bool const is_encoded = consume_last(it0, end0, c0); 298   2932 bool const is_encoded = consume_last(it0, end0, c0);
299   // The suffix never contains an encoded slash (%2F), and a decoded 299   // The suffix never contains an encoded slash (%2F), and a decoded
300   // slash is not equivalent to an encoded slash 300   // slash is not equivalent to an encoded slash
HITCBC 301   2932 if (is_encoded && c0 == '/') 301   2932 if (is_encoded && c0 == '/')
HITCBC 302   16 return 0; 302   16 return 0;
HITCBC 303   2916 consume_last(it1, end1, c1); 303   2916 consume_last(it1, end1, c1);
HITCBC 304   2916 if (c0 != c1) 304   2916 if (c0 != c1)
HITCBC 305   1804 return 0; 305   1804 return 0;
306   } 306   }
HITCBC 307   316 bool const consumed_suffix = it1 == end1; 307   316 bool const consumed_suffix = it1 == end1;
HITCBC 308   316 if (consumed_suffix) 308   316 if (consumed_suffix)
309   { 309   {
HITCBC 310   110 std::size_t const consumed_encoded = str.end() - end0; 310   110 std::size_t const consumed_encoded = str.end() - end0;
HITCBC 311   110 return consumed_encoded; 311   110 return consumed_encoded;
312   } 312   }
HITCBC 313   206 return 0; 313   206 return 0;
314   } 314   }
315   315  
316   std::size_t 316   std::size_t
HITCBC 317   1065 remove_dot_segments( 317   1065 remove_dot_segments(
318   char* dest0, 318   char* dest0,
319   char const* end, 319   char const* end,
320   core::string_view input) noexcept 320   core::string_view input) noexcept
321   { 321   {
322   // 1. The input buffer `s` is initialized with 322   // 1. The input buffer `s` is initialized with
323   // the now-appended path components and the 323   // the now-appended path components and the
324   // output buffer `dest0` is initialized to 324   // output buffer `dest0` is initialized to
325   // the empty string. 325   // the empty string.
HITCBC 326   1065 char* dest = dest0; 326   1065 char* dest = dest0;
HITCBC 327   1065 bool const is_absolute = input.starts_with('/'); 327   1065 bool const is_absolute = input.starts_with('/');
328   328  
329   // Step 2 is a loop through 5 production rules: 329   // Step 2 is a loop through 5 production rules:
330   // https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4 330   // https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4
331   // 331   //
332   // There are no transitions between all rules, 332   // There are no transitions between all rules,
333   // which enables some optimizations. 333   // which enables some optimizations.
334   // 334   //
335   // Initial: 335   // Initial:
336   // - Rule A: handle initial dots 336   // - Rule A: handle initial dots
337   // If the input buffer begins with a 337   // If the input buffer begins with a
338   // prefix of "../" or "./", then remove 338   // prefix of "../" or "./", then remove
339   // that prefix from the input buffer. 339   // that prefix from the input buffer.
340   // Rule A can only happen at the beginning. 340   // Rule A can only happen at the beginning.
341   // Errata 4547: Keep "../" in the beginning 341   // Errata 4547: Keep "../" in the beginning
342   // https://www.rfc-editor.org/errata/eid4547 342   // https://www.rfc-editor.org/errata/eid4547
343   // 343   //
344   // Then: 344   // Then:
345   // - Rule D: ignore a final ".." or "." 345   // - Rule D: ignore a final ".." or "."
346   // if the input buffer consists only of "." 346   // if the input buffer consists only of "."
347   // or "..", then remove that from the input 347   // or "..", then remove that from the input
348   // buffer. 348   // buffer.
349   // Rule D can only happen after Rule A because: 349   // Rule D can only happen after Rule A because:
350   // - B and C write "/" to the input 350   // - B and C write "/" to the input
351   // - E writes "/" to input or returns 351   // - E writes "/" to input or returns
352   // 352   //
353   // Then: 353   // Then:
354   // - Rule B: ignore ".": write "/" to the input 354   // - Rule B: ignore ".": write "/" to the input
355   // - Rule C: apply "..": remove seg and write "/" 355   // - Rule C: apply "..": remove seg and write "/"
356   // - Rule E: copy complete segment 356   // - Rule E: copy complete segment
357   auto append = 357   auto append =
HITCBC 358   1879 [](char*& first, char const* last, core::string_view in) 358   1879 [](char*& first, char const* last, core::string_view in)
359   { 359   {
360   // append `in` to `dest` 360   // append `in` to `dest`
HITCBC 361   1879 BOOST_ASSERT(in.size() <= std::size_t(last - first)); 361   1879 BOOST_ASSERT(in.size() <= std::size_t(last - first));
HITCBC 362   1879 std::memmove(first, in.data(), in.size()); 362   1879 std::memmove(first, in.data(), in.size());
HITCBC 363   1879 first += in.size(); 363   1879 first += in.size();
364   ignore_unused(last); 364   ignore_unused(last);
HITCBC 365   1879 }; 365   1879 };
366   366  
HITCBC 367   12011 auto dot_starts_with = []( 367   12011 auto dot_starts_with = [](
368   core::string_view str, core::string_view dots, std::size_t& n) 368   core::string_view str, core::string_view dots, std::size_t& n)
369   { 369   {
370   // starts_with for encoded/decoded dots 370   // starts_with for encoded/decoded dots
371   // or decoded otherwise. return how many 371   // or decoded otherwise. return how many
372   // chars in str match the dots 372   // chars in str match the dots
HITCBC 373   12011 n = 0; 373   12011 n = 0;
HITCBC 374   21036 for (char c: dots) 374   21036 for (char c: dots)
375   { 375   {
HITCBC 376   20431 if (str.starts_with(c)) 376   20431 if (str.starts_with(c))
377   { 377   {
HITCBC 378   9025 str.remove_prefix(1); 378   9025 str.remove_prefix(1);
HITCBC 379   9025 ++n; 379   9025 ++n;
HITCBC 380   9025 continue; 380   9025 continue;
381   } 381   }
382   382  
383   // In the general case, we would need to 383   // In the general case, we would need to
384   // check if the next char is an encoded 384   // check if the next char is an encoded
385   // dot. 385   // dot.
386   // However, an encoded dot in `str` 386   // However, an encoded dot in `str`
387   // would have already been decoded in 387   // would have already been decoded in
388   // url_base::normalize_path(). 388   // url_base::normalize_path().
389   // This needs to be undone if 389   // This needs to be undone if
390   // `remove_dot_segments` is used in a 390   // `remove_dot_segments` is used in a
391   // different context. 391   // different context.
392   // if (str.size() > 2 && 392   // if (str.size() > 2 &&
393   // c == '.' 393   // c == '.'
394   // && 394   // &&
395   // str[0] == '%' && 395   // str[0] == '%' &&
396   // str[1] == '2' && 396   // str[1] == '2' &&
397   // (str[2] == 'e' || 397   // (str[2] == 'e' ||
398   // str[2] == 'E')) 398   // str[2] == 'E'))
399   // { 399   // {
400   // str.remove_prefix(3); 400   // str.remove_prefix(3);
401   // n += 3; 401   // n += 3;
402   // continue; 402   // continue;
403   // } 403   // }
404   404  
HITCBC 405   11406 n = 0; 405   11406 n = 0;
HITCBC 406   11406 return false; 406   11406 return false;
407   } 407   }
HITCBC 408   605 return true; 408   605 return true;
409   }; 409   };
410   410  
HITCBC 411   6016 auto dot_equal = [&dot_starts_with]( 411   6016 auto dot_equal = [&dot_starts_with](
412   core::string_view str, core::string_view dots) 412   core::string_view str, core::string_view dots)
413   { 413   {
HITCBC 414   6016 std::size_t n = 0; 414   6016 std::size_t n = 0;
HITCBC 415   6016 dot_starts_with(str, dots, n); 415   6016 dot_starts_with(str, dots, n);
HITCBC 416   6016 return n == str.size(); 416   6016 return n == str.size();
HITCBC 417   1065 }; 417   1065 };
418   418  
419   // Rule A 419   // Rule A
420   std::size_t n; 420   std::size_t n;
HITCBC 421   1086 while (!input.empty()) 421   1086 while (!input.empty())
422   { 422   {
HITCBC 423   960 if (dot_starts_with(input, "../", n)) 423   960 if (dot_starts_with(input, "../", n))
424   { 424   {
425   // Errata 4547 425   // Errata 4547
HITCBC 426   4 append(dest, end, "../"); 426   4 append(dest, end, "../");
HITCBC 427   4 input.remove_prefix(n); 427   4 input.remove_prefix(n);
HITCBC 428   4 continue; 428   4 continue;
429   } 429   }
HITCBC 430   956 else if (!dot_starts_with(input, "./", n)) 430   956 else if (!dot_starts_with(input, "./", n))
431   { 431   {
HITCBC 432   939 break; 432   939 break;
433   } 433   }
HITCBC 434   17 input.remove_prefix(n); 434   17 input.remove_prefix(n);
435   } 435   }
436   436  
437   // Rule D 437   // Rule D
HITCBC 438   1065 if( dot_equal(input, ".")) 438   1065 if( dot_equal(input, "."))
439   { 439   {
HITCBC 440   127 input = {}; 440   127 input = {};
441   } 441   }
HITCBC 442   938 else if( dot_equal(input, "..") ) 442   938 else if( dot_equal(input, "..") )
443   { 443   {
444   // Errata 4547 444   // Errata 4547
HITCBC 445   3 append(dest, end, ".."); 445   3 append(dest, end, "..");
HITCBC 446   3 input = {}; 446   3 input = {};
447   } 447   }
448   448  
449   // 2. While the input buffer is not empty, 449   // 2. While the input buffer is not empty,
450   // loop as follows: 450   // loop as follows:
HITCBC 451   3088 while (!input.empty()) 451   3088 while (!input.empty())
452   { 452   {
453   // Rule B 453   // Rule B
HITCBC 454   2062 bool const is_dot_seg = dot_starts_with(input, "/./", n); 454   2062 bool const is_dot_seg = dot_starts_with(input, "/./", n);
HITCBC 455   2062 if (is_dot_seg) 455   2062 if (is_dot_seg)
456   { 456   {
HITCBC 457   37 input.remove_prefix(n - 1); 457   37 input.remove_prefix(n - 1);
HITCBC 458   37 continue; 458   37 continue;
459   } 459   }
460   460  
HITCBC 461   2025 bool const is_final_dot_seg = dot_equal(input, "/."); 461   2025 bool const is_final_dot_seg = dot_equal(input, "/.");
HITCBC 462   2025 if (is_final_dot_seg) 462   2025 if (is_final_dot_seg)
463   { 463   {
464   // We can't remove "." from a core::string_view 464   // We can't remove "." from a core::string_view
465   // So what we do here is equivalent to 465   // So what we do here is equivalent to
466   // replacing s with '/' as required 466   // replacing s with '/' as required
467   // in Rule B and executing the next 467   // in Rule B and executing the next
468   // iteration, which would append this 468   // iteration, which would append this
469   // '/' to the output, as required by 469   // '/' to the output, as required by
470   // Rule E 470   // Rule E
HITCBC 471   8 append(dest, end, input.substr(0, 1)); 471   8 append(dest, end, input.substr(0, 1));
HITCBC 472   8 input = {}; 472   8 input = {};
HITCBC 473   8 break; 473   8 break;
474   } 474   }
475   475  
476   // Rule C 476   // Rule C
HITCBC 477   2017 bool const is_dotdot_seg = dot_starts_with(input, "/../", n); 477   2017 bool const is_dotdot_seg = dot_starts_with(input, "/../", n);
HITCBC 478   2017 if (is_dotdot_seg) 478   2017 if (is_dotdot_seg)
479   { 479   {
HITCBC 480   215 core::string_view cur_out(dest0, dest - dest0); 480   215 core::string_view cur_out(dest0, dest - dest0);
HITCBC 481   215 std::size_t p = cur_out.find_last_of('/'); 481   215 std::size_t p = cur_out.find_last_of('/');
HITCBC 482   215 bool const has_multiple_segs = p != core::string_view::npos; 482   215 bool const has_multiple_segs = p != core::string_view::npos;
HITCBC 483   215 if (has_multiple_segs) 483   215 if (has_multiple_segs)
484   { 484   {
485   // output has multiple segments 485   // output has multiple segments
486   // "erase" [p, end] if not "/.." 486   // "erase" [p, end] if not "/.."
HITCBC 487   144 core::string_view last_seg(dest0 + p, dest - (dest0 + p)); 487   144 core::string_view last_seg(dest0 + p, dest - (dest0 + p));
HITCBC 488   144 bool const prev_is_dotdot_seg = dot_equal(last_seg, "/.."); 488   144 bool const prev_is_dotdot_seg = dot_equal(last_seg, "/..");
HITCBC 489   144 if (!prev_is_dotdot_seg) 489   144 if (!prev_is_dotdot_seg)
490   { 490   {
HITCBC 491   133 dest = dest0 + p; 491   133 dest = dest0 + p;
492   } 492   }
493   else 493   else
494   { 494   {
HITCBC 495   11 append(dest, end, "/.."); 495   11 append(dest, end, "/..");
496   } 496   }
497   } 497   }
HITCBC 498   71 else if (dest0 != dest) 498   71 else if (dest0 != dest)
499   { 499   {
500   // Only one segment in the output: remove it 500   // Only one segment in the output: remove it
HITCBC 501   21 core::string_view last_seg(dest0, dest - dest0); 501   21 core::string_view last_seg(dest0, dest - dest0);
HITCBC 502   21 bool const prev_is_dotdot_seg = dot_equal(last_seg, ".."); 502   21 bool const prev_is_dotdot_seg = dot_equal(last_seg, "..");
HITCBC 503   21 if (!prev_is_dotdot_seg) 503   21 if (!prev_is_dotdot_seg)
504   { 504   {
HITCBC 505   19 dest = dest0; 505   19 dest = dest0;
HITCBC 506   19 if (!is_absolute) 506   19 if (!is_absolute)
507   { 507   {
HITCBC 508   19 input.remove_prefix(1); 508   19 input.remove_prefix(1);
509   } 509   }
510   } 510   }
511   else 511   else
512   { 512   {
HITCBC 513   2 append(dest, end, "/.."); 513   2 append(dest, end, "/..");
514   } 514   }
515   } 515   }
516   else 516   else
517   { 517   {
518   // Output is empty 518   // Output is empty
HITCBC 519   50 if (is_absolute) 519   50 if (is_absolute)
520   { 520   {
HITCBC 521   50 append(dest, end, "/.."); 521   50 append(dest, end, "/..");
522   } 522   }
523   else 523   else
524   { 524   {
525   // AFREITAS: Although we have no formal proof 525   // AFREITAS: Although we have no formal proof
526   // for that, the output can't be relative 526   // for that, the output can't be relative
527   // and empty at this point because relative 527   // and empty at this point because relative
528   // paths will fall in the `dest0 != dest` 528   // paths will fall in the `dest0 != dest`
529   // case above of this rule C and then the 529   // case above of this rule C and then the
530   // general case of rule E for "..". 530   // general case of rule E for "..".
MISUBC 531   append(dest, end, ".."); 531   append(dest, end, "..");
532   } 532   }
533   } 533   }
HITCBC 534   215 input.remove_prefix(n - 1); 534   215 input.remove_prefix(n - 1);
HITCBC 535   215 continue; 535   215 continue;
HITCBC 536   215 } 536   215 }
537   537  
HITCBC 538   1802 bool const is_final_dotdot_seg = dot_equal(input, "/.."); 538   1802 bool const is_final_dotdot_seg = dot_equal(input, "/..");
HITCBC 539   1802 if (is_final_dotdot_seg) 539   1802 if (is_final_dotdot_seg)
540   { 540   {
HITCBC 541   31 core::string_view cur_out(dest0, dest - dest0); 541   31 core::string_view cur_out(dest0, dest - dest0);
HITCBC 542   31 std::size_t p = cur_out.find_last_of('/'); 542   31 std::size_t p = cur_out.find_last_of('/');
HITCBC 543   31 bool const has_multiple_segs = p != core::string_view::npos; 543   31 bool const has_multiple_segs = p != core::string_view::npos;
HITCBC 544   31 if (has_multiple_segs) 544   31 if (has_multiple_segs)
545   { 545   {
546   // output has multiple segments 546   // output has multiple segments
547   // "erase" [p, end] if not "/.." 547   // "erase" [p, end] if not "/.."
HITCBC 548   18 core::string_view last_seg(dest0 + p, dest - (dest0 + p)); 548   18 core::string_view last_seg(dest0 + p, dest - (dest0 + p));
HITCBC 549   18 bool const prev_is_dotdot_seg = dot_equal(last_seg, "/.."); 549   18 bool const prev_is_dotdot_seg = dot_equal(last_seg, "/..");
HITCBC 550   18 if (!prev_is_dotdot_seg) 550   18 if (!prev_is_dotdot_seg)
551   { 551   {
HITCBC 552   14 dest = dest0 + p; 552   14 dest = dest0 + p;
HITCBC 553   14 append(dest, end, "/"); 553   14 append(dest, end, "/");
554   } 554   }
555   else 555   else
556   { 556   {
HITCBC 557   4 append(dest, end, "/.."); 557   4 append(dest, end, "/..");
558   } 558   }
559   } 559   }
HITCBC 560   13 else if (dest0 != dest) 560   13 else if (dest0 != dest)
561   { 561   {
562   // Only one segment in the output: remove it 562   // Only one segment in the output: remove it
HITCBC 563   3 core::string_view last_seg(dest0, dest - dest0); 563   3 core::string_view last_seg(dest0, dest - dest0);
HITCBC 564   3 bool const prev_is_dotdot_seg = dot_equal(last_seg, ".."); 564   3 bool const prev_is_dotdot_seg = dot_equal(last_seg, "..");
HITCBC 565   3 if (!prev_is_dotdot_seg) { 565   3 if (!prev_is_dotdot_seg) {
HITCBC 566   1 dest = dest0; 566   1 dest = dest0;
567   } 567   }
568   else 568   else
569   { 569   {
HITCBC 570   2 append(dest, end, "/.."); 570   2 append(dest, end, "/..");
571   } 571   }
572   } 572   }
573   else 573   else
574   { 574   {
575   // Output is empty: append dotdot 575   // Output is empty: append dotdot
HITCBC 576   10 if (is_absolute) 576   10 if (is_absolute)
577   { 577   {
HITCBC 578   10 append(dest, end, "/.."); 578   10 append(dest, end, "/..");
579   } 579   }
580   else 580   else
581   { 581   {
582   // AFREITAS: Although we have no formal proof 582   // AFREITAS: Although we have no formal proof
583   // for that, the output can't be relative 583   // for that, the output can't be relative
584   // and empty at this point because relative 584   // and empty at this point because relative
585   // paths will fall in the `dest0 != dest` 585   // paths will fall in the `dest0 != dest`
586   // case above of this rule C and then the 586   // case above of this rule C and then the
587   // general case of rule E for "..". 587   // general case of rule E for "..".
MISUBC 588   append(dest, end, ".."); 588   append(dest, end, "..");
589   } 589   }
590   } 590   }
HITCBC 591   31 input = {}; 591   31 input = {};
HITCBC 592   31 break; 592   31 break;
593   } 593   }
594   594  
595   // Rule E 595   // Rule E
HITCBC 596   1771 std::size_t p = input.find_first_of('/', 1); 596   1771 std::size_t p = input.find_first_of('/', 1);
HITCBC 597   1771 if (p != core::string_view::npos) 597   1771 if (p != core::string_view::npos)
598   { 598   {
HITCBC 599   875 append(dest, end, input.substr(0, p)); 599   875 append(dest, end, input.substr(0, p));
HITCBC 600   875 input.remove_prefix(p); 600   875 input.remove_prefix(p);
601   } 601   }
602   else 602   else
603   { 603   {
HITCBC 604   896 append(dest, end, input); 604   896 append(dest, end, input);
HITCBC 605   896 input = {}; 605   896 input = {};
606   } 606   }
607   } 607   }
608   608  
609   // 3. Finally, the output buffer is set 609   // 3. Finally, the output buffer is set
610   // as the result of remove_dot_segments, 610   // as the result of remove_dot_segments,
611   // and we return its size 611   // and we return its size
HITCBC 612   1065 return dest - dest0; 612   1065 return dest - dest0;
613   } 613   }
614   614  
615   char 615   char
HITCBC 616   1154 path_pop_back( core::string_view& s ) 616   1154 path_pop_back( core::string_view& s )
617   { 617   {
HITCBC 618   1676 if (s.size() < 3 || 618   1676 if (s.size() < 3 ||
HITCBC 619   1044 *std::prev(s.end(), 3) != '%') 619   1044 *std::prev(s.end(), 3) != '%')
620   { 620   {
HITCBC 621   1102 char c = s.back(); 621   1102 char c = s.back();
HITCBC 622   1102 s.remove_suffix(1); 622   1102 s.remove_suffix(1);
HITCBC 623   1102 return c; 623   1102 return c;
624   } 624   }
HITCBC 625   52 char c = 0; 625   52 char c = 0;
HITCBC 626   104 detail::decode_unsafe( 626   104 detail::decode_unsafe(
HITCBC 627   104 &c, &c + 1, s.substr(s.size() - 3)); 627   104 &c, &c + 1, s.substr(s.size() - 3));
HITCBC 628   52 if (c != '/') 628   52 if (c != '/')
629   { 629   {
HITCBC 630   44 s.remove_suffix(3); 630   44 s.remove_suffix(3);
HITCBC 631   44 return c; 631   44 return c;
632   } 632   }
HITCBC 633   8 c = s.back(); 633   8 c = s.back();
HITCBC 634   8 s.remove_suffix(1); 634   8 s.remove_suffix(1);
HITCBC 635   8 return c; 635   8 return c;
636   }; 636   };
637   637  
638   void 638   void
HITCBC 639   538 pop_last_segment( 639   538 pop_last_segment(
640   core::string_view& str, 640   core::string_view& str,
641   core::string_view& seg, 641   core::string_view& seg,
642   std::size_t& level, 642   std::size_t& level,
643   bool remove_unmatched) noexcept 643   bool remove_unmatched) noexcept
644   { 644   {
HITCBC 645   538 seg = {}; 645   538 seg = {};
HITCBC 646   538 std::size_t n = 0; 646   538 std::size_t n = 0;
HITCBC 647   700 while (!str.empty()) 647   700 while (!str.empty())
648   { 648   {
649   // B. if the input buffer begins with a 649   // B. if the input buffer begins with a
650   // prefix of "/./" or "/.", where "." is 650   // prefix of "/./" or "/.", where "." is
651   // a complete path segment, then replace 651   // a complete path segment, then replace
652   // that prefix with "/" in the input 652   // that prefix with "/" in the input
653   // buffer; otherwise, 653   // buffer; otherwise,
HITCBC 654   558 n = detail::path_ends_with(str, "/./"); 654   558 n = detail::path_ends_with(str, "/./");
HITCBC 655   558 if (n) 655   558 if (n)
656   { 656   {
HITCBC 657   10 seg = str.substr(str.size() - n); 657   10 seg = str.substr(str.size() - n);
HITCBC 658   10 str.remove_suffix(n); 658   10 str.remove_suffix(n);
HITCBC 659   10 continue; 659   10 continue;
660   } 660   }
HITCBC 661   548 n = detail::path_ends_with(str, "/."); 661   548 n = detail::path_ends_with(str, "/.");
HITCBC 662   548 if (n) 662   548 if (n)
663   { 663   {
HITCBC 664   12 seg = str.substr(str.size() - n, 1); 664   12 seg = str.substr(str.size() - n, 1);
HITCBC 665   12 str.remove_suffix(n); 665   12 str.remove_suffix(n);
HITCBC 666   12 continue; 666   12 continue;
667   } 667   }
668   668  
669   // C. if the input buffer begins with a 669   // C. if the input buffer begins with a
670   // prefix of "/../" or "/..", where ".." 670   // prefix of "/../" or "/..", where ".."
671   // is a complete path segment, then 671   // is a complete path segment, then
672   // replace that prefix with "/" in the 672   // replace that prefix with "/" in the
673   // input buffer and remove the last 673   // input buffer and remove the last
674   // segment and its preceding "/" 674   // segment and its preceding "/"
675   // (if any) from the output buffer 675   // (if any) from the output buffer
676   // otherwise, 676   // otherwise,
HITCBC 677   536 n = detail::path_ends_with(str, "/../"); 677   536 n = detail::path_ends_with(str, "/../");
HITCBC 678   536 if (n) 678   536 if (n)
679   { 679   {
HITCBC 680   42 seg = str.substr(str.size() - n); 680   42 seg = str.substr(str.size() - n);
HITCBC 681   42 str.remove_suffix(n); 681   42 str.remove_suffix(n);
HITCBC 682   42 ++level; 682   42 ++level;
HITCBC 683   42 continue; 683   42 continue;
684   } 684   }
HITCBC 685   494 n = detail::path_ends_with(str, "/.."); 685   494 n = detail::path_ends_with(str, "/..");
HITCBC 686   494 if (n) 686   494 if (n)
687   { 687   {
HITCBC 688   46 seg = str.substr(str.size() - n); 688   46 seg = str.substr(str.size() - n);
HITCBC 689   46 str.remove_suffix(n); 689   46 str.remove_suffix(n);
HITCBC 690   46 ++level; 690   46 ++level;
HITCBC 691   46 continue; 691   46 continue;
692   } 692   }
693   693  
694   // E. move the first path segment in the 694   // E. move the first path segment in the
695   // input buffer to the end of the output 695   // input buffer to the end of the output
696   // buffer, including the initial "/" 696   // buffer, including the initial "/"
697   // character (if any) and any subsequent 697   // character (if any) and any subsequent
698   // characters up to, but not including, 698   // characters up to, but not including,
699   // the next "/" character or the end of 699   // the next "/" character or the end of
700   // the input buffer. 700   // the input buffer.
HITCBC 701   448 std::size_t p = str.size() > 1 701   448 std::size_t p = str.size() > 1
HITCBC 702   448 ? str.find_last_of('/', str.size() - 2) 702   448 ? str.find_last_of('/', str.size() - 2)
HITCBC 703   448 : core::string_view::npos; 703   448 : core::string_view::npos;
HITCBC 704   448 if (p != core::string_view::npos) 704   448 if (p != core::string_view::npos)
705   { 705   {
HITCBC 706   276 seg = str.substr(p + 1); 706   276 seg = str.substr(p + 1);
HITCBC 707   276 str.remove_suffix(seg.size()); 707   276 str.remove_suffix(seg.size());
708   } 708   }
709   else 709   else
710   { 710   {
HITCBC 711   172 seg = str; 711   172 seg = str;
HITCBC 712   172 str = {}; 712   172 str = {};
713   } 713   }
714   714  
HITCBC 715   448 if (level == 0) 715   448 if (level == 0)
HITCBC 716   396 return; 716   396 return;
HITCBC 717   52 if (!str.empty()) 717   52 if (!str.empty())
HITCBC 718   42 --level; 718   42 --level;
719   } 719   }
720   // we still need to skip n_skip + 1 720   // we still need to skip n_skip + 1
721   // but the string is empty 721   // but the string is empty
HITCBC 722   142 if (remove_unmatched && level) 722   142 if (remove_unmatched && level)
723   { 723   {
HITCBC 724   34 seg = "/"; 724   34 seg = "/";
HITCBC 725   34 level = 0; 725   34 level = 0;
HITCBC 726   34 return; 726   34 return;
727   } 727   }
HITCBC 728   108 else if (level) 728   108 else if (level)
729   { 729   {
HITCBC 730   4 if (!seg.empty()) 730   4 if (!seg.empty())
731   { 731   {
HITCBC 732   4 seg = "/../"; 732   4 seg = "/../";
733   } 733   }
734   else 734   else
735   { 735   {
736   // AFREITAS: this condition 736   // AFREITAS: this condition
737   // is correct, but it might 737   // is correct, but it might
738   // unreachable. 738   // unreachable.
MISUBC 739   seg = "/.."; 739   seg = "/..";
740   } 740   }
HITCBC 741   4 --level; 741   4 --level;
HITCBC 742   4 return; 742   4 return;
743   } 743   }
HITCBC 744   104 seg = {}; 744   104 seg = {};
745   } 745   }
746   746  
747   void 747   void
HITCBC 748   304 normalized_path_digest( 748   304 normalized_path_digest(
749   core::string_view str, 749   core::string_view str,
750   bool remove_unmatched, 750   bool remove_unmatched,
751   fnv_1a& hasher) noexcept 751   fnv_1a& hasher) noexcept
752   { 752   {
HITCBC 753   304 core::string_view seg; 753   304 core::string_view seg;
HITCBC 754   304 std::size_t level = 0; 754   304 std::size_t level = 0;
755   do 755   do
756   { 756   {
HITCBC 757   538 pop_last_segment( 757   538 pop_last_segment(
758   str, seg, level, remove_unmatched); 758   str, seg, level, remove_unmatched);
HITCBC 759   1692 while (!seg.empty()) 759   1692 while (!seg.empty())
760   { 760   {
HITCBC 761   1154 char c = path_pop_back(seg); 761   1154 char c = path_pop_back(seg);
HITCBC 762   1154 hasher.put(c); 762   1154 hasher.put(c);
763   } 763   }
764   } 764   }
HITCBC 765   538 while (!str.empty()); 765   538 while (!str.empty());
HITCBC 766   304 } 766   304 }
767   767  
768   // compare segments as if there were a normalized 768   // compare segments as if there were a normalized
769   int 769   int
HITCBC 770   239 segments_compare( 770   239 segments_compare(
771   segments_encoded_view seg0, 771   segments_encoded_view seg0,
772   segments_encoded_view seg1) noexcept 772   segments_encoded_view seg1) noexcept
773   { 773   {
774   // calculate path size as if it were normalized 774   // calculate path size as if it were normalized
775   auto normalized_size = 775   auto normalized_size =
HITCBC 776   478 [](segments_encoded_view seg) -> std::size_t 776   478 [](segments_encoded_view seg) -> std::size_t
777   { 777   {
HITCBC 778   478 if (seg.empty()) 778   478 if (seg.empty())
HITCBC 779   144 return seg.is_absolute(); 779   144 return seg.is_absolute();
780   780  
HITCBC 781   334 std::size_t n = 0; 781   334 std::size_t n = 0;
HITCBC 782   334 std::size_t skip = 0; 782   334 std::size_t skip = 0;
HITCBC 783   334 auto begin = seg.begin(); 783   334 auto begin = seg.begin();
HITCBC 784   334 auto it = seg.end(); 784   334 auto it = seg.end();
HITCBC 785   1096 while (it != begin) 785   1096 while (it != begin)
786   { 786   {
HITCBC 787   762 --it; 787   762 --it;
HITCBC 788   762 decode_view dseg = **it; 788   762 decode_view dseg = **it;
HITCBC 789   762 if (dseg == "..") 789   762 if (dseg == "..")
HITCBC 790   167 ++skip; 790   167 ++skip;
HITCBC 791   595 else if (dseg != ".") 791   595 else if (dseg != ".")
792   { 792   {
HITCBC 793   557 if (skip) 793   557 if (skip)
HITCBC 794   85 --skip; 794   85 --skip;
795   else 795   else
HITCBC 796   472 n += dseg.size() + 1; 796   472 n += dseg.size() + 1;
797   } 797   }
798   } 798   }
HITCBC 799   334 n += skip * 3; 799   334 n += skip * 3;
HITCBC 800   334 n -= !seg.is_absolute(); 800   334 n -= !seg.is_absolute();
HITCBC 801   334 return n; 801   334 return n;
802   }; 802   };
803   803  
804   // find the normalized size for the comparison 804   // find the normalized size for the comparison
HITCBC 805   239 std::size_t n0 = normalized_size(seg0); 805   239 std::size_t n0 = normalized_size(seg0);
HITCBC 806   239 std::size_t n1 = normalized_size(seg1); 806   239 std::size_t n1 = normalized_size(seg1);
HITCBC 807   239 std::size_t n00 = n0; 807   239 std::size_t n00 = n0;
HITCBC 808   239 std::size_t n10 = n1; 808   239 std::size_t n10 = n1;
809   809  
810   // consume the last char from a segment range 810   // consume the last char from a segment range
811   auto consume_last = 811   auto consume_last =
HITCBC 812   2064 []( 812   2064 [](
813   std::size_t& n, 813   std::size_t& n,
814   decode_view& dseg, 814   decode_view& dseg,
815   segments_encoded_view::iterator& begin, 815   segments_encoded_view::iterator& begin,
816   segments_encoded_view::iterator& it, 816   segments_encoded_view::iterator& it,
817   decode_view::iterator& cit, 817   decode_view::iterator& cit,
818   std::size_t& skip, 818   std::size_t& skip,
819   bool& at_slash) -> char 819   bool& at_slash) -> char
820   { 820   {
HITCBC 821   2064 if (cit != dseg.begin()) 821   2064 if (cit != dseg.begin())
822   { 822   {
823   // return last char from current segment 823   // return last char from current segment
HITCBC 824   1387 at_slash = false; 824   1387 at_slash = false;
HITCBC 825   1387 --cit; 825   1387 --cit;
HITCBC 826   1387 --n; 826   1387 --n;
HITCBC 827   1387 return *cit; 827   1387 return *cit;
828   } 828   }
829   829  
HITCBC 830   677 if (!at_slash) 830   677 if (!at_slash)
831   { 831   {
832   // current segment dseg is over and 832   // current segment dseg is over and
833   // previous char was not a slash 833   // previous char was not a slash
834   // so we output one 834   // so we output one
HITCBC 835   403 at_slash = true; 835   403 at_slash = true;
HITCBC 836   403 --n; 836   403 --n;
HITCBC 837   403 return '/'; 837   403 return '/';
838   } 838   }
839   839  
840   // current segment dseg is over and 840   // current segment dseg is over and
841   // last char was already the slash 841   // last char was already the slash
842   // between segments, so take the 842   // between segments, so take the
843   // next final segment to consume 843   // next final segment to consume
HITCBC 844   274 at_slash = false; 844   274 at_slash = false;
HITCBC 845   512 while (cit == dseg.begin()) 845   512 while (cit == dseg.begin())
846   { 846   {
847   // take next segment 847   // take next segment
HITCBC 848   512 if (it != begin) 848   512 if (it != begin)
HITCBC 849   380 --it; 849   380 --it;
850   else 850   else
HITCBC 851   132 break; 851   132 break;
HITCBC 852   380 if (**it == "..") 852   380 if (**it == "..")
853   { 853   {
854   // skip next if this is ".." 854   // skip next if this is ".."
HITCBC 855   140 ++skip; 855   140 ++skip;
856   } 856   }
HITCBC 857   240 else if (**it != ".") 857   240 else if (**it != ".")
858   { 858   {
HITCBC 859   212 if (skip) 859   212 if (skip)
860   { 860   {
861   // discount skips 861   // discount skips
HITCBC 862   70 --skip; 862   70 --skip;
863   } 863   }
864   else 864   else
865   { 865   {
866   // or update current seg 866   // or update current seg
HITCBC 867   142 dseg = **it; 867   142 dseg = **it;
HITCBC 868   142 cit = dseg.end(); 868   142 cit = dseg.end();
HITCBC 869   142 break; 869   142 break;
870   } 870   }
871   } 871   }
872   } 872   }
873   // consume from the new current 873   // consume from the new current
874   // segment 874   // segment
HITCBC 875   274 --n; 875   274 --n;
HITCBC 876   274 if (cit != dseg.begin()) 876   274 if (cit != dseg.begin())
877   { 877   {
878   // in the general case, we consume 878   // in the general case, we consume
879   // one more character from the end 879   // one more character from the end
HITCBC 880   127 --cit; 880   127 --cit;
HITCBC 881   127 return *cit; 881   127 return *cit;
882   } 882   }
883   883  
884   // nothing left to consume in the 884   // nothing left to consume in the
885   // current and new segment 885   // current and new segment
HITCBC 886   147 if (it == begin) 886   147 if (it == begin)
887   { 887   {
888   // if this is the first 888   // if this is the first
889   // segment, the segments are 889   // segment, the segments are
890   // over and there can only 890   // over and there can only
891   // be repetitions of "../" to 891   // be repetitions of "../" to
892   // output 892   // output
HITCBC 893   138 return "/.."[n % 3]; 893   138 return "/.."[n % 3];
894   } 894   }
895   // at other segments, we need 895   // at other segments, we need
896   // a slash to transition to the 896   // a slash to transition to the
897   // next segment 897   // next segment
HITCBC 898   9 at_slash = true; 898   9 at_slash = true;
HITCBC 899   9 return '/'; 899   9 return '/';
900   }; 900   };
901   901  
902   // consume final segments from seg0 that 902   // consume final segments from seg0 that
903   // should not influence the comparison 903   // should not influence the comparison
HITCBC 904   239 auto begin0 = seg0.begin(); 904   239 auto begin0 = seg0.begin();
HITCBC 905   239 auto it0 = seg0.end(); 905   239 auto it0 = seg0.end();
HITCBC 906   239 decode_view dseg0; 906   239 decode_view dseg0;
HITCBC 907   239 if (it0 != seg0.begin()) 907   239 if (it0 != seg0.begin())
908   { 908   {
HITCBC 909   166 --it0; 909   166 --it0;
HITCBC 910   166 dseg0 = **it0; 910   166 dseg0 = **it0;
911   } 911   }
HITCBC 912   239 decode_view::iterator cit0 = dseg0.end(); 912   239 decode_view::iterator cit0 = dseg0.end();
HITCBC 913   239 std::size_t skip0 = 0; 913   239 std::size_t skip0 = 0;
HITCBC 914   239 bool at_slash0 = true; 914   239 bool at_slash0 = true;
HITCBC 915   377 while (n0 > n1) 915   377 while (n0 > n1)
916   { 916   {
HITCBC 917   138 consume_last(n0, dseg0, begin0, it0, cit0, skip0, at_slash0); 917   138 consume_last(n0, dseg0, begin0, it0, cit0, skip0, at_slash0);
918   } 918   }
919   919  
920   // consume final segments from seg1 that 920   // consume final segments from seg1 that
921   // should not influence the comparison 921   // should not influence the comparison
HITCBC 922   239 auto begin1 = seg1.begin(); 922   239 auto begin1 = seg1.begin();
HITCBC 923   239 auto it1 = seg1.end(); 923   239 auto it1 = seg1.end();
HITCBC 924   239 decode_view dseg1; 924   239 decode_view dseg1;
HITCBC 925   239 if (it1 != seg1.begin()) 925   239 if (it1 != seg1.begin())
926   { 926   {
HITCBC 927   168 --it1; 927   168 --it1;
HITCBC 928   168 dseg1 = **it1; 928   168 dseg1 = **it1;
929   } 929   }
HITCBC 930   239 decode_view::iterator cit1 = dseg1.end(); 930   239 decode_view::iterator cit1 = dseg1.end();
HITCBC 931   239 std::size_t skip1 = 0; 931   239 std::size_t skip1 = 0;
HITCBC 932   239 bool at_slash1 = true; 932   239 bool at_slash1 = true;
HITCBC 933   285 while (n1 > n0) 933   285 while (n1 > n0)
934   { 934   {
HITCBC 935   46 consume_last(n1, dseg1, begin1, it1, cit1, skip1, at_slash1); 935   46 consume_last(n1, dseg1, begin1, it1, cit1, skip1, at_slash1);
936   } 936   }
937   937  
HITCBC 938   239 int cmp = 0; 938   239 int cmp = 0;
HITCBC 939   1179 while (n0) 939   1179 while (n0)
940   { 940   {
HITCBC 941   940 char c0 = consume_last( 941   940 char c0 = consume_last(
942   n0, dseg0, begin0, it0, cit0, skip0, at_slash0); 942   n0, dseg0, begin0, it0, cit0, skip0, at_slash0);
HITCBC 943   940 char c1 = consume_last( 943   940 char c1 = consume_last(
944   n1, dseg1, begin1, it1, cit1, skip1, at_slash1); 944   n1, dseg1, begin1, it1, cit1, skip1, at_slash1);
HITCBC 945   940 if (c0 < c1) 945   940 if (c0 < c1)
HITCBC 946   40 cmp = -1; 946   40 cmp = -1;
HITCBC 947   900 else if (c1 < c0) 947   900 else if (c1 < c0)
HITCBC 948   44 cmp = +1; 948   44 cmp = +1;
949   } 949   }
950   950  
HITCBC 951   239 if (cmp != 0) 951   239 if (cmp != 0)
HITCBC 952   48 return cmp; 952   48 return cmp;
HITCBC 953   191 if ( n00 == n10 ) 953   191 if ( n00 == n10 )
HITCBC 954   185 return 0; 954   185 return 0;
HITCBC 955   6 if ( n00 < n10 ) 955   6 if ( n00 < n10 )
HITCBC 956   4 return -1; 956   4 return -1;
HITCBC 957   2 return 1; 957   2 return 1;
958   } 958   }
959   959  
960   } // detail 960   } // detail
961   } // urls 961   } // urls
962   } // boost 962   } // boost
963   963