src/detail/normalize.cpp

TLA	Baseline
	1		//	1		//
	2		// Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)	2		// Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
	3		// Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com)	3		// Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com)
	4		//	4		//
	5		// Distributed under the Boost Software License, Version 1.0. (See accompanying	5		// Distributed under the Boost Software License, Version 1.0. (See accompanying
	6		// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)	6		// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
	7		//	7		//
	8		// Official repository: https://github.com/boostorg/url	8		// Official repository: https://github.com/boostorg/url
	9		//	9		//
	10			10
	11			11
	12		#include <boost/url/detail/config.hpp>	12		#include <boost/url/detail/config.hpp>
	13		#include <boost/url/decode_view.hpp>	13		#include <boost/url/decode_view.hpp>
	14		#include <boost/url/detail/decode.hpp>	14		#include <boost/url/detail/decode.hpp>
	15		#include <boost/url/segments_encoded_view.hpp>	15		#include <boost/url/segments_encoded_view.hpp>
	16		#include <boost/url/grammar/ci_string.hpp>	16		#include <boost/url/grammar/ci_string.hpp>
	17		#include <boost/url/grammar/lut_chars.hpp>	17		#include <boost/url/grammar/lut_chars.hpp>
	18		#include <boost/assert.hpp>	18		#include <boost/assert.hpp>
	19		#include <boost/core/ignore_unused.hpp>	19		#include <boost/core/ignore_unused.hpp>
	20		#include <cstring>	20		#include <cstring>
	21		#include <boost/url/detail/normalize.hpp>	21		#include <boost/url/detail/normalize.hpp>
	22			22
	23		namespace boost {	23		namespace boost {
	24		namespace urls {	24		namespace urls {
	25		namespace detail {	25		namespace detail {
	26			26
	27		void	27		void
HIT CBC	28	7772	pop_encoded_front(	28	7772	pop_encoded_front(
	29		core::string_view& s,	29		core::string_view& s,
	30		char& c,	30		char& c,
	31		std::size_t& n) noexcept	31		std::size_t& n) noexcept
	32		{	32		{
HIT CBC	33	7772	if(s.front() != '%')	33	7772	if(s.front() != '%')
	34		{	34		{
HIT CBC	35	7620	c = s.front();	35	7620	c = s.front();
HIT CBC	36	7620	s.remove_prefix(1);	36	7620	s.remove_prefix(1);
	37		}	37		}
	38		else	38		else
	39		{	39		{
HIT CBC	40	152	detail::decode_unsafe(	40	152	detail::decode_unsafe(
	41		&c,	41		&c,
	42		&c + 1,	42		&c + 1,
	43		s.substr(0, 3));	43		s.substr(0, 3));
HIT CBC	44	152	s.remove_prefix(3);	44	152	s.remove_prefix(3);
	45		}	45		}
HIT CBC	46	7772	++n;	46	7772	++n;
HIT CBC	47	7772	}	47	7772	}
	48			48
	49		int	49		int
HIT CBC	50	64	compare_encoded(	50	64	compare_encoded(
	51		core::string_view lhs,	51		core::string_view lhs,
	52		core::string_view rhs) noexcept	52		core::string_view rhs) noexcept
	53		{	53		{
HIT CBC	54	64	std::size_t n0 = 0;	54	64	std::size_t n0 = 0;
HIT CBC	55	64	std::size_t n1 = 0;	55	64	std::size_t n1 = 0;
HIT CBC	56	64	char c0 = 0;	56	64	char c0 = 0;
HIT CBC	57	64	char c1 = 0;	57	64	char c1 = 0;
HIT CBC	58	64	while(	58	64	while(
HIT CBC	59	486	!lhs.empty() &&	59	486	!lhs.empty() &&
HIT CBC	60	228	!rhs.empty())	60	228	!rhs.empty())
	61		{	61		{
HIT CBC	62	215	pop_encoded_front(lhs, c0, n0);	62	215	pop_encoded_front(lhs, c0, n0);
HIT CBC	63	215	pop_encoded_front(rhs, c1, n1);	63	215	pop_encoded_front(rhs, c1, n1);
HIT CBC	64	215	if (c0 < c1)	64	215	if (c0 < c1)
HIT CBC	65	18	return -1;	65	18	return -1;
HIT CBC	66	197	if (c1 < c0)	66	197	if (c1 < c0)
HIT CBC	67	3	return 1;	67	3	return 1;
	68		}	68		}
HIT CBC	69	43	n0 += detail::decode_bytes_unsafe(lhs);	69	43	n0 += detail::decode_bytes_unsafe(lhs);
HIT CBC	70	43	n1 += detail::decode_bytes_unsafe(rhs);	70	43	n1 += detail::decode_bytes_unsafe(rhs);
HIT CBC	71	43	if (n0 == n1)	71	43	if (n0 == n1)
HIT CBC	72	22	return 0;	72	22	return 0;
HIT CBC	73	21	if (n0 < n1)	73	21	if (n0 < n1)
HIT CBC	74	8	return -1;	74	8	return -1;
HIT CBC	75	13	return 1;	75	13	return 1;
	76		}	76		}
	77			77
	78		int	78		int
HIT CBC	79	28	compare_encoded_query(	79	28	compare_encoded_query(
	80		core::string_view lhs,	80		core::string_view lhs,
	81		core::string_view rhs) noexcept	81		core::string_view rhs) noexcept
	82		{	82		{
	83		static constexpr	83		static constexpr
	84		grammar::lut_chars	84		grammar::lut_chars
	85		query_compare_exception_lut = "&=+";	85		query_compare_exception_lut = "&=+";
	86			86
HIT CBC	87	28	std::size_t n0 = 0;	87	28	std::size_t n0 = 0;
HIT CBC	88	28	std::size_t n1 = 0;	88	28	std::size_t n1 = 0;
HIT CBC	89	28	char c0 = 0;	89	28	char c0 = 0;
HIT CBC	90	28	char c1 = 0;	90	28	char c1 = 0;
HIT CBC	91	28	while(	91	28	while(
HIT CBC	92	254	!lhs.empty() &&	92	254	!lhs.empty() &&
HIT CBC	93	122	!rhs.empty())	93	122	!rhs.empty())
	94		{	94		{
HIT CBC	95	121	bool const lhs_was_decoded = lhs.front() != '%';	95	121	bool const lhs_was_decoded = lhs.front() != '%';
HIT CBC	96	121	bool const rhs_was_decoded = rhs.front() != '%';	96	121	bool const rhs_was_decoded = rhs.front() != '%';
HIT CBC	97	121	pop_encoded_front(lhs, c0, n0);	97	121	pop_encoded_front(lhs, c0, n0);
HIT CBC	98	121	pop_encoded_front(rhs, c1, n1);	98	121	pop_encoded_front(rhs, c1, n1);
HIT CBC	99	121	if (c0 < c1)	99	121	if (c0 < c1)
HIT CBC	100	2	return -1;	100	2	return -1;
HIT CBC	101	119	if (c1 < c0)	101	119	if (c1 < c0)
HIT CBC	102	12	return 1;	102	12	return 1;
	103		// The decoded chars are the same, but	103		// The decoded chars are the same, but
	104		// are these query exceptions that have	104		// are these query exceptions that have
	105		// different meanings when decoded?	105		// different meanings when decoded?
HIT CBC	106	107	if (query_compare_exception_lut(c0))	106	107	if (query_compare_exception_lut(c0))
	107		{	107		{
	108		// If so, we only continue if both	108		// If so, we only continue if both
	109		// chars were decoded or encoded	109		// chars were decoded or encoded
	110		// the same way.	110		// the same way.
HIT CBC	111	40	if (lhs_was_decoded == rhs_was_decoded)	111	40	if (lhs_was_decoded == rhs_was_decoded)
HIT CBC	112	37	continue;	112	37	continue;
	113		// Otherwise, we return a value != 0	113		// Otherwise, we return a value != 0
	114		// because these chars are not equal.	114		// because these chars are not equal.
	115		// If rhs was the decoded one, it contains	115		// If rhs was the decoded one, it contains
	116		// an ascii char higher than '%'	116		// an ascii char higher than '%'
HIT CBC	117	3	if (rhs_was_decoded)	117	3	if (rhs_was_decoded)
HIT CBC	118	2	return -1;	118	2	return -1;
	119		else	119		else
HIT CBC	120	1	return 1;	120	1	return 1;
	121		}	121		}
	122		}	122		}
HIT CBC	123	11	n0 += detail::decode_bytes_unsafe(lhs);	123	11	n0 += detail::decode_bytes_unsafe(lhs);
HIT CBC	124	11	n1 += detail::decode_bytes_unsafe(rhs);	124	11	n1 += detail::decode_bytes_unsafe(rhs);
HIT CBC	125	11	if (n0 == n1)	125	11	if (n0 == n1)
HIT CBC	126	9	return 0;	126	9	return 0;
HIT CBC	127	2	if (n0 < n1)	127	2	if (n0 < n1)
HIT CBC	128	1	return -1;	128	1	return -1;
HIT CBC	129	1	return 1;	129	1	return 1;
	130		}	130		}
	131			131
	132		void	132		void
HIT CBC	133	1216	digest_encoded(	133	1216	digest_encoded(
	134		core::string_view s,	134		core::string_view s,
	135		fnv_1a& hasher) noexcept	135		fnv_1a& hasher) noexcept
	136		{	136		{
HIT CBC	137	1216	char c = 0;	137	1216	char c = 0;
HIT CBC	138	1216	std::size_t n = 0;	138	1216	std::size_t n = 0;
HIT CBC	139	1724	while(!s.empty())	139	1724	while(!s.empty())
	140		{	140		{
HIT CBC	141	508	pop_encoded_front(s, c, n);	141	508	pop_encoded_front(s, c, n);
HIT CBC	142	508	hasher.put(c);	142	508	hasher.put(c);
	143		}	143		}
HIT CBC	144	1216	}	144	1216	}
	145			145
	146		int	146		int
HIT CBC	147	180	ci_compare_encoded(	147	180	ci_compare_encoded(
	148		core::string_view lhs,	148		core::string_view lhs,
	149		core::string_view rhs) noexcept	149		core::string_view rhs) noexcept
	150		{	150		{
HIT CBC	151	180	std::size_t n0 = 0;	151	180	std::size_t n0 = 0;
HIT CBC	152	180	std::size_t n1 = 0;	152	180	std::size_t n1 = 0;
HIT CBC	153	180	char c0 = 0;	153	180	char c0 = 0;
HIT CBC	154	180	char c1 = 0;	154	180	char c1 = 0;
HIT CBC	155	180	while (	155	180	while (
HIT CBC	156	4704	!lhs.empty() &&	156	4704	!lhs.empty() &&
HIT CBC	157	2271	!rhs.empty())	157	2271	!rhs.empty())
	158		{	158		{
HIT CBC	159	2265	pop_encoded_front(lhs, c0, n0);	159	2265	pop_encoded_front(lhs, c0, n0);
HIT CBC	160	2265	pop_encoded_front(rhs, c1, n1);	160	2265	pop_encoded_front(rhs, c1, n1);
HIT CBC	161	2265	c0 = grammar::to_lower(c0);	161	2265	c0 = grammar::to_lower(c0);
HIT CBC	162	2265	c1 = grammar::to_lower(c1);	162	2265	c1 = grammar::to_lower(c1);
HIT CBC	163	2265	if (c0 < c1)	163	2265	if (c0 < c1)
HIT CBC	164	10	return -1;	164	10	return -1;
HIT CBC	165	2255	if (c1 < c0)	165	2255	if (c1 < c0)
HIT CBC	166	2	return 1;	166	2	return 1;
	167		}	167		}
HIT CBC	168	168	n0 += detail::decode_bytes_unsafe(lhs);	168	168	n0 += detail::decode_bytes_unsafe(lhs);
HIT CBC	169	168	n1 += detail::decode_bytes_unsafe(rhs);	169	168	n1 += detail::decode_bytes_unsafe(rhs);
HIT CBC	170	168	if (n0 == n1)	170	168	if (n0 == n1)
HIT CBC	171	161	return 0;	171	161	return 0;
HIT CBC	172	7	if (n0 < n1)	172	7	if (n0 < n1)
HIT CBC	173	1	return -1;	173	1	return -1;
HIT CBC	174	6	return 1;	174	6	return 1;
	175		}	175		}
	176			176
	177		void	177		void
HIT CBC	178	304	ci_digest_encoded(	178	304	ci_digest_encoded(
	179		core::string_view s,	179		core::string_view s,
	180		fnv_1a& hasher) noexcept	180		fnv_1a& hasher) noexcept
	181		{	181		{
HIT CBC	182	304	char c = 0;	182	304	char c = 0;
HIT CBC	183	304	std::size_t n = 0;	183	304	std::size_t n = 0;
HIT CBC	184	2366	while(!s.empty())	184	2366	while(!s.empty())
	185		{	185		{
HIT CBC	186	2062	pop_encoded_front(s, c, n);	186	2062	pop_encoded_front(s, c, n);
HIT CBC	187	2062	c = grammar::to_lower(c);	187	2062	c = grammar::to_lower(c);
HIT CBC	188	2062	hasher.put(c);	188	2062	hasher.put(c);
	189		}	189		}
HIT CBC	190	304	}	190	304	}
	191			191
	192		int	192		int
HIT CBC	193	46	compare(	193	46	compare(
	194		core::string_view lhs,	194		core::string_view lhs,
	195		core::string_view rhs) noexcept	195		core::string_view rhs) noexcept
	196		{	196		{
HIT CBC	197	46	auto rlen = (std::min)(lhs.size(), rhs.size());	197	46	auto rlen = (std::min)(lhs.size(), rhs.size());
HIT CBC	198	104	for (std::size_t i = 0; i < rlen; ++i)	198	104	for (std::size_t i = 0; i < rlen; ++i)
	199		{	199		{
HIT CBC	200	79	char c0 = lhs[i];	200	79	char c0 = lhs[i];
HIT CBC	201	79	char c1 = rhs[i];	201	79	char c1 = rhs[i];
HIT CBC	202	79	if (c0 < c1)	202	79	if (c0 < c1)
HIT CBC	203	13	return -1;	203	13	return -1;
HIT CBC	204	66	if (c1 < c0)	204	66	if (c1 < c0)
HIT CBC	205	8	return 1;	205	8	return 1;
	206		}	206		}
HIT CBC	207	25	if ( lhs.size() == rhs.size() )	207	25	if ( lhs.size() == rhs.size() )
HIT CBC	208	4	return 0;	208	4	return 0;
HIT CBC	209	21	if ( lhs.size() < rhs.size() )	209	21	if ( lhs.size() < rhs.size() )
HIT CBC	210	8	return -1;	210	8	return -1;
HIT CBC	211	13	return 1;	211	13	return 1;
	212		}	212		}
	213			213
	214		int	214		int
HIT CBC	215	220	ci_compare(	215	220	ci_compare(
	216		core::string_view lhs,	216		core::string_view lhs,
	217		core::string_view rhs) noexcept	217		core::string_view rhs) noexcept
	218		{	218		{
HIT CBC	219	220	auto rlen = (std::min)(lhs.size(), rhs.size());	219	220	auto rlen = (std::min)(lhs.size(), rhs.size());
HIT CBC	220	1125	for (std::size_t i = 0; i < rlen; ++i)	220	1125	for (std::size_t i = 0; i < rlen; ++i)
	221		{	221		{
HIT CBC	222	912	char c0 = grammar::to_lower(lhs[i]);	222	912	char c0 = grammar::to_lower(lhs[i]);
HIT CBC	223	912	char c1 = grammar::to_lower(rhs[i]);	223	912	char c1 = grammar::to_lower(rhs[i]);
HIT CBC	224	912	if (c0 < c1)	224	912	if (c0 < c1)
HIT CBC	225	6	return -1;	225	6	return -1;
HIT CBC	226	906	if (c1 < c0)	226	906	if (c1 < c0)
HIT CBC	227	1	return 1;	227	1	return 1;
	228		}	228		}
HIT CBC	229	213	if ( lhs.size() == rhs.size() )	229	213	if ( lhs.size() == rhs.size() )
HIT CBC	230	205	return 0;	230	205	return 0;
HIT CBC	231	8	if ( lhs.size() < rhs.size() )	231	8	if ( lhs.size() < rhs.size() )
HIT CBC	232	6	return -1;	232	6	return -1;
HIT CBC	233	2	return 1;	233	2	return 1;
	234		}	234		}
	235			235
	236		void	236		void
HIT CBC	237	304	ci_digest(	237	304	ci_digest(
	238		core::string_view s,	238		core::string_view s,
	239		fnv_1a& hasher) noexcept	239		fnv_1a& hasher) noexcept
	240		{	240		{
HIT CBC	241	1034	for (char c: s)	241	1034	for (char c: s)
	242		{	242		{
HIT CBC	243	730	c = grammar::to_lower(c);	243	730	c = grammar::to_lower(c);
HIT CBC	244	730	hasher.put(c);	244	730	hasher.put(c);
	245		}	245		}
HIT CBC	246	304	}	246	304	}
	247			247
	248		/* Check if a string ends with the specified suffix (decoded comparison)	248		/* Check if a string ends with the specified suffix (decoded comparison)
	249			249
	250		This function determines if a string ends with the specified suffix	250		This function determines if a string ends with the specified suffix
	251		when the string and suffix are compared after percent-decoding.	251		when the string and suffix are compared after percent-decoding.
	252			252
	253		@param str The string to check (percent-encoded)	253		@param str The string to check (percent-encoded)
	254		@param suffix The suffix to check for (percent-decoded)	254		@param suffix The suffix to check for (percent-decoded)
	255		@return The number of encoded chars consumed in the string	255		@return The number of encoded chars consumed in the string
	256		*/	256		*/
	257		std::size_t	257		std::size_t
HIT CBC	258	2136	path_ends_with(	258	2136	path_ends_with(
	259		core::string_view str,	259		core::string_view str,
	260		core::string_view suffix) noexcept	260		core::string_view suffix) noexcept
	261		{	261		{
HIT CBC	262	2136	BOOST_ASSERT(!str.empty());	262	2136	BOOST_ASSERT(!str.empty());
HIT CBC	263	2136	BOOST_ASSERT(!suffix.empty());	263	2136	BOOST_ASSERT(!suffix.empty());
HIT CBC	264	2136	BOOST_ASSERT(!suffix.contains("%2F"));	264	2136	BOOST_ASSERT(!suffix.contains("%2F"));
HIT CBC	265	2136	BOOST_ASSERT(!suffix.contains("%2f"));	265	2136	BOOST_ASSERT(!suffix.contains("%2f"));
HIT CBC	266	5848	auto consume_last = [](	266	5848	auto consume_last = [](
	267		core::string_view::iterator& it,	267		core::string_view::iterator& it,
	268		core::string_view::iterator& end,	268		core::string_view::iterator& end,
	269		char& c)	269		char& c)
	270		{	270		{
HIT CBC	271	5848	BOOST_ASSERT(end > it);	271	5848	BOOST_ASSERT(end > it);
HIT CBC	272	5848	BOOST_ASSERT(it != end);	272	5848	BOOST_ASSERT(it != end);
HIT CBC	273	9808	if ((end - it) < 3 \|\|	273	9808	if ((end - it) < 3 \|\|
HIT CBC	274	7920	*(std::prev(end, 3)) != '%')	274	7920	*(std::prev(end, 3)) != '%')
	275		{	275		{
HIT CBC	276	5800	c = *--end;	276	5800	c = *--end;
HIT CBC	277	5800	return false;	277	5800	return false;
	278		}	278		}
HIT CBC	279	96	detail::decode_unsafe(	279	96	detail::decode_unsafe(
	280		&c,	280		&c,
	281		&c + 1,	281		&c + 1,
	282		core::string_view(std::prev(	282		core::string_view(std::prev(
	283		end, 3), 3));	283		end, 3), 3));
HIT CBC	284	48	end -= 3;	284	48	end -= 3;
HIT CBC	285	48	return true;	285	48	return true;
	286		};	286		};
	287			287
HIT CBC	288	2136	auto it0 = str.begin();	288	2136	auto it0 = str.begin();
HIT CBC	289	2136	auto end0 = str.end();	289	2136	auto end0 = str.end();
HIT CBC	290	2136	auto it1 = suffix.begin();	290	2136	auto it1 = suffix.begin();
HIT CBC	291	2136	auto end1 = suffix.end();	291	2136	auto end1 = suffix.end();
HIT CBC	292	2136	char c0 = 0;	292	2136	char c0 = 0;
HIT CBC	293	2136	char c1 = 0;	293	2136	char c1 = 0;
HIT CBC	294	2136	while(	294	2136	while(
HIT CBC	295	3248	it0 < end0 &&	295	3248	it0 < end0 &&
HIT CBC	296	3006	it1 < end1)	296	3006	it1 < end1)
	297		{	297		{
HIT CBC	298	2932	bool const is_encoded = consume_last(it0, end0, c0);	298	2932	bool const is_encoded = consume_last(it0, end0, c0);
	299		// The suffix never contains an encoded slash (%2F), and a decoded	299		// The suffix never contains an encoded slash (%2F), and a decoded
	300		// slash is not equivalent to an encoded slash	300		// slash is not equivalent to an encoded slash
HIT CBC	301	2932	if (is_encoded && c0 == '/')	301	2932	if (is_encoded && c0 == '/')
HIT CBC	302	16	return 0;	302	16	return 0;
HIT CBC	303	2916	consume_last(it1, end1, c1);	303	2916	consume_last(it1, end1, c1);
HIT CBC	304	2916	if (c0 != c1)	304	2916	if (c0 != c1)
HIT CBC	305	1804	return 0;	305	1804	return 0;
	306		}	306		}
HIT CBC	307	316	bool const consumed_suffix = it1 == end1;	307	316	bool const consumed_suffix = it1 == end1;
HIT CBC	308	316	if (consumed_suffix)	308	316	if (consumed_suffix)
	309		{	309		{
HIT CBC	310	110	std::size_t const consumed_encoded = str.end() - end0;	310	110	std::size_t const consumed_encoded = str.end() - end0;
HIT CBC	311	110	return consumed_encoded;	311	110	return consumed_encoded;
	312		}	312		}
HIT CBC	313	206	return 0;	313	206	return 0;
	314		}	314		}
	315			315
	316		std::size_t	316		std::size_t
HIT CBC	317	1065	remove_dot_segments(	317	1065	remove_dot_segments(
	318		char* dest0,	318		char* dest0,
	319		char const* end,	319		char const* end,
	320		core::string_view input) noexcept	320		core::string_view input) noexcept
	321		{	321		{
	322		// 1. The input buffer `s` is initialized with	322		// 1. The input buffer `s` is initialized with
	323		// the now-appended path components and the	323		// the now-appended path components and the
	324		// output buffer `dest0` is initialized to	324		// output buffer `dest0` is initialized to
	325		// the empty string.	325		// the empty string.
HIT CBC	326	1065	char* dest = dest0;	326	1065	char* dest = dest0;
HIT CBC	327	1065	bool const is_absolute = input.starts_with('/');	327	1065	bool const is_absolute = input.starts_with('/');
	328			328
	329		// Step 2 is a loop through 5 production rules:	329		// Step 2 is a loop through 5 production rules:
	330		// https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4	330		// https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4
	331		//	331		//
	332		// There are no transitions between all rules,	332		// There are no transitions between all rules,
	333		// which enables some optimizations.	333		// which enables some optimizations.
	334		//	334		//
	335		// Initial:	335		// Initial:
	336		// - Rule A: handle initial dots	336		// - Rule A: handle initial dots
	337		// If the input buffer begins with a	337		// If the input buffer begins with a
	338		// prefix of "../" or "./", then remove	338		// prefix of "../" or "./", then remove
	339		// that prefix from the input buffer.	339		// that prefix from the input buffer.
	340		// Rule A can only happen at the beginning.	340		// Rule A can only happen at the beginning.
	341		// Errata 4547: Keep "../" in the beginning	341		// Errata 4547: Keep "../" in the beginning
	342		// https://www.rfc-editor.org/errata/eid4547	342		// https://www.rfc-editor.org/errata/eid4547
	343		//	343		//
	344		// Then:	344		// Then:
	345		// - Rule D: ignore a final ".." or "."	345		// - Rule D: ignore a final ".." or "."
	346		// if the input buffer consists only of "."	346		// if the input buffer consists only of "."
	347		// or "..", then remove that from the input	347		// or "..", then remove that from the input
	348		// buffer.	348		// buffer.
	349		// Rule D can only happen after Rule A because:	349		// Rule D can only happen after Rule A because:
	350		// - B and C write "/" to the input	350		// - B and C write "/" to the input
	351		// - E writes "/" to input or returns	351		// - E writes "/" to input or returns
	352		//	352		//
	353		// Then:	353		// Then:
	354		// - Rule B: ignore ".": write "/" to the input	354		// - Rule B: ignore ".": write "/" to the input
	355		// - Rule C: apply "..": remove seg and write "/"	355		// - Rule C: apply "..": remove seg and write "/"
	356		// - Rule E: copy complete segment	356		// - Rule E: copy complete segment
	357		auto append =	357		auto append =
HIT CBC	358	1879	[](char& first, char const last, core::string_view in)	358	1879	[](char& first, char const last, core::string_view in)
	359		{	359		{
	360		// append `in` to `dest`	360		// append `in` to `dest`
HIT CBC	361	1879	BOOST_ASSERT(in.size() <= std::size_t(last - first));	361	1879	BOOST_ASSERT(in.size() <= std::size_t(last - first));
HIT CBC	362	1879	std::memmove(first, in.data(), in.size());	362	1879	std::memmove(first, in.data(), in.size());
HIT CBC	363	1879	first += in.size();	363	1879	first += in.size();
	364		ignore_unused(last);	364		ignore_unused(last);
HIT CBC	365	1879	};	365	1879	};
	366			366
HIT CBC	367	12011	auto dot_starts_with = [](	367	12011	auto dot_starts_with = [](
	368		core::string_view str, core::string_view dots, std::size_t& n)	368		core::string_view str, core::string_view dots, std::size_t& n)
	369		{	369		{
	370		// starts_with for encoded/decoded dots	370		// starts_with for encoded/decoded dots
	371		// or decoded otherwise. return how many	371		// or decoded otherwise. return how many
	372		// chars in str match the dots	372		// chars in str match the dots
HIT CBC	373	12011	n = 0;	373	12011	n = 0;
HIT CBC	374	21036	for (char c: dots)	374	21036	for (char c: dots)
	375		{	375		{
HIT CBC	376	20431	if (str.starts_with(c))	376	20431	if (str.starts_with(c))
	377		{	377		{
HIT CBC	378	9025	str.remove_prefix(1);	378	9025	str.remove_prefix(1);
HIT CBC	379	9025	++n;	379	9025	++n;
HIT CBC	380	9025	continue;	380	9025	continue;
	381		}	381		}
	382			382
	383		// In the general case, we would need to	383		// In the general case, we would need to
	384		// check if the next char is an encoded	384		// check if the next char is an encoded
	385		// dot.	385		// dot.
	386		// However, an encoded dot in `str`	386		// However, an encoded dot in `str`
	387		// would have already been decoded in	387		// would have already been decoded in
	388		// url_base::normalize_path().	388		// url_base::normalize_path().
	389		// This needs to be undone if	389		// This needs to be undone if
	390		// `remove_dot_segments` is used in a	390		// `remove_dot_segments` is used in a
	391		// different context.	391		// different context.
	392		// if (str.size() > 2 &&	392		// if (str.size() > 2 &&
	393		// c == '.'	393		// c == '.'
	394		// &&	394		// &&
	395		// str[0] == '%' &&	395		// str[0] == '%' &&
	396		// str[1] == '2' &&	396		// str[1] == '2' &&
	397		// (str[2] == 'e' \|\|	397		// (str[2] == 'e' \|\|
	398		// str[2] == 'E'))	398		// str[2] == 'E'))
	399		// {	399		// {
	400		// str.remove_prefix(3);	400		// str.remove_prefix(3);
	401		// n += 3;	401		// n += 3;
	402		// continue;	402		// continue;
	403		// }	403		// }
	404			404
HIT CBC	405	11406	n = 0;	405	11406	n = 0;
HIT CBC	406	11406	return false;	406	11406	return false;
	407		}	407		}
HIT CBC	408	605	return true;	408	605	return true;
	409		};	409		};
	410			410
HIT CBC	411	6016	auto dot_equal = [&dot_starts_with](	411	6016	auto dot_equal = [&dot_starts_with](
	412		core::string_view str, core::string_view dots)	412		core::string_view str, core::string_view dots)
	413		{	413		{
HIT CBC	414	6016	std::size_t n = 0;	414	6016	std::size_t n = 0;
HIT CBC	415	6016	dot_starts_with(str, dots, n);	415	6016	dot_starts_with(str, dots, n);
HIT CBC	416	6016	return n == str.size();	416	6016	return n == str.size();
HIT CBC	417	1065	};	417	1065	};
	418			418
	419		// Rule A	419		// Rule A
	420		std::size_t n;	420		std::size_t n;
HIT CBC	421	1086	while (!input.empty())	421	1086	while (!input.empty())
	422		{	422		{
HIT CBC	423	960	if (dot_starts_with(input, "../", n))	423	960	if (dot_starts_with(input, "../", n))
	424		{	424		{
	425		// Errata 4547	425		// Errata 4547
HIT CBC	426	4	append(dest, end, "../");	426	4	append(dest, end, "../");
HIT CBC	427	4	input.remove_prefix(n);	427	4	input.remove_prefix(n);
HIT CBC	428	4	continue;	428	4	continue;
	429		}	429		}
HIT CBC	430	956	else if (!dot_starts_with(input, "./", n))	430	956	else if (!dot_starts_with(input, "./", n))
	431		{	431		{
HIT CBC	432	939	break;	432	939	break;
	433		}	433		}
HIT CBC	434	17	input.remove_prefix(n);	434	17	input.remove_prefix(n);
	435		}	435		}
	436			436
	437		// Rule D	437		// Rule D
HIT CBC	438	1065	if( dot_equal(input, "."))	438	1065	if( dot_equal(input, "."))
	439		{	439		{
HIT CBC	440	127	input = {};	440	127	input = {};
	441		}	441		}
HIT CBC	442	938	else if( dot_equal(input, "..") )	442	938	else if( dot_equal(input, "..") )
	443		{	443		{
	444		// Errata 4547	444		// Errata 4547
HIT CBC	445	3	append(dest, end, "..");	445	3	append(dest, end, "..");
HIT CBC	446	3	input = {};	446	3	input = {};
	447		}	447		}
	448			448
	449		// 2. While the input buffer is not empty,	449		// 2. While the input buffer is not empty,
	450		// loop as follows:	450		// loop as follows:
HIT CBC	451	3088	while (!input.empty())	451	3088	while (!input.empty())
	452		{	452		{
	453		// Rule B	453		// Rule B
HIT CBC	454	2062	bool const is_dot_seg = dot_starts_with(input, "/./", n);	454	2062	bool const is_dot_seg = dot_starts_with(input, "/./", n);
HIT CBC	455	2062	if (is_dot_seg)	455	2062	if (is_dot_seg)
	456		{	456		{
HIT CBC	457	37	input.remove_prefix(n - 1);	457	37	input.remove_prefix(n - 1);
HIT CBC	458	37	continue;	458	37	continue;
	459		}	459		}
	460			460
HIT CBC	461	2025	bool const is_final_dot_seg = dot_equal(input, "/.");	461	2025	bool const is_final_dot_seg = dot_equal(input, "/.");
HIT CBC	462	2025	if (is_final_dot_seg)	462	2025	if (is_final_dot_seg)
	463		{	463		{
	464		// We can't remove "." from a core::string_view	464		// We can't remove "." from a core::string_view
	465		// So what we do here is equivalent to	465		// So what we do here is equivalent to
	466		// replacing s with '/' as required	466		// replacing s with '/' as required
	467		// in Rule B and executing the next	467		// in Rule B and executing the next
	468		// iteration, which would append this	468		// iteration, which would append this
	469		// '/' to the output, as required by	469		// '/' to the output, as required by
	470		// Rule E	470		// Rule E
HIT CBC	471	8	append(dest, end, input.substr(0, 1));	471	8	append(dest, end, input.substr(0, 1));
HIT CBC	472	8	input = {};	472	8	input = {};
HIT CBC	473	8	break;	473	8	break;
	474		}	474		}
	475			475
	476		// Rule C	476		// Rule C
HIT CBC	477	2017	bool const is_dotdot_seg = dot_starts_with(input, "/../", n);	477	2017	bool const is_dotdot_seg = dot_starts_with(input, "/../", n);
HIT CBC	478	2017	if (is_dotdot_seg)	478	2017	if (is_dotdot_seg)
	479		{	479		{
HIT CBC	480	215	core::string_view cur_out(dest0, dest - dest0);	480	215	core::string_view cur_out(dest0, dest - dest0);
HIT CBC	481	215	std::size_t p = cur_out.find_last_of('/');	481	215	std::size_t p = cur_out.find_last_of('/');
HIT CBC	482	215	bool const has_multiple_segs = p != core::string_view::npos;	482	215	bool const has_multiple_segs = p != core::string_view::npos;
HIT CBC	483	215	if (has_multiple_segs)	483	215	if (has_multiple_segs)
	484		{	484		{
	485		// output has multiple segments	485		// output has multiple segments
	486		// "erase" [p, end] if not "/.."	486		// "erase" [p, end] if not "/.."
HIT CBC	487	144	core::string_view last_seg(dest0 + p, dest - (dest0 + p));	487	144	core::string_view last_seg(dest0 + p, dest - (dest0 + p));
HIT CBC	488	144	bool const prev_is_dotdot_seg = dot_equal(last_seg, "/..");	488	144	bool const prev_is_dotdot_seg = dot_equal(last_seg, "/..");
HIT CBC	489	144	if (!prev_is_dotdot_seg)	489	144	if (!prev_is_dotdot_seg)
	490		{	490		{
HIT CBC	491	133	dest = dest0 + p;	491	133	dest = dest0 + p;
	492		}	492		}
	493		else	493		else
	494		{	494		{
HIT CBC	495	11	append(dest, end, "/..");	495	11	append(dest, end, "/..");
	496		}	496		}
	497		}	497		}
HIT CBC	498	71	else if (dest0 != dest)	498	71	else if (dest0 != dest)
	499		{	499		{
	500		// Only one segment in the output: remove it	500		// Only one segment in the output: remove it

1

//

1

//

2

3

4

//

4

//

5

// Distributed under the Boost Software License, Version 1.0. (See accompanying

5

// Distributed under the Boost Software License, Version 1.0. (See accompanying

6

// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

6

// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

7

//

7

//

8

// Official repository: https://github.com/boostorg/url

8

// Official repository: https://github.com/boostorg/url

9

//

9

//

10

11

12

#include <boost/url/detail/config.hpp>

12

#include <boost/url/detail/config.hpp>

13

#include <boost/url/decode_view.hpp>

13

#include <boost/url/decode_view.hpp>

14

#include <boost/url/detail/decode.hpp>

14

#include <boost/url/detail/decode.hpp>

15

#include <boost/url/segments_encoded_view.hpp>

15

#include <boost/url/segments_encoded_view.hpp>

16

#include <boost/url/grammar/ci_string.hpp>

16

#include <boost/url/grammar/ci_string.hpp>

17

#include <boost/url/grammar/lut_chars.hpp>

17

#include <boost/url/grammar/lut_chars.hpp>

18

#include <boost/assert.hpp>

18

#include <boost/assert.hpp>

19

#include <boost/core/ignore_unused.hpp>

19

#include <boost/core/ignore_unused.hpp>

20

#include <cstring>

20

#include <cstring>

21

#include <boost/url/detail/normalize.hpp>

21

#include <boost/url/detail/normalize.hpp>

22

23

namespace boost {

23

namespace boost {

24

namespace urls {

24

namespace urls {

25

namespace detail {

25

namespace detail {

26

27

void

27

void

HIT CBC

28

7772

pop_encoded_front(

28

7772

pop_encoded_front(

29

core::string_view& s,

29

core::string_view& s,

30

char& c,

30

char& c,

31

std::size_t& n) noexcept

31

std::size_t& n) noexcept

32

{

32

{

HIT CBC

33

7772

if(s.front() != '%')

33

7772

if(s.front() != '%')

34

{

34

{

HIT CBC

35

7620

c = s.front();

35

7620

c = s.front();

HIT CBC

36

7620

s.remove_prefix(1);

36

7620

s.remove_prefix(1);

37

}

37

}

38

else

38

else

39

{

39

{

HIT CBC

40

152

detail::decode_unsafe(

40

152

detail::decode_unsafe(

41

&c,

41

&c,

42

&c + 1,

42

&c + 1,

43

s.substr(0, 3));

43

s.substr(0, 3));

HIT CBC

44

152

s.remove_prefix(3);

44

152

s.remove_prefix(3);

45

}

45

}

HIT CBC

46

7772

++n;

46

7772

++n;

HIT CBC

47

7772

}

47

7772

}

48

49

int

49

int

HIT CBC

50

64

compare_encoded(

50

64

compare_encoded(

51

core::string_view lhs,

51

core::string_view lhs,

52

core::string_view rhs) noexcept

52

core::string_view rhs) noexcept

53

{

53

{

HIT CBC

54

64

std::size_t n0 = 0;

54

64

std::size_t n0 = 0;

HIT CBC

55

64

std::size_t n1 = 0;

55

64

std::size_t n1 = 0;

HIT CBC

56

64

char c0 = 0;

56

64

char c0 = 0;

HIT CBC

57

64

char c1 = 0;

57

64

char c1 = 0;

HIT CBC

58

64

while(

58

64

while(

HIT CBC

59

486

!lhs.empty() &&

59

486

!lhs.empty() &&

HIT CBC

60

228

!rhs.empty())

60

228

!rhs.empty())

61

{

61

{

HIT CBC

62

215

pop_encoded_front(lhs, c0, n0);

62

215

pop_encoded_front(lhs, c0, n0);

HIT CBC

63

215

pop_encoded_front(rhs, c1, n1);

63

215

pop_encoded_front(rhs, c1, n1);

HIT CBC

64

215

if (c0 < c1)

64

215

if (c0 < c1)

HIT CBC

65

18

return -1;

65

18

return -1;

HIT CBC

66

197

if (c1 < c0)

66

197

if (c1 < c0)

HIT CBC

67

3

return 1;

67

3

return 1;

68

}

68

}

HIT CBC

69

43

n0 += detail::decode_bytes_unsafe(lhs);

69

43

n0 += detail::decode_bytes_unsafe(lhs);

HIT CBC

70

43

n1 += detail::decode_bytes_unsafe(rhs);

70

43

n1 += detail::decode_bytes_unsafe(rhs);

HIT CBC

71

43

if (n0 == n1)

71

43

if (n0 == n1)

HIT CBC

72

22

return 0;

72

22

return 0;

HIT CBC

73

21

if (n0 < n1)

73

21

if (n0 < n1)

HIT CBC

74

8

return -1;

74

8

return -1;

HIT CBC

75

13

return 1;

75

13

return 1;

76

}

76

}

77

78

int

78

int

HIT CBC

79

28

compare_encoded_query(

79

28

compare_encoded_query(

80

core::string_view lhs,

80

core::string_view lhs,

81

core::string_view rhs) noexcept

81

core::string_view rhs) noexcept

82

{

82

{

83

static constexpr

83

static constexpr

84

grammar::lut_chars

84

grammar::lut_chars

85

query_compare_exception_lut = "&=+";

85

query_compare_exception_lut = "&=+";

86

HIT CBC

87

28

std::size_t n0 = 0;

87

28

std::size_t n0 = 0;

HIT CBC

88

28

std::size_t n1 = 0;

88

28

std::size_t n1 = 0;

HIT CBC

89

28

char c0 = 0;

89

28

char c0 = 0;

HIT CBC

90

28

char c1 = 0;

90

28

char c1 = 0;

HIT CBC

91

28

while(

91

28

while(

HIT CBC

92

254

!lhs.empty() &&

92

254

!lhs.empty() &&

HIT CBC

93

122

!rhs.empty())

93

122

!rhs.empty())

94

{

94

{

HIT CBC

95

121

bool const lhs_was_decoded = lhs.front() != '%';

95

121

bool const lhs_was_decoded = lhs.front() != '%';

HIT CBC

96

121

bool const rhs_was_decoded = rhs.front() != '%';

96

121

bool const rhs_was_decoded = rhs.front() != '%';

HIT CBC

97

121

pop_encoded_front(lhs, c0, n0);

97

121

pop_encoded_front(lhs, c0, n0);

HIT CBC

98

121

pop_encoded_front(rhs, c1, n1);

98

121

pop_encoded_front(rhs, c1, n1);

HIT CBC

99

121

if (c0 < c1)

99

121

if (c0 < c1)

HIT CBC

100

2

return -1;

100

2

return -1;

HIT CBC

101

119

if (c1 < c0)

101

119

if (c1 < c0)

HIT CBC

102

12

return 1;

102

12

return 1;

103

// The decoded chars are the same, but

103

// The decoded chars are the same, but

104

// are these query exceptions that have

104

// are these query exceptions that have

105

// different meanings when decoded?

105

// different meanings when decoded?

HIT CBC

106

107

if (query_compare_exception_lut(c0))

106

107

if (query_compare_exception_lut(c0))

107

{

107

{

108

// If so, we only continue if both

108

// If so, we only continue if both

109

// chars were decoded or encoded

109

// chars were decoded or encoded

110

// the same way.

110

// the same way.

HIT CBC

111

40

if (lhs_was_decoded == rhs_was_decoded)

111

40

if (lhs_was_decoded == rhs_was_decoded)

HIT CBC

112

37

continue;

112

37

continue;

113

// Otherwise, we return a value != 0

113

// Otherwise, we return a value != 0

114

// because these chars are not equal.

114

// because these chars are not equal.

115

// If rhs was the decoded one, it contains

115

// If rhs was the decoded one, it contains

116

// an ascii char higher than '%'

116

// an ascii char higher than '%'

HIT CBC

117

3

if (rhs_was_decoded)

117

3

if (rhs_was_decoded)

HIT CBC

118

2

return -1;

118

2

return -1;

119

else

119

else

HIT CBC

120

1

return 1;

120

1

return 1;

121

}

121

}

122

}

122

}

HIT CBC

123

11

n0 += detail::decode_bytes_unsafe(lhs);

123

11

n0 += detail::decode_bytes_unsafe(lhs);

HIT CBC

124

11

n1 += detail::decode_bytes_unsafe(rhs);

124

11

n1 += detail::decode_bytes_unsafe(rhs);

HIT CBC

125

11

if (n0 == n1)

125

11

if (n0 == n1)

HIT CBC

126

9

return 0;

126

9

return 0;

HIT CBC

127

2

if (n0 < n1)

127

2

if (n0 < n1)

HIT CBC

128

1

return -1;

128

1

return -1;

HIT CBC

129

1

return 1;

129

1

return 1;

130

}

130

}

131

132

void

132

void

HIT CBC

133

1216

digest_encoded(

133

1216

digest_encoded(

134

core::string_view s,

134

core::string_view s,

135

fnv_1a& hasher) noexcept

135

fnv_1a& hasher) noexcept

136

{

136

{

HIT CBC

137

1216

char c = 0;

137

1216

char c = 0;

HIT CBC

138

1216

std::size_t n = 0;

138

1216

std::size_t n = 0;

HIT CBC

139

1724

while(!s.empty())

139

1724

while(!s.empty())

140

{

140

{

HIT CBC

141

508

pop_encoded_front(s, c, n);

141

508

pop_encoded_front(s, c, n);

HIT CBC

142

508

hasher.put(c);

142

508

hasher.put(c);

143

}

143

}

HIT CBC

144

1216

}

144

1216

}

145

146

int

146

int

HIT CBC

147

180

ci_compare_encoded(

147

180

ci_compare_encoded(

148

core::string_view lhs,

148

core::string_view lhs,

149

core::string_view rhs) noexcept

149

core::string_view rhs) noexcept

150

{

150

{

HIT CBC

151

180

std::size_t n0 = 0;

151

180

std::size_t n0 = 0;

HIT CBC

152

180

std::size_t n1 = 0;

152

180

std::size_t n1 = 0;

HIT CBC

153

180

char c0 = 0;

153

180

char c0 = 0;

HIT CBC

154

180

char c1 = 0;

154

180

char c1 = 0;

HIT CBC

155

180

while (

155

180

while (

HIT CBC

156

4704

!lhs.empty() &&

156

4704

!lhs.empty() &&

HIT CBC

157

2271

!rhs.empty())

157

2271

!rhs.empty())

158

{

158

{

HIT CBC

159

2265

pop_encoded_front(lhs, c0, n0);

159

2265

pop_encoded_front(lhs, c0, n0);

HIT CBC

160

2265

pop_encoded_front(rhs, c1, n1);

160

2265

pop_encoded_front(rhs, c1, n1);

HIT CBC

161

2265

c0 = grammar::to_lower(c0);

161

2265

c0 = grammar::to_lower(c0);

HIT CBC

162

2265

c1 = grammar::to_lower(c1);

162

2265

c1 = grammar::to_lower(c1);

HIT CBC

163

2265

if (c0 < c1)

163

2265

if (c0 < c1)

HIT CBC

164

10

return -1;

164

10

return -1;

HIT CBC

165

2255

if (c1 < c0)

165

2255

if (c1 < c0)

HIT CBC

166

2

return 1;

166

2

return 1;

167

}

167

}

HIT CBC

168

n0 += detail::decode_bytes_unsafe(lhs);

168

n0 += detail::decode_bytes_unsafe(lhs);

HIT CBC

169

168

n1 += detail::decode_bytes_unsafe(rhs);

169

168

n1 += detail::decode_bytes_unsafe(rhs);

HIT CBC

170

168

if (n0 == n1)

170

168

if (n0 == n1)

HIT CBC

171

161

return 0;

171

161

return 0;

HIT CBC

172

7

if (n0 < n1)

172

7

if (n0 < n1)

HIT CBC

173

1

return -1;

173

1

return -1;

HIT CBC

174

6

return 1;

174

6

return 1;

175

}

175

}

176

177

void

177

void

HIT CBC

178

304

ci_digest_encoded(

178

304

ci_digest_encoded(

179

core::string_view s,

179

core::string_view s,

180

fnv_1a& hasher) noexcept

180

fnv_1a& hasher) noexcept

181

{

181

{

HIT CBC

182

304

char c = 0;

182

304

char c = 0;

HIT CBC

183

304

std::size_t n = 0;

183

304

std::size_t n = 0;

HIT CBC

184

2366

while(!s.empty())

184

2366

while(!s.empty())

185

{

185

{

HIT CBC

186

2062

pop_encoded_front(s, c, n);

186

2062

pop_encoded_front(s, c, n);

HIT CBC

187

2062

c = grammar::to_lower(c);

187

2062

c = grammar::to_lower(c);

HIT CBC

188

2062

hasher.put(c);

188

2062

hasher.put(c);

189

}

189

}

HIT CBC

190

304

}

190

304

}

191

192

int

192

int

HIT CBC

193

46

compare(

193

46

compare(

194

core::string_view lhs,

194

core::string_view lhs,

195

core::string_view rhs) noexcept

195

core::string_view rhs) noexcept

196

{

196

{

HIT CBC

197

46

auto rlen = (std::min)(lhs.size(), rhs.size());

197

46

auto rlen = (std::min)(lhs.size(), rhs.size());

HIT CBC

198

104

for (std::size_t i = 0; i < rlen; ++i)

198

104

for (std::size_t i = 0; i < rlen; ++i)

199

{

199

{

HIT CBC

200

79

char c0 = lhs[i];

200

79

char c0 = lhs[i];

HIT CBC

201

79

char c1 = rhs[i];

201

79

char c1 = rhs[i];

HIT CBC

202

79

if (c0 < c1)

202

79

if (c0 < c1)

HIT CBC

203

13

return -1;

203

13

return -1;

HIT CBC

204

66

if (c1 < c0)

204

66

if (c1 < c0)

HIT CBC

205

8

return 1;

205

8

return 1;

206

}

206

}

HIT CBC

207

25

if ( lhs.size() == rhs.size() )

207

25

if ( lhs.size() == rhs.size() )

HIT CBC

208

4

return 0;

208

4

return 0;

HIT CBC

209

21

if ( lhs.size() < rhs.size() )

209

21

if ( lhs.size() < rhs.size() )

HIT CBC

210

8

return -1;

210

8

return -1;

HIT CBC

211

13

return 1;

211

13

return 1;

212

}

212

}

213

214

int

214

int

HIT CBC

215

220

ci_compare(

215

220

ci_compare(

216

core::string_view lhs,

216

core::string_view lhs,

217

core::string_view rhs) noexcept

217

core::string_view rhs) noexcept

218

{

218

{

HIT CBC

219

220

auto rlen = (std::min)(lhs.size(), rhs.size());

219

220

auto rlen = (std::min)(lhs.size(), rhs.size());

HIT CBC

220

1125

for (std::size_t i = 0; i < rlen; ++i)

220

1125

for (std::size_t i = 0; i < rlen; ++i)

221

{

221

{

HIT CBC

222

912

char c0 = grammar::to_lower(lhs[i]);

222

912

char c0 = grammar::to_lower(lhs[i]);

HIT CBC

223

912

char c1 = grammar::to_lower(rhs[i]);

223

912

char c1 = grammar::to_lower(rhs[i]);

HIT CBC

224

912

if (c0 < c1)

224

912

if (c0 < c1)

HIT CBC

225

6

return -1;

225

6

return -1;

HIT CBC

226

906

if (c1 < c0)

226

906

if (c1 < c0)

HIT CBC

227

1

return 1;

227

1

return 1;

228

}

228

}

HIT CBC

229

213

if ( lhs.size() == rhs.size() )

229

213

if ( lhs.size() == rhs.size() )

HIT CBC

230

205

return 0;

230

205

return 0;

HIT CBC

231

8

if ( lhs.size() < rhs.size() )

231

8

if ( lhs.size() < rhs.size() )

HIT CBC

232

6

return -1;

232

6

return -1;

HIT CBC

233

2

return 1;

233

2

return 1;

234

}

234

}

235

236

void

236

void

HIT CBC

237

304

ci_digest(

237

304

ci_digest(

238

core::string_view s,

238

core::string_view s,

239

fnv_1a& hasher) noexcept

239

fnv_1a& hasher) noexcept

240

{

240

{

HIT CBC

241

1034

for (char c: s)

241

1034

for (char c: s)

242

{

242

{

HIT CBC

243

730

c = grammar::to_lower(c);

243

730

c = grammar::to_lower(c);

HIT CBC

244

730

hasher.put(c);

244

730

hasher.put(c);

245

}

245

}

HIT CBC

246

304

}

246

304

}

247

248

/* Check if a string ends with the specified suffix (decoded comparison)

248

/* Check if a string ends with the specified suffix (decoded comparison)

249

250

This function determines if a string ends with the specified suffix

250

This function determines if a string ends with the specified suffix

251

when the string and suffix are compared after percent-decoding.

251

when the string and suffix are compared after percent-decoding.

252

253

@param str The string to check (percent-encoded)

253

@param str The string to check (percent-encoded)

254

@param suffix The suffix to check for (percent-decoded)

254

@param suffix The suffix to check for (percent-decoded)

255

@return The number of encoded chars consumed in the string

255

@return The number of encoded chars consumed in the string

256

*/

256

*/

257

std::size_t

257

std::size_t

HIT CBC

258

2136

path_ends_with(

258

2136

path_ends_with(

259

core::string_view str,

259

core::string_view str,

260

core::string_view suffix) noexcept

260

core::string_view suffix) noexcept

261

{

261

{

HIT CBC

262

2136

BOOST_ASSERT(!str.empty());

262

2136

BOOST_ASSERT(!str.empty());

HIT CBC

263

2136

BOOST_ASSERT(!suffix.empty());

263

2136

BOOST_ASSERT(!suffix.empty());

HIT CBC

264

2136

BOOST_ASSERT(!suffix.contains("%2F"));

264

2136

BOOST_ASSERT(!suffix.contains("%2F"));

HIT CBC

265

2136

BOOST_ASSERT(!suffix.contains("%2f"));

265

2136

BOOST_ASSERT(!suffix.contains("%2f"));

HIT CBC

266

5848

auto consume_last = [](

266

5848

auto consume_last = [](

267

core::string_view::iterator& it,

267

core::string_view::iterator& it,

268

core::string_view::iterator& end,

268

core::string_view::iterator& end,

269

char& c)

269

char& c)

270

{

270

{

HIT CBC

271

5848

BOOST_ASSERT(end > it);

271

5848

BOOST_ASSERT(end > it);

HIT CBC

272

5848

BOOST_ASSERT(it != end);

272

5848

BOOST_ASSERT(it != end);

HIT CBC

273

9808

if ((end - it) < 3 ||

273

9808

if ((end - it) < 3 ||

HIT CBC

274

7920

*(std::prev(end, 3)) != '%')

274

7920

*(std::prev(end, 3)) != '%')

275

{

275

{

HIT CBC

276

5800

c = *--end;

276

5800

c = *--end;

HIT CBC

277

5800

return false;

277

5800

return false;

278

}

278

}

HIT CBC

279

96

detail::decode_unsafe(

279

96

detail::decode_unsafe(

280

&c,

280

&c,

281

&c + 1,

281

&c + 1,

282

core::string_view(std::prev(

282

core::string_view(std::prev(

283

end, 3), 3));

283

end, 3), 3));

HIT CBC

284

48

end -= 3;

284

48

end -= 3;

HIT CBC

285

48

return true;

285

48

return true;

286

};

286

};

287

HIT CBC

288

2136

auto it0 = str.begin();

288

2136

auto it0 = str.begin();

HIT CBC

289

2136

auto end0 = str.end();

289

2136

auto end0 = str.end();

HIT CBC

290

2136

auto it1 = suffix.begin();

290

2136

auto it1 = suffix.begin();

HIT CBC

291

2136

auto end1 = suffix.end();

291

2136

auto end1 = suffix.end();

HIT CBC

292

2136

char c0 = 0;

292

2136

char c0 = 0;

HIT CBC

293

2136

char c1 = 0;

293

2136

char c1 = 0;

HIT CBC

294

2136

while(

294

2136

while(

HIT CBC

295

3248

it0 < end0 &&

295

3248

it0 < end0 &&

HIT CBC

296

3006

it1 < end1)

296

3006

it1 < end1)

297

{

297

{

HIT CBC

298

2932

bool const is_encoded = consume_last(it0, end0, c0);

298

2932

bool const is_encoded = consume_last(it0, end0, c0);

299

// The suffix never contains an encoded slash (%2F), and a decoded

299

// The suffix never contains an encoded slash (%2F), and a decoded

300

// slash is not equivalent to an encoded slash

300

// slash is not equivalent to an encoded slash

HIT CBC

301

2932

if (is_encoded && c0 == '/')

301

2932

if (is_encoded && c0 == '/')

HIT CBC

302

16

return 0;

302

16

return 0;

HIT CBC

303

2916

consume_last(it1, end1, c1);

303

2916

consume_last(it1, end1, c1);

HIT CBC

304

2916

if (c0 != c1)

304

2916

if (c0 != c1)

HIT CBC

305

1804

return 0;

305

1804

return 0;

306

}

306

}

HIT CBC

307

316

bool const consumed_suffix = it1 == end1;

307

316

bool const consumed_suffix = it1 == end1;

HIT CBC

308

316

if (consumed_suffix)

308

316

if (consumed_suffix)

309

{

309

{

HIT CBC

310

110

std::size_t const consumed_encoded = str.end() - end0;

310

110

std::size_t const consumed_encoded = str.end() - end0;

HIT CBC

311

110

return consumed_encoded;

311

110

return consumed_encoded;

312

}

312

}

HIT CBC

313

206

return 0;

313

206

return 0;

314

}

314

}

315

316

std::size_t

316

std::size_t

HIT CBC

317

1065

remove_dot_segments(

317

1065

remove_dot_segments(

318

char* dest0,

318

char* dest0,

319

char const* end,

319

char const* end,

320

core::string_view input) noexcept

320

core::string_view input) noexcept

321

{

321

{

322

// 1. The input buffer `s` is initialized with

322

// 1. The input buffer `s` is initialized with

323

// the now-appended path components and the

323

// the now-appended path components and the

324

// output buffer `dest0` is initialized to

324

// output buffer `dest0` is initialized to

325

// the empty string.

325

// the empty string.

HIT CBC

326

1065

char* dest = dest0;

326

1065

char* dest = dest0;

HIT CBC

327

1065

bool const is_absolute = input.starts_with('/');

327

1065

bool const is_absolute = input.starts_with('/');

328

329

// Step 2 is a loop through 5 production rules:

329

// Step 2 is a loop through 5 production rules:

330

// https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4

330

// https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4

331

//

331

//

332

// There are no transitions between all rules,

332

// There are no transitions between all rules,

333

// which enables some optimizations.

333

// which enables some optimizations.

334

//

334

//

335

// Initial:

335

// Initial:

336

// - Rule A: handle initial dots

336

// - Rule A: handle initial dots

337

// If the input buffer begins with a

337

// If the input buffer begins with a

338

// prefix of "../" or "./", then remove

338

// prefix of "../" or "./", then remove

339

// that prefix from the input buffer.

339

// that prefix from the input buffer.

340

// Rule A can only happen at the beginning.

340

// Rule A can only happen at the beginning.

341

// Errata 4547: Keep "../" in the beginning

341

// Errata 4547: Keep "../" in the beginning

342

// https://www.rfc-editor.org/errata/eid4547

342

// https://www.rfc-editor.org/errata/eid4547

343

//

343

//

344

// Then:

344

// Then:

345

// - Rule D: ignore a final ".." or "."

345

// - Rule D: ignore a final ".." or "."

346

// if the input buffer consists only of "."

346

// if the input buffer consists only of "."

347

// or "..", then remove that from the input

347

// or "..", then remove that from the input

348

// buffer.

348

// buffer.

349

// Rule D can only happen after Rule A because:

349

// Rule D can only happen after Rule A because:

350

// - B and C write "/" to the input

350

// - B and C write "/" to the input

351

// - E writes "/" to input or returns

351

// - E writes "/" to input or returns

352

//

352

//

353

// Then:

353

// Then:

354

// - Rule B: ignore ".": write "/" to the input

354

// - Rule B: ignore ".": write "/" to the input

355

// - Rule C: apply "..": remove seg and write "/"

355

// - Rule C: apply "..": remove seg and write "/"

356

// - Rule E: copy complete segment

356

// - Rule E: copy complete segment

357

auto append =

357

auto append =

HIT CBC

358

1879

[](char*& first, char const* last, core::string_view in)

358

1879

[](char*& first, char const* last, core::string_view in)

359

{

359

{

360

// append `in` to `dest`

360

// append `in` to `dest`

HIT CBC

361

1879

BOOST_ASSERT(in.size() <= std::size_t(last - first));

361

1879

BOOST_ASSERT(in.size() <= std::size_t(last - first));

HIT CBC

362

1879

std::memmove(first, in.data(), in.size());

362

1879

std::memmove(first, in.data(), in.size());

HIT CBC

363

1879

first += in.size();

363

1879

first += in.size();

364

ignore_unused(last);

364

ignore_unused(last);

HIT CBC

365

1879

};

365

1879

};

366

HIT CBC

367

12011

auto dot_starts_with = [](

367

12011

auto dot_starts_with = [](

368

core::string_view str, core::string_view dots, std::size_t& n)

368

core::string_view str, core::string_view dots, std::size_t& n)

369

{

369

{

370

// starts_with for encoded/decoded dots

370

// starts_with for encoded/decoded dots

371

// or decoded otherwise. return how many

371

// or decoded otherwise. return how many

372

// chars in str match the dots

372

// chars in str match the dots

HIT CBC

373

12011

n = 0;

373

12011

n = 0;

HIT CBC

374

21036

for (char c: dots)

374

21036

for (char c: dots)

375

{

375

{

HIT CBC

376

20431

if (str.starts_with(c))

376

20431

if (str.starts_with(c))

377

{

377

{

HIT CBC

378

9025

str.remove_prefix(1);

378

9025

str.remove_prefix(1);

HIT CBC

379

9025

++n;

379

9025

++n;

HIT CBC

380

9025

continue;

380

9025

continue;

381

}

381

}

382

383

// In the general case, we would need to

383

// In the general case, we would need to

384

// check if the next char is an encoded

384

// check if the next char is an encoded

385

// dot.

385

// dot.

386

// However, an encoded dot in `str`

386

// However, an encoded dot in `str`

387

// would have already been decoded in

387

// would have already been decoded in

388

// url_base::normalize_path().

388

// url_base::normalize_path().

389

// This needs to be undone if

389

// This needs to be undone if

390

// `remove_dot_segments` is used in a

390

// `remove_dot_segments` is used in a

391

// different context.

391

// different context.

392

// if (str.size() > 2 &&

392

// if (str.size() > 2 &&

393

// c == '.'

393

// c == '.'

394

// &&

394

// &&

395

// str[0] == '%' &&

395

// str[0] == '%' &&

396

// str[1] == '2' &&

396

// str[1] == '2' &&

397

// (str[2] == 'e' ||

397

// (str[2] == 'e' ||

398

// str[2] == 'E'))

398

// str[2] == 'E'))

399

// {

399

// {

400

// str.remove_prefix(3);

400

// str.remove_prefix(3);

401

// n += 3;

401

// n += 3;

402

// continue;

402

// continue;

403

// }

403

// }

404

HIT CBC

405

11406

n = 0;

405

11406

n = 0;

HIT CBC

406

11406

return false;

406

11406

return false;

407

}

407

}

HIT CBC

408

605

return true;

408

605

return true;

409

};

409

};

410

HIT CBC

411

6016

auto dot_equal = [&dot_starts_with](

411

6016

auto dot_equal = [&dot_starts_with](

412

core::string_view str, core::string_view dots)

412

core::string_view str, core::string_view dots)

413

{

413

{

HIT CBC

414

6016

std::size_t n = 0;

414

6016

std::size_t n = 0;

HIT CBC

415

6016

dot_starts_with(str, dots, n);

415

6016

dot_starts_with(str, dots, n);

HIT CBC

416

6016

return n == str.size();

416

6016

return n == str.size();

HIT CBC

417

1065

};

417

1065

};

418

419

// Rule A

419

// Rule A

420

std::size_t n;

420

std::size_t n;

HIT CBC

421

1086

while (!input.empty())

421

1086

while (!input.empty())

422

{

422

{

HIT CBC

423

960

if (dot_starts_with(input, "../", n))

423

960

if (dot_starts_with(input, "../", n))

424

{

424

{

425

// Errata 4547

425

// Errata 4547

HIT CBC

426

4

append(dest, end, "../");

426

4

append(dest, end, "../");

HIT CBC

427

4

input.remove_prefix(n);

427

4

input.remove_prefix(n);

HIT CBC

428

4

continue;

428

4

continue;

429

}

429

}

HIT CBC

430

956

else if (!dot_starts_with(input, "./", n))

430

956

else if (!dot_starts_with(input, "./", n))

431

{

431

{

HIT CBC

432

939

break;

432

939

break;

433

}

433

}

HIT CBC

434

17

input.remove_prefix(n);

434

17

input.remove_prefix(n);

435

}

435

}

436

437

// Rule D

437

// Rule D

HIT CBC

438

1065

if( dot_equal(input, "."))

438

1065

if( dot_equal(input, "."))

439

{

439

{

HIT CBC

440

127

input = {};

440

127

input = {};

441

}

441

}

HIT CBC

442

938

else if( dot_equal(input, "..") )

442

938

else if( dot_equal(input, "..") )

443

{

443

{

444

// Errata 4547

444

// Errata 4547

HIT CBC

445

3

append(dest, end, "..");

445

3

append(dest, end, "..");

HIT CBC

446

3

input = {};

446

3

input = {};

447

}

447

}

448

449

// 2. While the input buffer is not empty,

449

// 2. While the input buffer is not empty,

450

// loop as follows:

450

// loop as follows:

HIT CBC

451

3088

while (!input.empty())

451

3088

while (!input.empty())

452

{

452

{

453

// Rule B

453

// Rule B

HIT CBC

454

2062

bool const is_dot_seg = dot_starts_with(input, "/./", n);

454

2062

bool const is_dot_seg = dot_starts_with(input, "/./", n);

HIT CBC

455

2062

if (is_dot_seg)

455

2062

if (is_dot_seg)

456

{

456

{

HIT CBC

457

37

input.remove_prefix(n - 1);

457

37

input.remove_prefix(n - 1);

HIT CBC

458

37

continue;

458

37

continue;

459

}

459

}

460

HIT CBC

461

2025

bool const is_final_dot_seg = dot_equal(input, "/.");

461

2025

bool const is_final_dot_seg = dot_equal(input, "/.");

HIT CBC

462

2025

if (is_final_dot_seg)

462

2025

if (is_final_dot_seg)

463

{

463

{

464

// We can't remove "." from a core::string_view

464

// We can't remove "." from a core::string_view

465

// So what we do here is equivalent to

465

// So what we do here is equivalent to

466

// replacing s with '/' as required

466

// replacing s with '/' as required

467

// in Rule B and executing the next

467

// in Rule B and executing the next

468

// iteration, which would append this

468

// iteration, which would append this

469

// '/' to the output, as required by

469

// '/' to the output, as required by

470

// Rule E

470

// Rule E

HIT CBC

471

8

append(dest, end, input.substr(0, 1));

471

8

append(dest, end, input.substr(0, 1));

HIT CBC

472

8

input = {};

472

8

input = {};

HIT CBC

473

8

break;

473

8

break;

474

}

474

}

475

476

// Rule C

476

// Rule C

HIT CBC

477

2017

bool const is_dotdot_seg = dot_starts_with(input, "/../", n);

477

2017

bool const is_dotdot_seg = dot_starts_with(input, "/../", n);

HIT CBC

478

2017

if (is_dotdot_seg)

478

2017

if (is_dotdot_seg)

479

{

479

{

HIT CBC

480

215

core::string_view cur_out(dest0, dest - dest0);

480

215

core::string_view cur_out(dest0, dest - dest0);

HIT CBC

481

215

std::size_t p = cur_out.find_last_of('/');

481

215

std::size_t p = cur_out.find_last_of('/');

HIT CBC

482

215

bool const has_multiple_segs = p != core::string_view::npos;

482

215

bool const has_multiple_segs = p != core::string_view::npos;

HIT CBC

483

215

if (has_multiple_segs)

483

215

if (has_multiple_segs)

484

{

484

{

485

// output has multiple segments

485

// output has multiple segments

486

// "erase" [p, end] if not "/.."

486

// "erase" [p, end] if not "/.."

HIT CBC

487

144

core::string_view last_seg(dest0 + p, dest - (dest0 + p));

487

144

core::string_view last_seg(dest0 + p, dest - (dest0 + p));

HIT CBC

488

144

bool const prev_is_dotdot_seg = dot_equal(last_seg, "/..");

488

144

bool const prev_is_dotdot_seg = dot_equal(last_seg, "/..");

HIT CBC

489

144

if (!prev_is_dotdot_seg)

489

144

if (!prev_is_dotdot_seg)

490

{

490

{

HIT CBC

491

133

dest = dest0 + p;

491

133

dest = dest0 + p;

492

}

492

}

493

else

493

else

494

{

494

{

HIT CBC

495

11

append(dest, end, "/..");

495

11

append(dest, end, "/..");

496

}

496

}

497

}

497

}

HIT CBC

498

71

else if (dest0 != dest)

498

71

else if (dest0 != dest)

499

{

499

{

500

// Only one segment in the output: remove it

500

// Only one segment in the output: remove it

HIT CBC

501

21

core::string_view last_seg(dest0, dest - dest0);

501

21

core::string_view last_seg(dest0, dest - dest0);

HIT CBC

502

21

bool const prev_is_dotdot_seg = dot_equal(last_seg, "..");

502

21

bool const prev_is_dotdot_seg = dot_equal(last_seg, "..");

HIT CBC

503

21

if (!prev_is_dotdot_seg)

503

21

if (!prev_is_dotdot_seg)

504

{

504

{

HIT CBC

505

19

dest = dest0;

505

19

dest = dest0;

HIT CBC

506

19

if (!is_absolute)

506

19

if (!is_absolute)

507

{

507

{

HIT CBC

508

19

input.remove_prefix(1);

508

19

input.remove_prefix(1);

509

}

509

}

510

}

510

}

511

else

511

else

512

{

512

{

HIT CBC

513

2

append(dest, end, "/..");

513

2

append(dest, end, "/..");

514

}

514

}

515

}

515

}

516

else

516

else

517

{

517

{

518

// Output is empty

518

// Output is empty

HIT CBC

519

50

if (is_absolute)

519

50

if (is_absolute)

520

{

520

{

HIT CBC

521

50

append(dest, end, "/..");

521

50

append(dest, end, "/..");

522

}

522

}

523

else

523

else

524

{

524

{

525

// AFREITAS: Although we have no formal proof

525

// AFREITAS: Although we have no formal proof

526

// for that, the output can't be relative

526

// for that, the output can't be relative

527

// and empty at this point because relative

527

// and empty at this point because relative

528

// paths will fall in the `dest0 != dest`

528

// paths will fall in the `dest0 != dest`

529

// case above of this rule C and then the

529

// case above of this rule C and then the

530

// general case of rule E for "..".

530

// general case of rule E for "..".

MIS UBC

531

✗

append(dest, end, "..");

531

✗

append(dest, end, "..");

532

}

532

}

533

}

533

}

HIT CBC

534

215

input.remove_prefix(n - 1);

534

215

input.remove_prefix(n - 1);

HIT CBC

535

215

continue;

535

215

continue;

HIT CBC

536

215

}

536

215

}

537

HIT CBC

538

1802

bool const is_final_dotdot_seg = dot_equal(input, "/..");

538

1802

bool const is_final_dotdot_seg = dot_equal(input, "/..");

HIT CBC

539

1802

if (is_final_dotdot_seg)

539

1802

if (is_final_dotdot_seg)

540

{

540

{

HIT CBC

541

31

core::string_view cur_out(dest0, dest - dest0);

541

31

core::string_view cur_out(dest0, dest - dest0);

HIT CBC

542

31

std::size_t p = cur_out.find_last_of('/');

542

31

std::size_t p = cur_out.find_last_of('/');

HIT CBC

543

31

bool const has_multiple_segs = p != core::string_view::npos;

543

31

bool const has_multiple_segs = p != core::string_view::npos;

HIT CBC

544

31

if (has_multiple_segs)

544

31

if (has_multiple_segs)

545

{

545

{

546

// output has multiple segments

546

// output has multiple segments

547

// "erase" [p, end] if not "/.."

547

// "erase" [p, end] if not "/.."

HIT CBC

548

18

core::string_view last_seg(dest0 + p, dest - (dest0 + p));

548

18

core::string_view last_seg(dest0 + p, dest - (dest0 + p));

HIT CBC

549

18

bool const prev_is_dotdot_seg = dot_equal(last_seg, "/..");

549

18

bool const prev_is_dotdot_seg = dot_equal(last_seg, "/..");

HIT CBC

550

18

if (!prev_is_dotdot_seg)

550

18

if (!prev_is_dotdot_seg)

551

{

551

{

HIT CBC

552

14

dest = dest0 + p;

552

14

dest = dest0 + p;

HIT CBC

553

14

append(dest, end, "/");

553

14

append(dest, end, "/");

554

}

554

}

555

else

555

else

556

{

556

{

HIT CBC

557

4

append(dest, end, "/..");

557

4

append(dest, end, "/..");

558

}

558

}

559

}

559

}

HIT CBC

560

13

else if (dest0 != dest)

560

13

else if (dest0 != dest)

561

{

561

{

562

// Only one segment in the output: remove it

562

// Only one segment in the output: remove it

HIT CBC

563

3

core::string_view last_seg(dest0, dest - dest0);

563

3

core::string_view last_seg(dest0, dest - dest0);

HIT CBC

564

3

bool const prev_is_dotdot_seg = dot_equal(last_seg, "..");

564

3

bool const prev_is_dotdot_seg = dot_equal(last_seg, "..");

HIT CBC

565

3

if (!prev_is_dotdot_seg) {

565

3

if (!prev_is_dotdot_seg) {

HIT CBC

566

1

dest = dest0;

566

1

dest = dest0;

567

}

567

}

568

else

568

else

569

{

569

{

HIT CBC

570

2

append(dest, end, "/..");

570

2

append(dest, end, "/..");

571

}

571

}

572

}

572

}

573

else

573

else

574

{

574

{

575

// Output is empty: append dotdot

575

// Output is empty: append dotdot

HIT CBC

576

10

if (is_absolute)

576

10

if (is_absolute)

577

{

577

{

HIT CBC

578

10

append(dest, end, "/..");

578

10

append(dest, end, "/..");

579

}

579

}

580

else

580

else

581

{

581

{

582

// AFREITAS: Although we have no formal proof

582

// AFREITAS: Although we have no formal proof

583

// for that, the output can't be relative

583

// for that, the output can't be relative

584

// and empty at this point because relative

584

// and empty at this point because relative

585

// paths will fall in the `dest0 != dest`

585

// paths will fall in the `dest0 != dest`

586

// case above of this rule C and then the

586

// case above of this rule C and then the

587

// general case of rule E for "..".

587

// general case of rule E for "..".

MIS UBC

588

✗

append(dest, end, "..");

588

✗

append(dest, end, "..");

589

}

589

}

590

}

590

}

HIT CBC

591

31

input = {};

591

31

input = {};

HIT CBC

592

31

break;

592

31

break;

593

}

593

}

594

595

// Rule E

595

// Rule E

HIT CBC

596

1771

std::size_t p = input.find_first_of('/', 1);

596

1771

std::size_t p = input.find_first_of('/', 1);

HIT CBC

597

1771

if (p != core::string_view::npos)

597

1771

if (p != core::string_view::npos)

598

{

598

{

HIT CBC

599

875

append(dest, end, input.substr(0, p));

599

875

append(dest, end, input.substr(0, p));

HIT CBC

600

875

input.remove_prefix(p);

600

875

input.remove_prefix(p);

601

}

601

}

602

else

602

else

603

{

603

{

HIT CBC

604

896

append(dest, end, input);

604

896

append(dest, end, input);

HIT CBC

605

896

input = {};

605

896

input = {};

606

}

606

}

607

}

607

}

608

609

// 3. Finally, the output buffer is set

609

// 3. Finally, the output buffer is set

610

// as the result of remove_dot_segments,

610

// as the result of remove_dot_segments,

611

// and we return its size

611

// and we return its size

HIT CBC

612

1065

return dest - dest0;

612

1065

return dest - dest0;

613

}

613

}

614

615

char

615

char

HIT CBC

616

1154

path_pop_back( core::string_view& s )

616

1154

path_pop_back( core::string_view& s )

617

{

617

{

HIT CBC

618

1676

if (s.size() < 3 ||

618

1676

if (s.size() < 3 ||

HIT CBC

619

1044

*std::prev(s.end(), 3) != '%')

619

1044

*std::prev(s.end(), 3) != '%')

620

{

620

{

HIT CBC

621

1102

char c = s.back();

621

1102

char c = s.back();

HIT CBC

622

1102

s.remove_suffix(1);

622

1102

s.remove_suffix(1);

HIT CBC

623

1102

return c;

623

1102

return c;

624

}

624

}

HIT CBC

625

52

char c = 0;

625

52

char c = 0;

HIT CBC

626

104

detail::decode_unsafe(

626

104

detail::decode_unsafe(

HIT CBC

627

104

&c, &c + 1, s.substr(s.size() - 3));

627

104

&c, &c + 1, s.substr(s.size() - 3));

HIT CBC

628

52

if (c != '/')

628

52

if (c != '/')

629

{

629

{

HIT CBC

630

44

s.remove_suffix(3);

630

44

s.remove_suffix(3);

HIT CBC

631

44

return c;

631

44

return c;

632

}

632

}

HIT CBC

633

8

c = s.back();

633

8

c = s.back();

HIT CBC

634

8

s.remove_suffix(1);

634

8

s.remove_suffix(1);

HIT CBC

635

8

return c;

635

8

return c;

636

};

636

};

637

638

void

638

void

HIT CBC

639

538

pop_last_segment(

639

538

pop_last_segment(

640

core::string_view& str,

640

core::string_view& str,

641

core::string_view& seg,

641

core::string_view& seg,

642

std::size_t& level,

642

std::size_t& level,

643

bool remove_unmatched) noexcept

643

bool remove_unmatched) noexcept

644

{

644

{

HIT CBC

645

538

seg = {};

645

538

seg = {};

HIT CBC

646

538

std::size_t n = 0;

646

538

std::size_t n = 0;

HIT CBC

647

700

while (!str.empty())

647

700

while (!str.empty())

648

{

648

{

649

// B. if the input buffer begins with a

649

// B. if the input buffer begins with a

650

// prefix of "/./" or "/.", where "." is

650

// prefix of "/./" or "/.", where "." is

651

// a complete path segment, then replace

651

// a complete path segment, then replace

652

// that prefix with "/" in the input

652

// that prefix with "/" in the input

653

// buffer; otherwise,

653

// buffer; otherwise,

HIT CBC

654

558

n = detail::path_ends_with(str, "/./");

654

558

n = detail::path_ends_with(str, "/./");

HIT CBC

655

558

if (n)

655

558

if (n)

656

{

656

{

HIT CBC

657

10

seg = str.substr(str.size() - n);

657

10

seg = str.substr(str.size() - n);

HIT CBC

658

10

str.remove_suffix(n);

658

10

str.remove_suffix(n);

HIT CBC

659

10

continue;

659

10

continue;

660

}

660

}

HIT CBC

661

548

n = detail::path_ends_with(str, "/.");

661

548

n = detail::path_ends_with(str, "/.");

HIT CBC

662

548

if (n)

662

548

if (n)

663

{

663

{

HIT CBC

664

12

seg = str.substr(str.size() - n, 1);

664

12

seg = str.substr(str.size() - n, 1);

HIT CBC

665

12

str.remove_suffix(n);

665

12

str.remove_suffix(n);

HIT CBC

666

12

continue;

666

12

continue;

667

}

667

}

668

669

// C. if the input buffer begins with a

669

// C. if the input buffer begins with a

670

// prefix of "/../" or "/..", where ".."

670

// prefix of "/../" or "/..", where ".."

671

// is a complete path segment, then

671

// is a complete path segment, then

672

// replace that prefix with "/" in the

672

// replace that prefix with "/" in the

673

// input buffer and remove the last

673

// input buffer and remove the last

674

// segment and its preceding "/"

674

// segment and its preceding "/"

675

// (if any) from the output buffer

675

// (if any) from the output buffer

676

// otherwise,

676

// otherwise,

HIT CBC

677

536

n = detail::path_ends_with(str, "/../");

677

536

n = detail::path_ends_with(str, "/../");

HIT CBC

678

536

if (n)

678

536

if (n)

679

{

679

{

HIT CBC

680

42

seg = str.substr(str.size() - n);

680

42

seg = str.substr(str.size() - n);

HIT CBC

681

42

str.remove_suffix(n);

681

42

str.remove_suffix(n);

HIT CBC

682

42

++level;

682

42

++level;

HIT CBC

683

42

continue;

683

42

continue;

684

}

684

}

HIT CBC

685

494

n = detail::path_ends_with(str, "/..");

685

494

n = detail::path_ends_with(str, "/..");

HIT CBC

686

494

if (n)

686

494

if (n)

687

{

687

{

HIT CBC

688

46

seg = str.substr(str.size() - n);

688

46

seg = str.substr(str.size() - n);

HIT CBC

689

46

str.remove_suffix(n);

689

46

str.remove_suffix(n);

HIT CBC

690

46

++level;

690

46

++level;

HIT CBC

691

46

continue;

691

46

continue;

692

}

692

}

693

694

// E. move the first path segment in the

694

// E. move the first path segment in the

695

// input buffer to the end of the output

695

// input buffer to the end of the output

696

// buffer, including the initial "/"

696

// buffer, including the initial "/"

697

// character (if any) and any subsequent

697

// character (if any) and any subsequent

698

// characters up to, but not including,

698

// characters up to, but not including,

699

// the next "/" character or the end of

699

// the next "/" character or the end of

700

// the input buffer.

700

// the input buffer.

HIT CBC

701

448

std::size_t p = str.size() > 1

701

448

std::size_t p = str.size() > 1

HIT CBC

702

448

? str.find_last_of('/', str.size() - 2)

702

448

? str.find_last_of('/', str.size() - 2)

HIT CBC

703

448

: core::string_view::npos;

703

448

: core::string_view::npos;

HIT CBC

704

448

if (p != core::string_view::npos)

704

448

if (p != core::string_view::npos)

705

{

705

{

HIT CBC

706

276

seg = str.substr(p + 1);

706

276

seg = str.substr(p + 1);

HIT CBC

707

276

str.remove_suffix(seg.size());

707

276

str.remove_suffix(seg.size());

708

}

708

}

709

else

709

else

710

{

710

{

HIT CBC

711

172

seg = str;

711

172

seg = str;

HIT CBC

712

172

str = {};

712

172

str = {};

713

}

713

}

714

HIT CBC

715

448

if (level == 0)

715

448

if (level == 0)

HIT CBC

716

396

return;

716

396

return;

HIT CBC

717

52

if (!str.empty())

717

52

if (!str.empty())

HIT CBC

718

42

--level;

718

42

--level;

719

}

719

}

720

// we still need to skip n_skip + 1

720

// we still need to skip n_skip + 1

721

// but the string is empty

721

// but the string is empty

HIT CBC

722

142

if (remove_unmatched && level)

722

142

if (remove_unmatched && level)

723

{

723

{

HIT CBC

724

34

seg = "/";

724

34

seg = "/";

HIT CBC

725

34

level = 0;

725

34

level = 0;

HIT CBC

726

34

return;

726

34

return;

727

}

727

}

HIT CBC

728

108

else if (level)

728

108

else if (level)

729

{

729

{

HIT CBC

730

4

if (!seg.empty())

730

4

if (!seg.empty())

731

{

731

{

HIT CBC

732

4

seg = "/../";

732

4

seg = "/../";

733

}

733

}

734

else

734

else

735

{

735

{

736

// AFREITAS: this condition

736

// AFREITAS: this condition

737

// is correct, but it might

737

// is correct, but it might

738

// unreachable.

738

// unreachable.

MIS UBC

739

✗

seg = "/..";

739

✗

seg = "/..";

740

}

740

}

HIT CBC

741

4

--level;

741

4

--level;

HIT CBC

742

4

return;

742

4

return;

743

}

743

}

HIT CBC

744

104

seg = {};

744

104

seg = {};

745

}

745

}

746

747

void

747

void

HIT CBC

748

304

normalized_path_digest(

748

304

normalized_path_digest(

749

core::string_view str,

749

core::string_view str,

750

bool remove_unmatched,

750

bool remove_unmatched,

751

fnv_1a& hasher) noexcept

751

fnv_1a& hasher) noexcept

752

{

752

{

HIT CBC

753

304

core::string_view seg;

753

304

core::string_view seg;

HIT CBC

754

304

std::size_t level = 0;

754

304

std::size_t level = 0;

755

do

755

do

756

{

756

{

HIT CBC

757

538

pop_last_segment(

757

538

pop_last_segment(

758

str, seg, level, remove_unmatched);

758

str, seg, level, remove_unmatched);

HIT CBC

759

1692

while (!seg.empty())

759

1692

while (!seg.empty())

760

{

760

{

HIT CBC

761

1154

char c = path_pop_back(seg);

761

1154

char c = path_pop_back(seg);

HIT CBC

762

1154

hasher.put(c);

762

1154

hasher.put(c);

763

}

763

}

764

}

764

}

HIT CBC

765

538

while (!str.empty());

765

538

while (!str.empty());

HIT CBC

766

304

}

766

304

}

767

768

// compare segments as if there were a normalized

768

// compare segments as if there were a normalized

769

int

769

int

HIT CBC

770

239

segments_compare(

770

239

segments_compare(

771

segments_encoded_view seg0,

771

segments_encoded_view seg0,

772

segments_encoded_view seg1) noexcept

772

segments_encoded_view seg1) noexcept

773

{

773

{

774

// calculate path size as if it were normalized

774

// calculate path size as if it were normalized

775

auto normalized_size =

775

auto normalized_size =

HIT CBC

776

478

[](segments_encoded_view seg) -> std::size_t

776

478

[](segments_encoded_view seg) -> std::size_t

777

{

777

{

HIT CBC

778

478

if (seg.empty())

778

478

if (seg.empty())

HIT CBC

779

144

return seg.is_absolute();

779

144

return seg.is_absolute();

780

HIT CBC

781

334

std::size_t n = 0;

781

334

std::size_t n = 0;

HIT CBC

782

334

std::size_t skip = 0;

782

334

std::size_t skip = 0;

HIT CBC

783

334

auto begin = seg.begin();

783

334

auto begin = seg.begin();

HIT CBC

784

334

auto it = seg.end();

784

334

auto it = seg.end();

HIT CBC

785

1096

while (it != begin)

785

1096

while (it != begin)

786

{

786

{

HIT CBC

787

762

--it;

787

762

--it;

HIT CBC

788

762

decode_view dseg = **it;

788

762

decode_view dseg = **it;

HIT CBC

789

762

if (dseg == "..")

789

762

if (dseg == "..")

HIT CBC

790

167

++skip;

790

167

++skip;

HIT CBC

791

595

else if (dseg != ".")

791

595

else if (dseg != ".")

792

{

792

{

HIT CBC

793

557

if (skip)

793

557

if (skip)

HIT CBC

794

85

--skip;

794

85

--skip;

795

else

795

else

HIT CBC

796

472

n += dseg.size() + 1;

796

472

n += dseg.size() + 1;

797

}

797

}

798

}

798

}

HIT CBC

799

334

n += skip * 3;

799

334

n += skip * 3;

HIT CBC

800

334

n -= !seg.is_absolute();

800

334

n -= !seg.is_absolute();

HIT CBC

801

334

return n;

801

334

return n;

802

};

802

};

803

804

// find the normalized size for the comparison

804

// find the normalized size for the comparison

HIT CBC

805

239

std::size_t n0 = normalized_size(seg0);

805

239

std::size_t n0 = normalized_size(seg0);

HIT CBC

806

239

std::size_t n1 = normalized_size(seg1);

806

239

std::size_t n1 = normalized_size(seg1);

HIT CBC

807

239

std::size_t n00 = n0;

807

239

std::size_t n00 = n0;

HIT CBC

808

239

std::size_t n10 = n1;

808

239

std::size_t n10 = n1;

809

810

// consume the last char from a segment range

810

// consume the last char from a segment range

811

auto consume_last =

811

auto consume_last =

HIT CBC

812

2064

[](

812

2064

[](

813

std::size_t& n,

813

std::size_t& n,

814

decode_view& dseg,

814

decode_view& dseg,

815

segments_encoded_view::iterator& begin,

815

segments_encoded_view::iterator& begin,

816

segments_encoded_view::iterator& it,

816

segments_encoded_view::iterator& it,

817

decode_view::iterator& cit,

817

decode_view::iterator& cit,

818

std::size_t& skip,

818

std::size_t& skip,

819

bool& at_slash) -> char

819

bool& at_slash) -> char

820

{

820

{

HIT CBC

821

2064

if (cit != dseg.begin())

821

2064

if (cit != dseg.begin())

822

{

822

{

823

// return last char from current segment

823

// return last char from current segment

HIT CBC

824

1387

at_slash = false;

824

1387

at_slash = false;

HIT CBC

825

1387

--cit;

825

1387

--cit;

HIT CBC

826

1387

--n;

826

1387

--n;

HIT CBC

827

1387

return *cit;

827

1387

return *cit;

828

}

828

}

829

HIT CBC

830

677

if (!at_slash)

830

677

if (!at_slash)

831

{

831

{

832

// current segment dseg is over and

832

// current segment dseg is over and

833

// previous char was not a slash

833

// previous char was not a slash

834

// so we output one

834

// so we output one

HIT CBC

835

403

at_slash = true;

835

403

at_slash = true;

HIT CBC

836

403

--n;

836

403

--n;

HIT CBC

837

403

return '/';

837

403

return '/';

838

}

838

}

839

840

// current segment dseg is over and

840

// current segment dseg is over and

841

// last char was already the slash

841

// last char was already the slash

842

// between segments, so take the

842

// between segments, so take the

843

// next final segment to consume

843

// next final segment to consume

HIT CBC

844

274

at_slash = false;

844

274

at_slash = false;

HIT CBC

845

512

while (cit == dseg.begin())

845

512

while (cit == dseg.begin())

846

{

846

{

847

// take next segment

847

// take next segment

HIT CBC

848

512

if (it != begin)

848

512

if (it != begin)

HIT CBC

849

380

--it;

849

380

--it;

850

else

850

else

HIT CBC

851

132

break;

851

132

break;

HIT CBC

852

380

if (**it == "..")

852

380

if (**it == "..")

853

{

853

{

854

// skip next if this is ".."

854

// skip next if this is ".."

HIT CBC

855

140

++skip;

855

140

++skip;

856

}

856

}

HIT CBC

857

240

else if (**it != ".")

857

240

else if (**it != ".")

858

{

858

{

HIT CBC

859

212

if (skip)

859

212

if (skip)

860

{

860

{

861

// discount skips

861

// discount skips

HIT CBC

862

70

--skip;

862

70

--skip;

863

}

863

}

864

else

864

else

865

{

865

{

866

// or update current seg

866

// or update current seg

HIT CBC

867

142

dseg = **it;

867

142

dseg = **it;

HIT CBC

868

142

cit = dseg.end();

868

142

cit = dseg.end();

HIT CBC

869

142

break;

869

142

break;

870

}

870

}

871

}

871

}

872

}

872

}

873

// consume from the new current

873

// consume from the new current

874

// segment

874

// segment

HIT CBC

875

274

--n;

875

274

--n;

HIT CBC

876

274

if (cit != dseg.begin())

876

274

if (cit != dseg.begin())

877

{

877

{

878

// in the general case, we consume

878

// in the general case, we consume

879

// one more character from the end

879

// one more character from the end

HIT CBC

880

127

--cit;

880

127

--cit;

HIT CBC

881

127

return *cit;

881

127

return *cit;

882

}

882

}

883

884

// nothing left to consume in the

884

// nothing left to consume in the

885

// current and new segment

885

// current and new segment

HIT CBC

886

147

if (it == begin)

886

147

if (it == begin)

887

{

887

{

888

// if this is the first

888

// if this is the first

889

// segment, the segments are

889

// segment, the segments are

890

// over and there can only

890

// over and there can only

891

// be repetitions of "../" to

891

// be repetitions of "../" to

892

// output

892

// output

HIT CBC

893

138

return "/.."[n % 3];

893

138

return "/.."[n % 3];

894

}

894

}

895

// at other segments, we need

895

// at other segments, we need

896

// a slash to transition to the

896

// a slash to transition to the

897

// next segment

897

// next segment

HIT CBC

898

9

at_slash = true;

898

9

at_slash = true;

HIT CBC

899

9

return '/';

899

9

return '/';

900

};

900

};

901

902

// consume final segments from seg0 that

902

// consume final segments from seg0 that

903

// should not influence the comparison

903

// should not influence the comparison

HIT CBC

904

239

auto begin0 = seg0.begin();

904

239

auto begin0 = seg0.begin();

HIT CBC

905

239

auto it0 = seg0.end();

905

239

auto it0 = seg0.end();

HIT CBC

906

239

decode_view dseg0;

906

239

decode_view dseg0;

HIT CBC

907

239

if (it0 != seg0.begin())

907

239

if (it0 != seg0.begin())

908

{

908

{

HIT CBC

909

166

--it0;

909

166

--it0;

HIT CBC

910

166

dseg0 = **it0;

910

166

dseg0 = **it0;

911

}

911

}

HIT CBC

912

239

decode_view::iterator cit0 = dseg0.end();

912

239

decode_view::iterator cit0 = dseg0.end();

HIT CBC

913

239

std::size_t skip0 = 0;

913

239

std::size_t skip0 = 0;

HIT CBC

914

239

bool at_slash0 = true;

914

239

bool at_slash0 = true;

HIT CBC

915

377

while (n0 > n1)

915

377

while (n0 > n1)

916

{

916

{

HIT CBC

917

138

consume_last(n0, dseg0, begin0, it0, cit0, skip0, at_slash0);

917

138

consume_last(n0, dseg0, begin0, it0, cit0, skip0, at_slash0);

918

}

918

}

919

920

// consume final segments from seg1 that

920

// consume final segments from seg1 that

921

// should not influence the comparison

921

// should not influence the comparison

HIT CBC

922

239

auto begin1 = seg1.begin();

922

239

auto begin1 = seg1.begin();

HIT CBC

923

239

auto it1 = seg1.end();

923

239

auto it1 = seg1.end();

HIT CBC

924

239

decode_view dseg1;

924

239

decode_view dseg1;

HIT CBC

925

239

if (it1 != seg1.begin())

925

239

if (it1 != seg1.begin())

926

{

926

{

HIT CBC

927

168

--it1;

927

168

--it1;

HIT CBC

928

168

dseg1 = **it1;

928

168

dseg1 = **it1;

929

}

929

}

HIT CBC

930

239

decode_view::iterator cit1 = dseg1.end();

930

239

decode_view::iterator cit1 = dseg1.end();

HIT CBC

931

239

std::size_t skip1 = 0;

931

239

std::size_t skip1 = 0;

HIT CBC

932

239

bool at_slash1 = true;

932

239

bool at_slash1 = true;

HIT CBC

933

285

while (n1 > n0)

933

285

while (n1 > n0)

934

{

934

{

HIT CBC

935

46

consume_last(n1, dseg1, begin1, it1, cit1, skip1, at_slash1);

935

46

consume_last(n1, dseg1, begin1, it1, cit1, skip1, at_slash1);

936

}

936

}

937

HIT CBC

938

239

int cmp = 0;

938

239

int cmp = 0;

HIT CBC

939

1179

while (n0)

939

1179

while (n0)

940

{

940

{

HIT CBC

941

940

char c0 = consume_last(

941

940

char c0 = consume_last(

942

n0, dseg0, begin0, it0, cit0, skip0, at_slash0);

942

n0, dseg0, begin0, it0, cit0, skip0, at_slash0);

HIT CBC

943

940

char c1 = consume_last(

943

940

char c1 = consume_last(

944

n1, dseg1, begin1, it1, cit1, skip1, at_slash1);

944

n1, dseg1, begin1, it1, cit1, skip1, at_slash1);

HIT CBC

945

940

if (c0 < c1)

945

940

if (c0 < c1)

HIT CBC

946

40

cmp = -1;

946

40

cmp = -1;

HIT CBC

947

900

else if (c1 < c0)

947

900

else if (c1 < c0)

HIT CBC

948

44

cmp = +1;

948

44

cmp = +1;

949

}

949

}

950

HIT CBC

951

239

if (cmp != 0)

951

239

if (cmp != 0)

HIT CBC

952

48

return cmp;

952

48

return cmp;

HIT CBC

953

191

if ( n00 == n10 )

953

191

if ( n00 == n10 )

HIT CBC

954

185

return 0;

954

185

return 0;

HIT CBC

955

6

if ( n00 < n10 )

955

6

if ( n00 < n10 )

HIT CBC

956

4

return -1;

956

4

return -1;

HIT CBC

957

2

return 1;

957

2

return 1;

958

}

958

}

959

960

} // detail

960

} // detail

961

} // urls

961

} // urls

962

} // boost

962

} // boost

963