| 1 | efrain | 1 | <?php
 | 
        
           |  |  | 2 |   | 
        
           |  |  | 3 | declare(strict_types=1);
 | 
        
           |  |  | 4 |   | 
        
           |  |  | 5 | namespace GuzzleHttp\Psr7;
 | 
        
           |  |  | 6 |   | 
        
           |  |  | 7 | use Psr\Http\Message\UriInterface;
 | 
        
           |  |  | 8 |   | 
        
           |  |  | 9 | /**
 | 
        
           |  |  | 10 |  * Provides methods to normalize and compare URIs.
 | 
        
           |  |  | 11 |  *
 | 
        
           |  |  | 12 |  * @author Tobias Schultze
 | 
        
           |  |  | 13 |  *
 | 
        
           | 1441 | ariadna | 14 |  * @see https://datatracker.ietf.org/doc/html/rfc3986#section-6
 | 
        
           | 1 | efrain | 15 |  */
 | 
        
           |  |  | 16 | final class UriNormalizer
 | 
        
           |  |  | 17 | {
 | 
        
           |  |  | 18 |     /**
 | 
        
           |  |  | 19 |      * Default normalizations which only include the ones that preserve semantics.
 | 
        
           |  |  | 20 |      */
 | 
        
           |  |  | 21 |     public const PRESERVING_NORMALIZATIONS =
 | 
        
           |  |  | 22 |         self::CAPITALIZE_PERCENT_ENCODING |
 | 
        
           |  |  | 23 |         self::DECODE_UNRESERVED_CHARACTERS |
 | 
        
           |  |  | 24 |         self::CONVERT_EMPTY_PATH |
 | 
        
           |  |  | 25 |         self::REMOVE_DEFAULT_HOST |
 | 
        
           |  |  | 26 |         self::REMOVE_DEFAULT_PORT |
 | 
        
           |  |  | 27 |         self::REMOVE_DOT_SEGMENTS;
 | 
        
           |  |  | 28 |   | 
        
           |  |  | 29 |     /**
 | 
        
           |  |  | 30 |      * All letters within a percent-encoding triplet (e.g., "%3A") are case-insensitive, and should be capitalized.
 | 
        
           |  |  | 31 |      *
 | 
        
           |  |  | 32 |      * Example: http://example.org/a%c2%b1b → http://example.org/a%C2%B1b
 | 
        
           |  |  | 33 |      */
 | 
        
           |  |  | 34 |     public const CAPITALIZE_PERCENT_ENCODING = 1;
 | 
        
           |  |  | 35 |   | 
        
           |  |  | 36 |     /**
 | 
        
           |  |  | 37 |      * Decodes percent-encoded octets of unreserved characters.
 | 
        
           |  |  | 38 |      *
 | 
        
           |  |  | 39 |      * For consistency, percent-encoded octets in the ranges of ALPHA (%41–%5A and %61–%7A), DIGIT (%30–%39),
 | 
        
           |  |  | 40 |      * hyphen (%2D), period (%2E), underscore (%5F), or tilde (%7E) should not be created by URI producers and,
 | 
        
           |  |  | 41 |      * when found in a URI, should be decoded to their corresponding unreserved characters by URI normalizers.
 | 
        
           |  |  | 42 |      *
 | 
        
           |  |  | 43 |      * Example: http://example.org/%7Eusern%61me/ → http://example.org/~username/
 | 
        
           |  |  | 44 |      */
 | 
        
           |  |  | 45 |     public const DECODE_UNRESERVED_CHARACTERS = 2;
 | 
        
           |  |  | 46 |   | 
        
           |  |  | 47 |     /**
 | 
        
           |  |  | 48 |      * Converts the empty path to "/" for http and https URIs.
 | 
        
           |  |  | 49 |      *
 | 
        
           |  |  | 50 |      * Example: http://example.org → http://example.org/
 | 
        
           |  |  | 51 |      */
 | 
        
           |  |  | 52 |     public const CONVERT_EMPTY_PATH = 4;
 | 
        
           |  |  | 53 |   | 
        
           |  |  | 54 |     /**
 | 
        
           |  |  | 55 |      * Removes the default host of the given URI scheme from the URI.
 | 
        
           |  |  | 56 |      *
 | 
        
           |  |  | 57 |      * Only the "file" scheme defines the default host "localhost".
 | 
        
           |  |  | 58 |      * All of `file:/myfile`, `file:///myfile`, and `file://localhost/myfile`
 | 
        
           |  |  | 59 |      * are equivalent according to RFC 3986. The first format is not accepted
 | 
        
           |  |  | 60 |      * by PHPs stream functions and thus already normalized implicitly to the
 | 
        
           |  |  | 61 |      * second format in the Uri class. See `GuzzleHttp\Psr7\Uri::composeComponents`.
 | 
        
           |  |  | 62 |      *
 | 
        
           |  |  | 63 |      * Example: file://localhost/myfile → file:///myfile
 | 
        
           |  |  | 64 |      */
 | 
        
           |  |  | 65 |     public const REMOVE_DEFAULT_HOST = 8;
 | 
        
           |  |  | 66 |   | 
        
           |  |  | 67 |     /**
 | 
        
           |  |  | 68 |      * Removes the default port of the given URI scheme from the URI.
 | 
        
           |  |  | 69 |      *
 | 
        
           |  |  | 70 |      * Example: http://example.org:80/ → http://example.org/
 | 
        
           |  |  | 71 |      */
 | 
        
           |  |  | 72 |     public const REMOVE_DEFAULT_PORT = 16;
 | 
        
           |  |  | 73 |   | 
        
           |  |  | 74 |     /**
 | 
        
           |  |  | 75 |      * Removes unnecessary dot-segments.
 | 
        
           |  |  | 76 |      *
 | 
        
           |  |  | 77 |      * Dot-segments in relative-path references are not removed as it would
 | 
        
           |  |  | 78 |      * change the semantics of the URI reference.
 | 
        
           |  |  | 79 |      *
 | 
        
           |  |  | 80 |      * Example: http://example.org/../a/b/../c/./d.html → http://example.org/a/c/d.html
 | 
        
           |  |  | 81 |      */
 | 
        
           |  |  | 82 |     public const REMOVE_DOT_SEGMENTS = 32;
 | 
        
           |  |  | 83 |   | 
        
           |  |  | 84 |     /**
 | 
        
           |  |  | 85 |      * Paths which include two or more adjacent slashes are converted to one.
 | 
        
           |  |  | 86 |      *
 | 
        
           |  |  | 87 |      * Webservers usually ignore duplicate slashes and treat those URIs equivalent.
 | 
        
           |  |  | 88 |      * But in theory those URIs do not need to be equivalent. So this normalization
 | 
        
           |  |  | 89 |      * may change the semantics. Encoded slashes (%2F) are not removed.
 | 
        
           |  |  | 90 |      *
 | 
        
           |  |  | 91 |      * Example: http://example.org//foo///bar.html → http://example.org/foo/bar.html
 | 
        
           |  |  | 92 |      */
 | 
        
           |  |  | 93 |     public const REMOVE_DUPLICATE_SLASHES = 64;
 | 
        
           |  |  | 94 |   | 
        
           |  |  | 95 |     /**
 | 
        
           |  |  | 96 |      * Sort query parameters with their values in alphabetical order.
 | 
        
           |  |  | 97 |      *
 | 
        
           |  |  | 98 |      * However, the order of parameters in a URI may be significant (this is not defined by the standard).
 | 
        
           |  |  | 99 |      * So this normalization is not safe and may change the semantics of the URI.
 | 
        
           |  |  | 100 |      *
 | 
        
           |  |  | 101 |      * Example: ?lang=en&article=fred → ?article=fred&lang=en
 | 
        
           |  |  | 102 |      *
 | 
        
           |  |  | 103 |      * Note: The sorting is neither locale nor Unicode aware (the URI query does not get decoded at all) as the
 | 
        
           |  |  | 104 |      * purpose is to be able to compare URIs in a reproducible way, not to have the params sorted perfectly.
 | 
        
           |  |  | 105 |      */
 | 
        
           |  |  | 106 |     public const SORT_QUERY_PARAMETERS = 128;
 | 
        
           |  |  | 107 |   | 
        
           |  |  | 108 |     /**
 | 
        
           |  |  | 109 |      * Returns a normalized URI.
 | 
        
           |  |  | 110 |      *
 | 
        
           |  |  | 111 |      * The scheme and host component are already normalized to lowercase per PSR-7 UriInterface.
 | 
        
           |  |  | 112 |      * This methods adds additional normalizations that can be configured with the $flags parameter.
 | 
        
           |  |  | 113 |      *
 | 
        
           |  |  | 114 |      * PSR-7 UriInterface cannot distinguish between an empty component and a missing component as
 | 
        
           |  |  | 115 |      * getQuery(), getFragment() etc. always return a string. This means the URIs "/?#" and "/" are
 | 
        
           |  |  | 116 |      * treated equivalent which is not necessarily true according to RFC 3986. But that difference
 | 
        
           |  |  | 117 |      * is highly uncommon in reality. So this potential normalization is implied in PSR-7 as well.
 | 
        
           |  |  | 118 |      *
 | 
        
           |  |  | 119 |      * @param UriInterface $uri   The URI to normalize
 | 
        
           |  |  | 120 |      * @param int          $flags A bitmask of normalizations to apply, see constants
 | 
        
           |  |  | 121 |      *
 | 
        
           | 1441 | ariadna | 122 |      * @see https://datatracker.ietf.org/doc/html/rfc3986#section-6.2
 | 
        
           | 1 | efrain | 123 |      */
 | 
        
           |  |  | 124 |     public static function normalize(UriInterface $uri, int $flags = self::PRESERVING_NORMALIZATIONS): UriInterface
 | 
        
           |  |  | 125 |     {
 | 
        
           |  |  | 126 |         if ($flags & self::CAPITALIZE_PERCENT_ENCODING) {
 | 
        
           |  |  | 127 |             $uri = self::capitalizePercentEncoding($uri);
 | 
        
           |  |  | 128 |         }
 | 
        
           |  |  | 129 |   | 
        
           |  |  | 130 |         if ($flags & self::DECODE_UNRESERVED_CHARACTERS) {
 | 
        
           |  |  | 131 |             $uri = self::decodeUnreservedCharacters($uri);
 | 
        
           |  |  | 132 |         }
 | 
        
           |  |  | 133 |   | 
        
           | 1441 | ariadna | 134 |         if ($flags & self::CONVERT_EMPTY_PATH && $uri->getPath() === ''
 | 
        
           |  |  | 135 |             && ($uri->getScheme() === 'http' || $uri->getScheme() === 'https')
 | 
        
           | 1 | efrain | 136 |         ) {
 | 
        
           |  |  | 137 |             $uri = $uri->withPath('/');
 | 
        
           |  |  | 138 |         }
 | 
        
           |  |  | 139 |   | 
        
           |  |  | 140 |         if ($flags & self::REMOVE_DEFAULT_HOST && $uri->getScheme() === 'file' && $uri->getHost() === 'localhost') {
 | 
        
           |  |  | 141 |             $uri = $uri->withHost('');
 | 
        
           |  |  | 142 |         }
 | 
        
           |  |  | 143 |   | 
        
           |  |  | 144 |         if ($flags & self::REMOVE_DEFAULT_PORT && $uri->getPort() !== null && Uri::isDefaultPort($uri)) {
 | 
        
           |  |  | 145 |             $uri = $uri->withPort(null);
 | 
        
           |  |  | 146 |         }
 | 
        
           |  |  | 147 |   | 
        
           |  |  | 148 |         if ($flags & self::REMOVE_DOT_SEGMENTS && !Uri::isRelativePathReference($uri)) {
 | 
        
           |  |  | 149 |             $uri = $uri->withPath(UriResolver::removeDotSegments($uri->getPath()));
 | 
        
           |  |  | 150 |         }
 | 
        
           |  |  | 151 |   | 
        
           |  |  | 152 |         if ($flags & self::REMOVE_DUPLICATE_SLASHES) {
 | 
        
           |  |  | 153 |             $uri = $uri->withPath(preg_replace('#//++#', '/', $uri->getPath()));
 | 
        
           |  |  | 154 |         }
 | 
        
           |  |  | 155 |   | 
        
           |  |  | 156 |         if ($flags & self::SORT_QUERY_PARAMETERS && $uri->getQuery() !== '') {
 | 
        
           |  |  | 157 |             $queryKeyValues = explode('&', $uri->getQuery());
 | 
        
           |  |  | 158 |             sort($queryKeyValues);
 | 
        
           |  |  | 159 |             $uri = $uri->withQuery(implode('&', $queryKeyValues));
 | 
        
           |  |  | 160 |         }
 | 
        
           |  |  | 161 |   | 
        
           |  |  | 162 |         return $uri;
 | 
        
           |  |  | 163 |     }
 | 
        
           |  |  | 164 |   | 
        
           |  |  | 165 |     /**
 | 
        
           |  |  | 166 |      * Whether two URIs can be considered equivalent.
 | 
        
           |  |  | 167 |      *
 | 
        
           |  |  | 168 |      * Both URIs are normalized automatically before comparison with the given $normalizations bitmask. The method also
 | 
        
           |  |  | 169 |      * accepts relative URI references and returns true when they are equivalent. This of course assumes they will be
 | 
        
           |  |  | 170 |      * resolved against the same base URI. If this is not the case, determination of equivalence or difference of
 | 
        
           |  |  | 171 |      * relative references does not mean anything.
 | 
        
           |  |  | 172 |      *
 | 
        
           |  |  | 173 |      * @param UriInterface $uri1           An URI to compare
 | 
        
           |  |  | 174 |      * @param UriInterface $uri2           An URI to compare
 | 
        
           |  |  | 175 |      * @param int          $normalizations A bitmask of normalizations to apply, see constants
 | 
        
           |  |  | 176 |      *
 | 
        
           | 1441 | ariadna | 177 |      * @see https://datatracker.ietf.org/doc/html/rfc3986#section-6.1
 | 
        
           | 1 | efrain | 178 |      */
 | 
        
           |  |  | 179 |     public static function isEquivalent(UriInterface $uri1, UriInterface $uri2, int $normalizations = self::PRESERVING_NORMALIZATIONS): bool
 | 
        
           |  |  | 180 |     {
 | 
        
           |  |  | 181 |         return (string) self::normalize($uri1, $normalizations) === (string) self::normalize($uri2, $normalizations);
 | 
        
           |  |  | 182 |     }
 | 
        
           |  |  | 183 |   | 
        
           |  |  | 184 |     private static function capitalizePercentEncoding(UriInterface $uri): UriInterface
 | 
        
           |  |  | 185 |     {
 | 
        
           |  |  | 186 |         $regex = '/(?:%[A-Fa-f0-9]{2})++/';
 | 
        
           |  |  | 187 |   | 
        
           | 1441 | ariadna | 188 |         $callback = function (array $match): string {
 | 
        
           | 1 | efrain | 189 |             return strtoupper($match[0]);
 | 
        
           |  |  | 190 |         };
 | 
        
           |  |  | 191 |   | 
        
           |  |  | 192 |         return
 | 
        
           |  |  | 193 |             $uri->withPath(
 | 
        
           |  |  | 194 |                 preg_replace_callback($regex, $callback, $uri->getPath())
 | 
        
           |  |  | 195 |             )->withQuery(
 | 
        
           |  |  | 196 |                 preg_replace_callback($regex, $callback, $uri->getQuery())
 | 
        
           |  |  | 197 |             );
 | 
        
           |  |  | 198 |     }
 | 
        
           |  |  | 199 |   | 
        
           |  |  | 200 |     private static function decodeUnreservedCharacters(UriInterface $uri): UriInterface
 | 
        
           |  |  | 201 |     {
 | 
        
           |  |  | 202 |         $regex = '/%(?:2D|2E|5F|7E|3[0-9]|[46][1-9A-F]|[57][0-9A])/i';
 | 
        
           |  |  | 203 |   | 
        
           | 1441 | ariadna | 204 |         $callback = function (array $match): string {
 | 
        
           | 1 | efrain | 205 |             return rawurldecode($match[0]);
 | 
        
           |  |  | 206 |         };
 | 
        
           |  |  | 207 |   | 
        
           |  |  | 208 |         return
 | 
        
           |  |  | 209 |             $uri->withPath(
 | 
        
           |  |  | 210 |                 preg_replace_callback($regex, $callback, $uri->getPath())
 | 
        
           |  |  | 211 |             )->withQuery(
 | 
        
           |  |  | 212 |                 preg_replace_callback($regex, $callback, $uri->getQuery())
 | 
        
           |  |  | 213 |             );
 | 
        
           |  |  | 214 |     }
 | 
        
           |  |  | 215 |   | 
        
           |  |  | 216 |     private function __construct()
 | 
        
           |  |  | 217 |     {
 | 
        
           |  |  | 218 |         // cannot be instantiated
 | 
        
           |  |  | 219 |     }
 | 
        
           |  |  | 220 | }
 |