# Source code for spack.test.util.util_url

# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

"""Test Spack's URL handling utility functions."""
import os
import os.path
import posixpath
import re
import sys

import pytest

import spack.paths
import spack.util.url as url_util
from spack.util.path import convert_to_posix_path

# True when the tests run on native Windows.
is_windows = sys.platform == 'win32'
if is_windows:
    # First "<letter>:" pair found in the path of Spack's test directory;
    # None when that path contains no such pair.
    drive_m = re.search(r'[A-Za-z]:', spack.paths.test_path)
    drive = None if drive_m is None else drive_m.group()


def test_url_parse():
    """Check the scheme, netloc and path that ``url_util.parse`` reports.

    Covers bare paths with a fallback scheme, ``file://`` URLs (absolute,
    relative and, on Windows, drive-prefixed), remote schemes where the
    first component is the netloc, and ``$spack`` substitution.
    """
    # A bare path takes the scheme supplied by the caller.
    parsed = url_util.parse('/path/to/resource', scheme='fake')
    assert parsed.scheme == 'fake'
    assert parsed.netloc == ''
    assert parsed.path == '/path/to/resource'

    parsed = url_util.parse('file:///path/to/resource')
    assert parsed.scheme == 'file'
    assert parsed.netloc == ''
    assert parsed.path == '/path/to/resource'

    # An explicit scheme in the URL wins over the ``scheme`` argument.
    parsed = url_util.parse('file:///path/to/resource', scheme='fake')
    assert parsed.scheme == 'file'
    assert parsed.netloc == ''
    assert parsed.path == '/path/to/resource'

    # A relative file:// URL is expected to resolve against the current
    # working directory.
    parsed = url_util.parse('file://path/to/resource')
    assert parsed.scheme == 'file'
    expected = convert_to_posix_path(
        os.path.abspath(
            posixpath.join('path', 'to', 'resource')))
    if is_windows:
        # Remove the drive as a prefix.  str.lstrip() interprets its argument
        # as a *set of characters*, so it is the wrong tool for dropping a
        # leading "C:" and would also misbehave when ``drive`` is None.
        if drive and expected.startswith(drive):
            expected = expected[len(drive):]
    assert parsed.path == expected

    if is_windows:
        # Drive-prefixed, backslash-separated file URL.
        parsed = url_util.parse('file://%s\\path\\to\\resource' % drive)
        assert parsed.scheme == 'file'
        expected = '/' + posixpath.join('path', 'to', 'resource')
        assert parsed.path == expected

    # For remote schemes the first component is the netloc.
    parsed = url_util.parse('https://path/to/resource')
    assert parsed.scheme == 'https'
    assert parsed.netloc == 'path'
    assert parsed.path == '/to/resource'

    parsed = url_util.parse('gs://path/to/resource')
    assert parsed.scheme == 'gs'
    assert parsed.netloc == 'path'
    assert parsed.path == '/to/resource'

    # "$spack" in a file:// URL is expected to expand to Spack's root.
    spack_root = spack.paths.spack_root
    parsed = url_util.parse('file://$spack')
    assert parsed.scheme == 'file'

    if is_windows:
        spack_root = '/' + convert_to_posix_path(spack_root)

    assert parsed.netloc + parsed.path == spack_root
def test_url_local_file_path():
    """Check which inputs ``url_util.local_file_path`` maps to local paths.

    Plain paths and ``file://`` URLs must yield a filesystem path; URLs with
    any other scheme must yield ``None``.
    """
    root = spack.paths.spack_root
    rooted_file = os.path.sep + os.path.join('a', 'b', 'c.txt')
    file_in_spack = os.path.abspath(os.path.join(root, 'a', 'b', 'c.txt'))

    # Absolute locations, with and without an explicit scheme.
    assert url_util.local_file_path('/a/b/c.txt') == rooted_file
    assert url_util.local_file_path('file:///a/b/c.txt') == rooted_file

    if is_windows:
        # Relative file:// URL: expected to resolve against the cwd.
        relative_file = os.path.abspath(os.path.join('a', 'b', 'c.txt'))
        assert url_util.local_file_path('file://a/b/c.txt') == relative_file

    # "$spack" is expected to expand to the Spack root.
    found = url_util.local_file_path('file://$spack/a/b/c.txt')
    assert found == file_in_spack

    if is_windows:
        found = url_util.local_file_path('file:///$spack/a/b/c.txt')
        assert found == file_in_spack

    found = url_util.local_file_path('file://$spack/a/b/c.txt')
    assert found == file_in_spack

    # not a file:// URL - so no local file path
    for remote in ('http:///a/b/c.txt',
                   'http://a/b/c.txt',
                   'http:///$spack/a/b/c.txt',
                   'http://$spack/a/b/c.txt'):
        assert url_util.local_file_path(remote) is None
def test_url_join_local_paths():
    """Check how ``url_util.join`` combines a base URL with local components.

    Contrasts href resolution (``resolve_href=True``) with plain prefix
    appending (the default), then checks that ``file://`` paths are left
    uncanonicalized and that ``..`` can climb across s3 bucket names.
    """
    join = url_util.join

    # --- Resolving a local link against a page URL ---

    # Without resolve_href the page name is kept as a path component, which
    # is not what a link resolver wants.
    assert join(
        's3://bucket/index.html',
        '../other-bucket/document.txt'
    ) == 's3://bucket/other-bucket/document.txt'

    # With resolve_href=True the link is resolved relative to the page.
    assert join(
        's3://bucket/index.html',
        '../other-bucket/document.txt',
        resolve_href=True
    ) == 's3://other-bucket/document.txt'

    # Same resolution with the href split up.  The first argument is the base
    # URL and can not be broken up; the remaining arguments are the pieces of
    # the local href that needs to be resolved.
    assert join(
        's3://bucket/index.html',
        '..', 'other-bucket', 'document.txt',
        resolve_href=True
    ) == 's3://other-bucket/document.txt'

    # --- Appending local path components to a prefix URL ---

    # resolve_href=True is the wrong mode here: the last component of the
    # prefix gets replaced.
    assert join(
        'https://mirror.spack.io/build_cache',
        'my-package',
        resolve_href=True
    ) == 'https://mirror.spack.io/my-package'

    # resolve_href=False appends to the prefix...
    assert join(
        'https://mirror.spack.io/build_cache',
        'my-package',
        resolve_href=False
    ) == 'https://mirror.spack.io/build_cache/my-package'

    # ...and it is the default.
    assert join(
        'https://mirror.spack.io/build_cache',
        'my-package'
    ) == 'https://mirror.spack.io/build_cache/my-package'

    # With resolve_href=False the first argument is just a prefix and no
    # resolution is done, so there should be no difference between
    #   join('/a/b/c', 'd/e'),
    #   join('/a/b', 'c', 'd/e'),
    #   join('/a', 'b/c', 'd', 'e'), etc.
    assert join(
        'https://mirror.spack.io',
        'build_cache',
        'my-package'
    ) == 'https://mirror.spack.io/build_cache/my-package'

    # --- file:// URL path components are *NOT* canonicalized ---
    spack_root = spack.paths.spack_root

    if sys.platform != 'win32':
        joined = join('/a/b/c', '$spack')
        assert joined == 'file:///a/b/c/$spack'  # not canonicalized
        formatted = url_util.format(joined)
        # canonicalize by hand
        by_hand = os.path.abspath(
            os.path.join('/', 'a', 'b', 'c', '.' + spack_root))
        assert formatted == url_util.format(by_hand)

        # see test_url_join_absolute_paths() for more on absolute path
        # components
        joined = join('/a/b/c', '/$spack')
        assert joined == 'file:///$spack'  # not canonicalized
        formatted = url_util.format(joined)
        assert formatted == url_util.format(spack_root)

    # For s3:// URLs, the "netloc" (bucket) is considered part of the path.
    # Make sure join() can cross bucket boundaries in this case.
    components = ['s3://bucket/a/b', 'new-bucket', 'c']
    assert join(*components) == 's3://bucket/a/b/new-bucket/c'

    # Each extra '..' climbs one level; after the third one, new-bucket has
    # become the "netloc" (bucket name).
    for climbed in ('s3://bucket/a/new-bucket/c',
                    's3://bucket/new-bucket/c',
                    's3://new-bucket/c'):
        components.insert(1, '..')
        assert join(*components) == climbed
def test_url_join_absolute_paths():
    """Check how ``url_util.join`` treats absolute path components.

    Absolute paths replace the path of the URL built so far, components
    that carry a scheme restart the join from that component, and
    ``resolve_href`` only affects the local components at the end.
    """
    # Handling absolute path components is a little tricky.  To this end, we
    # distinguish "absolute path components", from the more-familiar concept of
    # "absolute paths" as they are understood for local filesystem paths.
    #
    # - All absolute paths are absolute path components.  Joining a URL with
    #   these components has the effect of completely replacing the path of the
    #   URL with the absolute path.  These components do not specify a URL
    #   scheme, so the scheme of the URL produced when joining them depends on
    #   those provided by components that came before it (file:// assumed if no
    #   such scheme is provided).

    # For example:
    p = '/path/to/resource'
    # ...is an absolute path

    # http:// URL
    assert (
        url_util.join('http://example.com/a/b/c', p)
        == 'http://example.com/path/to/resource')

    # s3:// URL
    # also notice how the netloc is treated as part of the path for s3:// URLs
    assert (
        url_util.join('s3://example.com/a/b/c', p)
        == 's3://path/to/resource')

    # - URL components that specify a scheme are always absolute path
    #   components.  Joining a base URL with these components effectively
    #   discards the base URL and "resets" the joining logic starting at the
    #   component in question and using it as the new base URL.

    # For example:
    p = 'http://example.com/path/to'
    # ...is an http:// URL

    join_result = url_util.join(p, 'resource')
    assert join_result == 'http://example.com/path/to/resource'

    # works as if everything before the http:// URL was left out
    assert (
        url_util.join(
            'literally', 'does', 'not', 'matter',
            p, 'resource')
        == join_result)

    # It's important to keep in mind that this logic applies even if the
    # component's path is not an absolute path!

    # For example:
    p = './d'
    # ...is *NOT* an absolute path
    # ...is also *NOT* an absolute path component

    u = 'file://./d'
    # ...is a URL
    # The path of this URL is *NOT* an absolute path
    # HOWEVER, the URL, itself, *is* an absolute path component

    # (We just need...
    cwd = os.getcwd()
    # ...to work out what resource it points to)

    if sys.platform == "win32":
        # Strings are immutable, so the converted path has to be captured;
        # calling convert_to_posix_path() for its side effect does nothing.
        cwd = '/' + convert_to_posix_path(cwd)

    # So, even though parse() assumes "file://" URL, the scheme is still
    # significant in URL path components passed to join(), even if the base
    # is a file:// URL.

    path_join_result = 'file:///a/b/c/d'
    assert url_util.join('/a/b/c', p) == path_join_result
    assert url_util.join('file:///a/b/c', p) == path_join_result

    url_join_result = 'file://{CWD}/d'.format(CWD=cwd)
    assert url_util.join('/a/b/c', u) == url_join_result
    assert url_util.join('file:///a/b/c', u) == url_join_result

    # Finally, resolve_href should have no effect for how absolute path
    # components are handled because local hrefs can not be absolute path
    # components.
    args = ['s3://does', 'not', 'matter',
            'http://example.com',
            'also', 'does', 'not', 'matter',
            '/path']

    expected = 'http://example.com/path'
    assert url_util.join(*args, resolve_href=True) == expected
    assert url_util.join(*args, resolve_href=False) == expected

    # resolve_href only matters for the local path components at the end of the
    # argument list.
    args[-1] = '/path/to/page'
    args.extend(('..', '..', 'resource'))

    assert (url_util.join(*args, resolve_href=True)
            == 'http://example.com/resource')

    assert (url_util.join(*args, resolve_href=False)
            == 'http://example.com/path/resource')
@pytest.mark.parametrize("url,parts", [
    # ssh:// URLs, with and without user and port
    ("ssh://user@host.xz:500/path/to/repo.git/",
     ("ssh", "user", "host.xz", 500, "/path/to/repo.git")),
    ("ssh://user@host.xz/path/to/repo.git/",
     ("ssh", "user", "host.xz", None, "/path/to/repo.git")),
    ("ssh://host.xz:500/path/to/repo.git/",
     ("ssh", None, "host.xz", 500, "/path/to/repo.git")),
    ("ssh://host.xz/path/to/repo.git/",
     ("ssh", None, "host.xz", None, "/path/to/repo.git")),
    ("ssh://user@host.xz/path/to/repo.git/",
     ("ssh", "user", "host.xz", None, "/path/to/repo.git")),
    ("ssh://host.xz/path/to/repo.git/",
     ("ssh", None, "host.xz", None, "/path/to/repo.git")),
    ("ssh://user@host.xz/~user/path/to/repo.git/",
     ("ssh", "user", "host.xz", None, "~user/path/to/repo.git")),
    ("ssh://host.xz/~user/path/to/repo.git/",
     ("ssh", None, "host.xz", None, "~user/path/to/repo.git")),
    ("ssh://user@host.xz/~/path/to/repo.git",
     ("ssh", "user", "host.xz", None, "~/path/to/repo.git")),
    ("ssh://host.xz/~/path/to/repo.git",
     ("ssh", None, "host.xz", None, "~/path/to/repo.git")),

    # scp-like "host:path" forms carry no scheme
    ("git@github.com:spack/spack.git",
     (None, "git", "github.com", None, "spack/spack.git")),
    ("user@host.xz:/path/to/repo.git/",
     (None, "user", "host.xz", None, "/path/to/repo.git")),
    ("host.xz:/path/to/repo.git/",
     (None, None, "host.xz", None, "/path/to/repo.git")),
    ("user@host.xz:~user/path/to/repo.git/",
     (None, "user", "host.xz", None, "~user/path/to/repo.git")),
    ("host.xz:~user/path/to/repo.git/",
     (None, None, "host.xz", None, "~user/path/to/repo.git")),
    ("user@host.xz:path/to/repo.git",
     (None, "user", "host.xz", None, "path/to/repo.git")),
    ("host.xz:path/to/repo.git",
     (None, None, "host.xz", None, "path/to/repo.git")),

    # other schemes
    ("rsync://host.xz/path/to/repo.git/",
     ("rsync", None, "host.xz", None, "/path/to/repo.git")),
    ("git://host.xz/path/to/repo.git/",
     ("git", None, "host.xz", None, "/path/to/repo.git")),
    ("git://host.xz/~user/path/to/repo.git/",
     ("git", None, "host.xz", None, "~user/path/to/repo.git")),
    ("http://host.xz/path/to/repo.git/",
     ("http", None, "host.xz", None, "/path/to/repo.git")),
    ("https://host.xz/path/to/repo.git/",
     ("https", None, "host.xz", None, "/path/to/repo.git")),
    ("https://github.com/spack/spack",
     ("https", None, "github.com", None, "/spack/spack")),
    ("https://github.com/spack/spack/",
     ("https", None, "github.com", None, "/spack/spack")),
    ("file:///path/to/repo.git/",
     ("file", None, None, None, "/path/to/repo.git")),
    ("file://~/path/to/repo.git/",
     ("file", None, None, None, "~/path/to/repo.git")),

    # non-numeric ports are rejected (parts is None)
    ("ssh://host.xz:port/path/to/repo.git/", None),
    ("ssh://host-foo.xz:port/path/to/repo.git/", None),

    # regular file paths are rejected (parts is None)
    ("/path/to/repo.git/", None),
    ("path/to/repo.git/", None),
    ("~/path/to/repo.git", None),
])
def test_git_url_parse(url, parts):
    """Check ``url_util.parse_git_url`` against one parametrized case.

    ``parts`` is the expected ``(scheme, user, hostname, port, path)`` tuple.
    A ``parts`` of ``None`` marks a URL for which ``parse_git_url`` must
    raise ``ValueError``.
    """
    if parts is not None:
        assert parts == url_util.parse_git_url(url)
        return

    with pytest.raises(ValueError):
        url_util.parse_git_url(url)