# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
Fetch strategies are used to download source code into a staging area
in order to build it. They need to define the following methods:
* fetch()
This should attempt to download/check out source from somewhere.
* check()
Apply a checksum to the downloaded source code, e.g. for an archive.
May not do anything if the fetch method was safe to begin with.
* expand()
Expand (e.g., an archive) downloaded file to source, with the
standard stage source path as the destination directory.
* reset()
Restore original state of downloaded code. Used by clean commands.
This may just remove the expanded source and re-expand an archive,
or it may run something like git reset --hard.
* archive()
Archive a source directory, e.g. for creating a mirror.
"""
import copy
import functools
import os
import os.path
import re
import shutil
import sys
from typing import List, Optional # novm
import six
import six.moves.urllib.parse as urllib_parse
import llnl.util.tty as tty
from llnl.util.filesystem import (
get_single_file,
mkdirp,
temp_cwd,
temp_rename,
working_dir,
)
import spack.config
import spack.error
import spack.url
import spack.util.crypto as crypto
import spack.util.pattern as pattern
import spack.util.url as url_util
import spack.util.web
import spack.version
from spack.util.compression import decompressor_for, extension
from spack.util.executable import CommandNotFoundError, which
from spack.util.string import comma_and, quote
#: List of all fetch strategies, populated by the ``fetcher`` decorator.
all_strategies = []
CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE = (
"The contents of {subject} look like {content_type}. Either the URL"
" you are trying to use does not exist or you have an internet gateway"
" issue. You can remove the bad archive using 'spack clean"
" <package>', then try again using the correct URL.")
def warn_content_type_mismatch(subject, content_type='HTML'):
tty.warn(CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE.format(
subject=subject, content_type=content_type))
def _needs_stage(fun):
"""Many methods on fetch strategies require a stage to be set
using set_stage(). This decorator adds a check for self.stage."""
@functools.wraps(fun)
def wrapper(self, *args, **kwargs):
if not self.stage:
raise NoStageError(fun)
return fun(self, *args, **kwargs)
return wrapper
def _ensure_one_stage_entry(stage_path):
"""Ensure there is only one stage entry in the stage path."""
stage_entries = os.listdir(stage_path)
assert len(stage_entries) == 1
return os.path.join(stage_path, stage_entries[0])
def fetcher(cls):
"""Decorator used to register fetch strategies."""
all_strategies.append(cls)
return cls
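# A minimal sketch of how a strategy plugs into this registry. The
# ``FooFetchStrategy`` class and its ``foo`` url_attr are hypothetical,
# shown only to illustrate the registration pattern:
#
#     @fetcher
#     class FooFetchStrategy(FetchStrategy):
#         url_attr = 'foo'
#
#         def fetch(self):
#             # download from self.url into self.stage here
#             return True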
class FetchStrategy(object):
"""Superclass of all fetch strategies."""
#: The URL attribute must be specified either at the package class
#: level, or as a keyword argument to ``version()``. It is used to
#: distinguish fetchers for different versions in the package DSL.
url_attr = None # type: Optional[str]
    #: Optional attributes can be used to distinguish fetchers when
#: classes have multiple ``url_attrs`` at the top-level.
# optional attributes in version() args.
optional_attrs = [] # type: List[str]
def __init__(self, **kwargs):
# The stage is initialized late, so that fetch strategies can be
# constructed at package construction time. This is where things
# will be fetched.
self.stage = None
# Enable or disable caching for this strategy based on
# 'no_cache' option from version directive.
self.cache_enabled = not kwargs.pop('no_cache', False)
# Subclasses need to implement these methods
    def fetch(self):
"""Fetch source code archive or repo.
Returns:
bool: True on success, False on failure.
"""
    def check(self):
"""Checksum the archive fetched by this FetchStrategy."""
    def expand(self):
"""Expand the downloaded archive into the stage source path."""
    def reset(self):
"""Revert to freshly downloaded state.
For archive files, this may just re-expand the archive.
"""
    def archive(self, destination):
"""Create an archive of the downloaded data for a mirror.
For downloaded files, this should preserve the checksum of the
original file. For repositories, it should just create an
expandable tarball out of the downloaded repository.
"""
@property
def cachable(self):
"""Whether fetcher is capable of caching the resource it retrieves.
This generally is determined by whether the resource is
identifiably associated with a specific package version.
Returns:
bool: True if can cache, False otherwise.
"""
    def source_id(self):
"""A unique ID for the source.
It is intended that a human could easily generate this themselves using
the information available to them in the Spack package.
The returned value is added to the content which determines the full
hash for a package using `str()`.
"""
raise NotImplementedError
    def mirror_id(self):
"""This is a unique ID for a source that is intended to help identify
reuse of resources across packages.
It is unique like source-id, but it does not include the package name
and is not necessarily easy for a human to create themselves.
"""
raise NotImplementedError
    def __str__(self):  # Should be human readable URL.
        return "FetchStrategy.__str__"
    @classmethod
def matches(cls, args):
"""Predicate that matches fetch strategies to arguments of
the version directive.
Args:
args: arguments of the version directive
"""
return cls.url_attr in args
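    # Illustrative checks (argument values are hypothetical):
    #
    #     URLFetchStrategy.matches({'url': 'https://example.com/x.tgz'})
    #     # -> True, because URLFetchStrategy.url_attr is 'url'
    #     GitFetchStrategy.matches({'url': 'https://example.com/x.tgz'})
    #     # -> False, because GitFetchStrategy.url_attr is 'git'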
@fetcher
class BundleFetchStrategy(FetchStrategy):
"""
Fetch strategy associated with bundle, or no-code, packages.
Having a basic fetch strategy is a requirement for executing post-install
hooks. Consequently, this class provides the API but does little more
than log messages.
TODO: Remove this class by refactoring resource handling and the link
between composite stages and composite fetch strategies (see #11981).
"""
#: There is no associated URL keyword in ``version()`` for no-code
#: packages but this property is required for some strategy-related
#: functions (e.g., check_pkg_attributes).
url_attr = ''
    def fetch(self):
"""Simply report success -- there is no code to fetch."""
return True
@property
def cachable(self):
"""Report False as there is no code to cache."""
return False
    def source_id(self):
"""BundlePackages don't have a source id."""
return ''
    def mirror_id(self):
"""BundlePackages don't have a mirror id."""
class FetchStrategyComposite(pattern.Composite):
"""Composite for a FetchStrategy object.
"""
matches = FetchStrategy.matches
def __init__(self):
super(FetchStrategyComposite, self).__init__([
'fetch', 'check', 'expand', 'reset', 'archive', 'cachable',
'mirror_id'
])
    def source_id(self):
component_ids = tuple(i.source_id() for i in self)
if all(component_ids):
return component_ids
@fetcher
class URLFetchStrategy(FetchStrategy):
"""URLFetchStrategy pulls source code from a URL for an archive, check the
archive against a checksum, and decompresses the archive.
The destination for the resulting file(s) is the standard stage path.
"""
url_attr = 'url'
    # These are checksum types. The generic 'checksum' argument is
    # deprecated in favor of specific hash names, but we keep it for
    # backward compatibility.
optional_attrs = list(crypto.hashes.keys()) + ['checksum']
def __init__(self, url=None, checksum=None, **kwargs):
super(URLFetchStrategy, self).__init__(**kwargs)
# Prefer values in kwargs to the positionals.
self.url = kwargs.get('url', url)
self.mirrors = kwargs.get('mirrors', [])
# digest can be set as the first argument, or from an explicit
# kwarg by the hash name.
self.digest = kwargs.get('checksum', checksum)
for h in self.optional_attrs:
if h in kwargs:
self.digest = kwargs[h]
self.expand_archive = kwargs.get('expand', True)
self.extra_options = kwargs.get('fetch_options', {})
self._curl = None
self.extension = kwargs.get('extension', None)
if not self.url:
raise ValueError("URLFetchStrategy requires a url for fetching.")
@property
def curl(self):
if not self._curl:
try:
self._curl = which('curl', required=True)
except CommandNotFoundError as exc:
tty.error(str(exc))
return self._curl
    def source_id(self):
return self.digest
    def mirror_id(self):
if not self.digest:
return None
# The filename is the digest. A directory is also created based on
# truncating the digest to avoid creating a directory with too many
# entries
return os.path.sep.join(
['archive', self.digest[:2], self.digest])
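    # Illustrative result for a hypothetical digest 'abc123':
    #
    #     fetcher.mirror_id()  # -> 'archive/ab/abc123' (POSIX separator)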
@property
def candidate_urls(self):
urls = []
for url in [self.url] + (self.mirrors or []):
if url.startswith('file://'):
path = urllib_parse.quote(url[len('file://'):])
url = 'file://' + path
urls.append(url)
return urls
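    # Illustrative: a 'file://' URL containing a space is percent-quoted
    # before use (the path is hypothetical):
    #
    #     'file:///tmp/my dir/x.tgz' -> 'file:///tmp/my%20dir/x.tgz'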
    @_needs_stage
def fetch(self):
if self.archive_file:
tty.debug('Already downloaded {0}'.format(self.archive_file))
return
url = None
errors = []
for url in self.candidate_urls:
if not self._existing_url(url):
continue
try:
partial_file, save_file = self._fetch_from_url(url)
if save_file and (partial_file is not None):
os.rename(partial_file, save_file)
break
except FailedDownloadError as e:
errors.append(str(e))
for msg in errors:
tty.debug(msg)
if not self.archive_file:
raise FailedDownloadError(url)
def _existing_url(self, url):
tty.debug('Checking existence of {0}'.format(url))
if spack.config.get('config:url_fetch_method') == 'curl':
curl = self.curl
# Telling curl to fetch the first byte (-r 0-0) is supposed to be
# portable.
curl_args = ['--stderr', '-', '-s', '-f', '-r', '0-0', url]
if not spack.config.get('config:verify_ssl'):
curl_args.append('-k')
_ = curl(*curl_args, fail_on_error=False, output=os.devnull)
return curl.returncode == 0
else:
# Telling urllib to check if url is accessible
try:
url, headers, response = spack.util.web.read_from_url(url)
except spack.util.web.SpackWebError:
msg = "Urllib fetch failed to verify url {0}".format(url)
raise FailedDownloadError(url, msg)
return (response.getcode() is None or response.getcode() == 200)
def _fetch_from_url(self, url):
if spack.config.get('config:url_fetch_method') == 'curl':
return self._fetch_curl(url)
else:
return self._fetch_urllib(url)
def _check_headers(self, headers):
# Check if we somehow got an HTML file rather than the archive we
# asked for. We only look at the last content type, to handle
# redirects properly.
content_types = re.findall(r'Content-Type:[^\r\n]+', headers,
flags=re.IGNORECASE)
if content_types and 'text/html' in content_types[-1]:
warn_content_type_mismatch(self.archive_file or "the archive")
@_needs_stage
def _fetch_urllib(self, url):
save_file = None
if self.stage.save_filename:
save_file = self.stage.save_filename
tty.msg('Fetching {0}'.format(url))
# Run urllib but grab the mime type from the http headers
try:
url, headers, response = spack.util.web.read_from_url(url)
except spack.util.web.SpackWebError as e:
# clean up archive on failure.
if self.archive_file:
os.remove(self.archive_file)
if save_file and os.path.exists(save_file):
os.remove(save_file)
msg = 'urllib failed to fetch with error {0}'.format(e)
raise FailedDownloadError(url, msg)
with open(save_file, 'wb') as _open_file:
shutil.copyfileobj(response, _open_file)
self._check_headers(str(headers))
return None, save_file
@_needs_stage
def _fetch_curl(self, url):
save_file = None
partial_file = None
if self.stage.save_filename:
save_file = self.stage.save_filename
partial_file = self.stage.save_filename + '.part'
tty.msg('Fetching {0}'.format(url))
if partial_file:
save_args = ['-C',
'-', # continue partial downloads
'-o',
partial_file] # use a .part file
else:
save_args = ['-O']
curl_args = save_args + [
'-f', # fail on >400 errors
'-D',
            '-',  # print out HTTP headers
'-L', # resolve 3xx redirects
url,
]
if not spack.config.get('config:verify_ssl'):
curl_args.append('-k')
if sys.stdout.isatty() and tty.msg_enabled():
curl_args.append('-#') # status bar when using a tty
else:
curl_args.append('-sS') # show errors if fail
connect_timeout = spack.config.get('config:connect_timeout', 10)
if self.extra_options:
cookie = self.extra_options.get('cookie')
if cookie:
curl_args.append('-j') # junk cookies
curl_args.append('-b') # specify cookie
curl_args.append(cookie)
timeout = self.extra_options.get('timeout')
if timeout:
connect_timeout = max(connect_timeout, int(timeout))
if connect_timeout > 0:
# Timeout if can't establish a connection after n sec.
curl_args.extend(['--connect-timeout', str(connect_timeout)])
# Run curl but grab the mime type from the http headers
curl = self.curl
with working_dir(self.stage.path):
headers = curl(*curl_args, output=str, fail_on_error=False)
if curl.returncode != 0:
# clean up archive on failure.
if self.archive_file:
os.remove(self.archive_file)
if partial_file and os.path.exists(partial_file):
os.remove(partial_file)
if curl.returncode == 22:
                # This is an HTTP error such as a 404. Curl prints it.
raise FailedDownloadError(
url, "URL %s was not found!" % url)
elif curl.returncode == 60:
# This is a certificate error. Suggest spack -k
raise FailedDownloadError(
url,
"Curl was unable to fetch due to invalid certificate. "
"This is either an attack, or your cluster's SSL "
"configuration is bad. If you believe your SSL "
"configuration is bad, you can try running spack -k, "
"which will not check SSL certificates."
"Use this at your own risk.")
else:
# This is some other curl error. Curl will print the
# error, but print a spack message too
raise FailedDownloadError(
url,
"Curl failed with error %d" % curl.returncode)
self._check_headers(headers)
return partial_file, save_file
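    # With default settings and no tty, the command built above resembles
    # the following (URL and filename are hypothetical):
    #
    #     curl -C - -o x.tgz.part -f -D - -L https://example.com/x.tgz \
    #         -sS --connect-timeout 10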
@property # type: ignore # decorated properties unsupported in mypy
@_needs_stage
def archive_file(self):
"""Path to the source archive within this stage directory."""
return self.stage.archive_file
@property
def cachable(self):
return self.cache_enabled and bool(self.digest)
    @_needs_stage
def expand(self):
if not self.expand_archive:
tty.debug('Staging unexpanded archive {0} in {1}'
.format(self.archive_file, self.stage.source_path))
if not self.stage.expanded:
mkdirp(self.stage.source_path)
dest = os.path.join(self.stage.source_path,
os.path.basename(self.archive_file))
shutil.move(self.archive_file, dest)
return
tty.debug('Staging archive: {0}'.format(self.archive_file))
if not self.archive_file:
raise NoArchiveFileError(
"Couldn't find archive file",
"Failed on expand() for URL %s" % self.url)
if not self.extension:
self.extension = extension(self.archive_file)
if self.stage.expanded:
tty.debug('Source already staged to %s' % self.stage.source_path)
return
decompress = decompressor_for(self.archive_file, self.extension)
# Expand all tarballs in their own directory to contain
# exploding tarballs.
tarball_container = os.path.join(self.stage.path,
"spack-expanded-archive")
        # Below we assume that the command to decompress expands the
        # archive in the current working directory.
mkdirp(tarball_container)
with working_dir(tarball_container):
decompress(self.archive_file)
# Check for an exploding tarball, i.e. one that doesn't expand to
# a single directory. If the tarball *didn't* explode, move its
# contents to the staging source directory & remove the container
# directory. If the tarball did explode, just rename the tarball
# directory to the staging source directory.
#
# NOTE: The tar program on Mac OS X will encode HFS metadata in
# hidden files, which can end up *alongside* a single top-level
# directory. We initially ignore presence of hidden files to
        # accommodate these "semi-exploding" tarballs but ensure the files
# are copied to the source directory.
files = os.listdir(tarball_container)
non_hidden = [f for f in files if not f.startswith('.')]
if len(non_hidden) == 1:
src = os.path.join(tarball_container, non_hidden[0])
if os.path.isdir(src):
self.stage.srcdir = non_hidden[0]
shutil.move(src, self.stage.source_path)
if len(files) > 1:
files.remove(non_hidden[0])
for f in files:
src = os.path.join(tarball_container, f)
dest = os.path.join(self.stage.path, f)
shutil.move(src, dest)
os.rmdir(tarball_container)
else:
# This is a non-directory entry (e.g., a patch file) so simply
# rename the tarball container to be the source path.
shutil.move(tarball_container, self.stage.source_path)
else:
shutil.move(tarball_container, self.stage.source_path)
    def archive(self, destination):
"""Just moves this archive to the destination."""
if not self.archive_file:
raise NoArchiveFileError("Cannot call archive() before fetching.")
spack.util.web.push_to_url(
self.archive_file,
destination,
keep_original=True)
    @_needs_stage
def check(self):
"""Check the downloaded archive against a checksum digest.
No-op if this stage checks code out of a repository."""
if not self.digest:
raise NoDigestError(
"Attempt to check URLFetchStrategy with no digest.")
checker = crypto.Checker(self.digest)
if not checker.check(self.archive_file):
raise ChecksumError(
"%s checksum failed for %s" %
(checker.hash_name, self.archive_file),
"Expected %s but got %s" % (self.digest, checker.sum))
    @_needs_stage
def reset(self):
"""
Removes the source path if it exists, then re-expands the archive.
"""
if not self.archive_file:
raise NoArchiveFileError(
"Tried to reset URLFetchStrategy before fetching",
"Failed on reset() for URL %s" % self.url)
# Remove everything but the archive from the stage
for filename in os.listdir(self.stage.path):
abspath = os.path.join(self.stage.path, filename)
if abspath != self.archive_file:
shutil.rmtree(abspath, ignore_errors=True)
# Expand the archive again
self.expand()
def __repr__(self):
url = self.url if self.url else "no url"
return "%s<%s>" % (self.__class__.__name__, url)
def __str__(self):
if self.url:
return self.url
else:
return "[no url]"
@fetcher
class CacheURLFetchStrategy(URLFetchStrategy):
"""The resource associated with a cache URL may be out of date."""
    @_needs_stage
def fetch(self):
path = re.sub('^file://', '', self.url)
# check whether the cache file exists.
if not os.path.isfile(path):
raise NoCacheError('No cache of %s' % path)
# remove old symlink if one is there.
filename = self.stage.save_filename
if os.path.exists(filename):
os.remove(filename)
# Symlink to local cached archive.
os.symlink(path, filename)
# Remove link if checksum fails, or subsequent fetchers
# will assume they don't need to download.
if self.digest:
try:
self.check()
except ChecksumError:
os.remove(self.archive_file)
raise
# Notify the user how we fetched.
tty.msg('Using cached archive: {0}'.format(path))
class VCSFetchStrategy(FetchStrategy):
"""Superclass for version control system fetch strategies.
Like all fetchers, VCS fetchers are identified by the attributes
passed to the ``version`` directive. The optional_attrs for a VCS
fetch strategy represent types of revisions, e.g. tags, branches,
commits, etc.
The required attributes (git, svn, etc.) are used to specify the URL
and to distinguish a VCS fetch strategy from a URL fetch strategy.
"""
def __init__(self, **kwargs):
super(VCSFetchStrategy, self).__init__(**kwargs)
# Set a URL based on the type of fetch strategy.
self.url = kwargs.get(self.url_attr, None)
if not self.url:
raise ValueError(
"%s requires %s argument." % (self.__class__, self.url_attr))
for attr in self.optional_attrs:
setattr(self, attr, kwargs.get(attr, None))
    @_needs_stage
def check(self):
tty.debug('No checksum needed when fetching with {0}'
.format(self.url_attr))
    @_needs_stage
def expand(self):
tty.debug(
"Source fetched with %s is already expanded." % self.url_attr)
    @_needs_stage
def archive(self, destination, **kwargs):
assert (extension(destination) == 'tar.gz')
assert (self.stage.source_path.startswith(self.stage.path))
tar = which('tar', required=True)
patterns = kwargs.get('exclude', None)
if patterns is not None:
if isinstance(patterns, six.string_types):
patterns = [patterns]
for p in patterns:
tar.add_default_arg('--exclude=%s' % p)
with working_dir(self.stage.path):
if self.stage.srcdir:
# Here we create an archive with the default repository name.
# The 'tar' command has options for changing the name of a
# directory that is included in the archive, but they differ
# based on OS, so we temporarily rename the repo
with temp_rename(self.stage.source_path, self.stage.srcdir):
tar('-czf', destination, self.stage.srcdir)
else:
tar('-czf', destination,
os.path.basename(self.stage.source_path))
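    # When an ``exclude`` pattern is given, the invocation built above
    # resembles the following (paths are hypothetical):
    #
    #     tar --exclude=.git -czf /mirror/pkg.tar.gz <repo-name>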
def __str__(self):
return "VCS: %s" % self.url
def __repr__(self):
return "%s<%s>" % (self.__class__, self.url)
@fetcher
class GoFetchStrategy(VCSFetchStrategy):
"""Fetch strategy that employs the `go get` infrastructure.
Use like this in a package:
version('name',
go='github.com/monochromegane/the_platinum_searcher/...')
    Go get does not natively support versions; they can be faked with git.
    The fetched source will be moved to the standard stage source path
    directory during the expand step.
"""
url_attr = 'go'
def __init__(self, **kwargs):
# Discards the keywords in kwargs that may conflict with the next
# call to __init__
forwarded_args = copy.copy(kwargs)
forwarded_args.pop('name', None)
super(GoFetchStrategy, self).__init__(**forwarded_args)
self._go = None
@property
def go_version(self):
vstring = self.go('version', output=str).split(' ')[2]
return spack.version.Version(vstring)
@property
def go(self):
if not self._go:
self._go = which('go', required=True)
return self._go
    @_needs_stage
def fetch(self):
tty.debug('Getting go resource: {0}'.format(self.url))
with working_dir(self.stage.path):
try:
os.mkdir('go')
except OSError:
pass
env = dict(os.environ)
env['GOPATH'] = os.path.join(os.getcwd(), 'go')
self.go('get', '-v', '-d', self.url, env=env)
    def archive(self, destination):
super(GoFetchStrategy, self).archive(destination, exclude='.git')
    @_needs_stage
def expand(self):
tty.debug(
"Source fetched with %s is already expanded." % self.url_attr)
# Move the directory to the well-known stage source path
repo_root = _ensure_one_stage_entry(self.stage.path)
shutil.move(repo_root, self.stage.source_path)
    @_needs_stage
def reset(self):
with working_dir(self.stage.source_path):
self.go('clean')
def __str__(self):
return "[go] %s" % self.url
@fetcher
class GitFetchStrategy(VCSFetchStrategy):
"""
Fetch strategy that gets source code from a git repository.
Use like this in a package:
version('name', git='https://github.com/project/repo.git')
    Optionally, you can provide a branch, tag, or commit to check out, e.g.:
version('1.1', git='https://github.com/project/repo.git', tag='v1.1')
You can use these three optional attributes in addition to ``git``:
* ``branch``: Particular branch to build from (default is the
repository's default branch)
* ``tag``: Particular tag to check out
* ``commit``: Particular commit hash in the repo
Repositories are cloned into the standard stage source path directory.
"""
url_attr = 'git'
optional_attrs = ['tag', 'branch', 'commit', 'submodules',
'get_full_repo', 'submodules_delete']
git_version_re = r'git version (\S+)'
def __init__(self, **kwargs):
# Discards the keywords in kwargs that may conflict with the next call
# to __init__
forwarded_args = copy.copy(kwargs)
forwarded_args.pop('name', None)
super(GitFetchStrategy, self).__init__(**forwarded_args)
self._git = None
self.submodules = kwargs.get('submodules', False)
self.submodules_delete = kwargs.get('submodules_delete', False)
self.get_full_repo = kwargs.get('get_full_repo', False)
@property
def git_version(self):
return GitFetchStrategy.version_from_git(self.git)
    @staticmethod
def version_from_git(git_exe):
"""Given a git executable, return the Version (this will fail if
the output cannot be parsed into a valid Version).
"""
version_output = git_exe('--version', output=str)
m = re.search(GitFetchStrategy.git_version_re, version_output)
return spack.version.Version(m.group(1))
@property
def git(self):
if not self._git:
self._git = which('git', required=True)
# Disable advice for a quieter fetch
# https://github.com/git/git/blob/master/Documentation/RelNotes/1.7.2.txt
if self.git_version >= spack.version.Version('1.7.2'):
self._git.add_default_arg('-c')
self._git.add_default_arg('advice.detachedHead=false')
# If the user asked for insecure fetching, make that work
# with git as well.
if not spack.config.get('config:verify_ssl'):
self._git.add_default_env('GIT_SSL_NO_VERIFY', 'true')
return self._git
@property
def cachable(self):
return self.cache_enabled and bool(self.commit or self.tag)
    def source_id(self):
return self.commit or self.tag
    def mirror_id(self):
repo_ref = self.commit or self.tag or self.branch
if repo_ref:
repo_path = url_util.parse(self.url).path
result = os.path.sep.join(['git', repo_path, repo_ref])
return result
def _repo_info(self):
args = ''
if self.commit:
args = ' at commit {0}'.format(self.commit)
elif self.tag:
args = ' at tag {0}'.format(self.tag)
elif self.branch:
args = ' on branch {0}'.format(self.branch)
return '{0}{1}'.format(self.url, args)
    @_needs_stage
def fetch(self):
if self.stage.expanded:
tty.debug('Already fetched {0}'.format(self.stage.source_path))
return
self.clone(commit=self.commit, branch=self.branch, tag=self.tag)
    def clone(self, dest=None, commit=None, branch=None, tag=None, bare=False):
"""
Clone a repository to a path.
This method handles cloning from git, but does not require a stage.
Arguments:
dest (str or None): The path into which the code is cloned. If None,
requires a stage and uses the stage's source path.
commit (str or None): A commit to fetch from the remote. Only one of
commit, branch, and tag may be non-None.
branch (str or None): A branch to fetch from the remote.
tag (str or None): A tag to fetch from the remote.
bare (bool): Execute a "bare" git clone (--bare option to git)
"""
# Default to spack source path
dest = dest or self.stage.source_path
tty.debug('Cloning git repository: {0}'.format(self._repo_info()))
git = self.git
debug = spack.config.get('config:debug')
if bare:
# We don't need to worry about which commit/branch/tag is checked out
clone_args = ['clone', '--bare']
if not debug:
clone_args.append('--quiet')
clone_args.extend([self.url, dest])
git(*clone_args)
elif commit:
# Need to do a regular clone and check out everything if
# they asked for a particular commit.
clone_args = ['clone', self.url]
if not debug:
clone_args.insert(1, '--quiet')
with temp_cwd():
git(*clone_args)
repo_name = get_single_file('.')
if self.stage:
self.stage.srcdir = repo_name
shutil.move(repo_name, dest)
with working_dir(dest):
checkout_args = ['checkout', commit]
if not debug:
checkout_args.insert(1, '--quiet')
git(*checkout_args)
else:
# Can be more efficient if not checking out a specific commit.
args = ['clone']
if not debug:
args.append('--quiet')
# If we want a particular branch ask for it.
if branch:
args.extend(['--branch', branch])
elif tag and self.git_version >= spack.version.ver('1.8.5.2'):
args.extend(['--branch', tag])
# Try to be efficient if we're using a new enough git.
# This checks out only one branch's history
if self.git_version >= spack.version.ver('1.7.10'):
if self.get_full_repo:
args.append('--no-single-branch')
else:
args.append('--single-branch')
with temp_cwd():
# Yet more efficiency: only download a 1-commit deep
# tree, if the in-use git and protocol permit it.
if (not self.get_full_repo) and \
self.git_version >= spack.version.ver('1.7.1') and \
self.protocol_supports_shallow_clone():
args.extend(['--depth', '1'])
args.extend([self.url])
git(*args)
repo_name = get_single_file('.')
if self.stage:
self.stage.srcdir = repo_name
shutil.move(repo_name, dest)
with working_dir(dest):
# For tags, be conservative and check them out AFTER
# cloning. Later git versions can do this with clone
# --branch, but older ones fail.
if tag and self.git_version < spack.version.ver('1.8.5.2'):
# pull --tags returns a "special" error code of 1 in
# older versions that we have to ignore.
# see: https://github.com/git/git/commit/19d122b
pull_args = ['pull', '--tags']
co_args = ['checkout', self.tag]
if not spack.config.get('config:debug'):
pull_args.insert(1, '--quiet')
co_args.insert(1, '--quiet')
git(*pull_args, ignore_errors=1)
git(*co_args)
if self.submodules_delete:
with working_dir(dest):
for submodule_to_delete in self.submodules_delete:
args = ['rm', submodule_to_delete]
if not spack.config.get('config:debug'):
args.insert(1, '--quiet')
git(*args)
# Init submodules if the user asked for them.
if self.submodules:
with working_dir(dest):
args = ['submodule', 'update', '--init', '--recursive']
if not spack.config.get('config:debug'):
args.insert(1, '--quiet')
git(*args)
    def archive(self, destination):
super(GitFetchStrategy, self).archive(destination, exclude='.git')
    @_needs_stage
def reset(self):
with working_dir(self.stage.source_path):
co_args = ['checkout', '.']
clean_args = ['clean', '-f']
            if not spack.config.get('config:debug'):
co_args.insert(1, '--quiet')
clean_args.insert(1, '--quiet')
self.git(*co_args)
self.git(*clean_args)
    def protocol_supports_shallow_clone(self):
"""Shallow clone operations (--depth #) are not supported by the basic
HTTP protocol or by no-protocol file specifications.
Use (e.g.) https:// or file:// instead."""
return not (self.url.startswith('http://') or
self.url.startswith('/'))
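    # Illustrative results (URLs hypothetical):
    #
    #     'https://example.com/repo.git' -> True
    #     'http://example.com/repo.git'  -> False
    #     '/path/to/local/repo'          -> False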
def __str__(self):
return '[git] {0}'.format(self._repo_info())
@fetcher
class CvsFetchStrategy(VCSFetchStrategy):
"""Fetch strategy that gets source code from a CVS repository.
Use like this in a package:
version('name',
cvs=':pserver:anonymous@www.example.com:/cvsroot%module=modulename')
Optionally, you can provide a branch and/or a date for the URL:
version('name',
cvs=':pserver:anonymous@www.example.com:/cvsroot%module=modulename',
branch='branchname', date='date')
Repositories are checked out into the standard stage source path directory.
"""
url_attr = 'cvs'
optional_attrs = ['branch', 'date']
def __init__(self, **kwargs):
# Discards the keywords in kwargs that may conflict with the next call
# to __init__
forwarded_args = copy.copy(kwargs)
forwarded_args.pop('name', None)
super(CvsFetchStrategy, self).__init__(**forwarded_args)
self._cvs = None
if self.branch is not None:
self.branch = str(self.branch)
if self.date is not None:
self.date = str(self.date)
@property
def cvs(self):
if not self._cvs:
self._cvs = which('cvs', required=True)
return self._cvs
@property
def cachable(self):
return self.cache_enabled and (bool(self.branch) or bool(self.date))
    def source_id(self):
if not (self.branch or self.date):
# We need a branch or a date to make a checkout reproducible
return None
id = 'id'
if self.branch:
id += '-branch=' + self.branch
if self.date:
id += '-date=' + self.date
return id
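    # Illustrative: with branch='stable' and date='2021-07-01' (both
    # hypothetical), source_id() returns 'id-branch=stable-date=2021-07-01'.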
    def mirror_id(self):
if not (self.branch or self.date):
# We need a branch or a date to make a checkout reproducible
return None
repo_path = url_util.parse(self.url).path
result = os.path.sep.join(['cvs', repo_path])
if self.branch:
result += '%branch=' + self.branch
if self.date:
result += '%date=' + self.date
return result
    @_needs_stage
def fetch(self):
if self.stage.expanded:
tty.debug('Already fetched {0}'.format(self.stage.source_path))
return
tty.debug('Checking out CVS repository: {0}'.format(self.url))
with temp_cwd():
url, module = self.url.split('%module=')
# Check out files
args = ['-z9', '-d', url, 'checkout']
if self.branch is not None:
args.extend(['-r', self.branch])
if self.date is not None:
args.extend(['-D', self.date])
args.append(module)
self.cvs(*args)
# Rename repo
repo_name = get_single_file('.')
self.stage.srcdir = repo_name
shutil.move(repo_name, self.stage.source_path)
def _remove_untracked_files(self):
"""Removes untracked files in a CVS repository."""
with working_dir(self.stage.source_path):
status = self.cvs('-qn', 'update', output=str)
for line in status.split('\n'):
if re.match(r'^[?]', line):
path = line[2:].strip()
if os.path.isfile(path):
os.unlink(path)
    def archive(self, destination):
super(CvsFetchStrategy, self).archive(destination, exclude='CVS')
    @_needs_stage
def reset(self):
self._remove_untracked_files()
with working_dir(self.stage.source_path):
self.cvs('update', '-C', '.')
def __str__(self):
return "[cvs] %s" % self.url
@fetcher
class SvnFetchStrategy(VCSFetchStrategy):
"""Fetch strategy that gets source code from a subversion repository.
Use like this in a package:
version('name', svn='http://www.example.com/svn/trunk')
Optionally, you can provide a revision for the URL:
version('name', svn='http://www.example.com/svn/trunk',
revision='1641')
Repositories are checked out into the standard stage source path directory.
"""
url_attr = 'svn'
optional_attrs = ['revision']
def __init__(self, **kwargs):
# Discards the keywords in kwargs that may conflict with the next call
# to __init__
forwarded_args = copy.copy(kwargs)
forwarded_args.pop('name', None)
super(SvnFetchStrategy, self).__init__(**forwarded_args)
self._svn = None
if self.revision is not None:
self.revision = str(self.revision)
@property
def svn(self):
if not self._svn:
self._svn = which('svn', required=True)
return self._svn
@property
def cachable(self):
return self.cache_enabled and bool(self.revision)
    def source_id(self):
return self.revision
    def mirror_id(self):
if self.revision:
repo_path = url_util.parse(self.url).path
result = os.path.sep.join(['svn', repo_path, self.revision])
return result
    @_needs_stage
def fetch(self):
if self.stage.expanded:
tty.debug('Already fetched {0}'.format(self.stage.source_path))
return
tty.debug('Checking out subversion repository: {0}'.format(self.url))
args = ['checkout', '--force', '--quiet']
if self.revision:
args += ['-r', self.revision]
args.extend([self.url])
with temp_cwd():
self.svn(*args)
repo_name = get_single_file('.')
self.stage.srcdir = repo_name
shutil.move(repo_name, self.stage.source_path)
def _remove_untracked_files(self):
"""Removes untracked files in an svn repository."""
with working_dir(self.stage.source_path):
status = self.svn('status', '--no-ignore', output=str)
for line in status.split('\n'):
if not re.match('^[I?]', line):
continue
path = line[8:].strip()
if os.path.isfile(path):
os.unlink(path)
elif os.path.isdir(path):
shutil.rmtree(path, ignore_errors=True)
    def archive(self, destination):
super(SvnFetchStrategy, self).archive(destination, exclude='.svn')
    @_needs_stage
def reset(self):
self._remove_untracked_files()
with working_dir(self.stage.source_path):
self.svn('revert', '.', '-R')
def __str__(self):
return "[svn] %s" % self.url
@fetcher
class HgFetchStrategy(VCSFetchStrategy):
"""
Fetch strategy that gets source code from a Mercurial repository.
Use like this in a package:
version('name', hg='https://jay.grs.rwth-aachen.de/hg/lwm2')
    Optionally, you can provide a branch or revision to check out, e.g.:
version('torus',
hg='https://jay.grs.rwth-aachen.de/hg/lwm2', branch='torus')
You can use the optional 'revision' attribute to check out a
branch, tag, or particular revision in hg. To prevent
non-reproducible builds, using a moving target like a branch is
discouraged.
* ``revision``: Particular revision, branch, or tag.
Repositories are cloned into the standard stage source path directory.
"""
url_attr = 'hg'
optional_attrs = ['revision']
def __init__(self, **kwargs):
# Discards the keywords in kwargs that may conflict with the next call
# to __init__
forwarded_args = copy.copy(kwargs)
forwarded_args.pop('name', None)
super(HgFetchStrategy, self).__init__(**forwarded_args)
self._hg = None
@property
def hg(self):
"""
Returns:
Executable: the hg executable
"""
if not self._hg:
self._hg = which('hg', required=True)
# When building PythonPackages, Spack automatically sets
# PYTHONPATH. This can interfere with hg, which is a Python
# script. Unset PYTHONPATH while running hg.
self._hg.add_default_env('PYTHONPATH', '')
return self._hg
@property
def cachable(self):
return self.cache_enabled and bool(self.revision)
    def source_id(self):
return self.revision
    def mirror_id(self):
if self.revision:
repo_path = url_util.parse(self.url).path
result = os.path.sep.join(['hg', repo_path, self.revision])
return result
    @_needs_stage
def fetch(self):
if self.stage.expanded:
tty.debug('Already fetched {0}'.format(self.stage.source_path))
return
        repo_info = self.url
        if self.revision:
            repo_info += ' at revision {0}'.format(self.revision)
        tty.debug('Cloning mercurial repository: {0}'.format(repo_info))
args = ['clone']
if not spack.config.get('config:verify_ssl'):
args.append('--insecure')
if self.revision:
args.extend(['-r', self.revision])
args.extend([self.url])
with temp_cwd():
self.hg(*args)
repo_name = get_single_file('.')
self.stage.srcdir = repo_name
shutil.move(repo_name, self.stage.source_path)
    def archive(self, destination):
super(HgFetchStrategy, self).archive(destination, exclude='.hg')
    @_needs_stage
def reset(self):
with working_dir(self.stage.path):
source_path = self.stage.source_path
scrubbed = "scrubbed-source-tmp"
args = ['clone']
if self.revision:
args += ['-r', self.revision]
args += [source_path, scrubbed]
self.hg(*args)
shutil.rmtree(source_path, ignore_errors=True)
shutil.move(scrubbed, source_path)
def __str__(self):
return "[hg] %s" % self.url
@fetcher
class S3FetchStrategy(URLFetchStrategy):
"""FetchStrategy that pulls from an S3 bucket."""
url_attr = 's3'
def __init__(self, *args, **kwargs):
try:
super(S3FetchStrategy, self).__init__(*args, **kwargs)
except ValueError:
if not kwargs.get('url'):
raise ValueError(
"S3FetchStrategy requires a url for fetching.")
    @_needs_stage
def fetch(self):
if self.archive_file:
tty.debug('Already downloaded {0}'.format(self.archive_file))
return
parsed_url = url_util.parse(self.url)
if parsed_url.scheme != 's3':
raise FetchError(
'S3FetchStrategy can only fetch from s3:// urls.')
tty.debug('Fetching {0}'.format(self.url))
basename = os.path.basename(parsed_url.path)
with working_dir(self.stage.path):
_, headers, stream = spack.util.web.read_from_url(self.url)
with open(basename, 'wb') as f:
shutil.copyfileobj(stream, f)
content_type = spack.util.web.get_header(headers, 'Content-type')
if content_type == 'text/html':
warn_content_type_mismatch(self.archive_file or "the archive")
if self.stage.save_filename:
os.rename(
os.path.join(self.stage.path, basename),
self.stage.save_filename)
if not self.archive_file:
raise FailedDownloadError(self.url)
@fetcher
class GCSFetchStrategy(URLFetchStrategy):
"""FetchStrategy that pulls from a GCS bucket."""
url_attr = 'gs'
def __init__(self, *args, **kwargs):
try:
super(GCSFetchStrategy, self).__init__(*args, **kwargs)
except ValueError:
if not kwargs.get('url'):
raise ValueError(
"GCSFetchStrategy requires a url for fetching.")
    @_needs_stage
def fetch(self):
import spack.util.web as web_util
if self.archive_file:
tty.debug('Already downloaded {0}'.format(self.archive_file))
return
parsed_url = url_util.parse(self.url)
if parsed_url.scheme != 'gs':
raise FetchError(
'GCSFetchStrategy can only fetch from gs:// urls.')
tty.debug('Fetching {0}'.format(self.url))
basename = os.path.basename(parsed_url.path)
with working_dir(self.stage.path):
_, headers, stream = web_util.read_from_url(self.url)
with open(basename, 'wb') as f:
shutil.copyfileobj(stream, f)
content_type = web_util.get_header(headers, 'Content-type')
if content_type == 'text/html':
warn_content_type_mismatch(self.archive_file or "the archive")
if self.stage.save_filename:
os.rename(
os.path.join(self.stage.path, basename),
self.stage.save_filename)
if not self.archive_file:
raise FailedDownloadError(self.url)
def stable_target(fetcher):
"""Returns whether the fetcher target is expected to have a stable
checksum. This is only true if the target is a preexisting archive
file."""
if isinstance(fetcher, URLFetchStrategy) and fetcher.cachable:
return True
return False
def from_url(url):
"""Given a URL, find an appropriate fetch strategy for it.
    Currently just gives you a URLFetchStrategy, which fetches with curl
    or urllib depending on the ``config:url_fetch_method`` setting.
TODO: make this return appropriate fetch strategies for other
types of URLs.
"""
return URLFetchStrategy(url)
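# Typical usage (the URL is hypothetical):
#
#     fetcher = from_url('https://example.com/foo-1.0.tar.gz')
#     # fetcher is a URLFetchStrategy; attach a stage before calling fetch()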
def from_kwargs(**kwargs):
"""Construct an appropriate FetchStrategy from the given keyword arguments.
Args:
**kwargs: dictionary of keyword arguments, e.g. from a
``version()`` directive in a package.
Returns:
        FetchStrategy: The fetch strategy instance that matches the args,
        based on attribute names (e.g., ``git``, ``hg``, etc.)
Raises:
FetchError: If no ``fetch_strategy`` matches the args.
"""
for fetcher in all_strategies:
if fetcher.matches(kwargs):
return fetcher(**kwargs)
raise InvalidArgsError(**kwargs)
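# Illustrative (the URL is hypothetical): keyword names select the
# strategy, so this returns a GitFetchStrategy because 'git' is its
# url_attr:
#
#     fetcher = from_kwargs(git='https://example.com/repo.git', tag='v1.1')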
def check_pkg_attributes(pkg):
"""Find ambiguous top-level fetch attributes in a package.
Currently this only ensures that two or more VCS fetch strategies are
not specified at once.
"""
# a single package cannot have URL attributes for multiple VCS fetch
# strategies *unless* they are the same attribute.
conflicts = set([s.url_attr for s in all_strategies
if hasattr(pkg, s.url_attr)])
# URL isn't a VCS fetch method. We can use it with a VCS method.
conflicts -= set(['url'])
if len(conflicts) > 1:
raise FetcherConflict(
'Package %s cannot specify %s together. Pick at most one.'
% (pkg.name, comma_and(quote(conflicts))))
def _check_version_attributes(fetcher, pkg, version):
"""Ensure that the fetcher for a version is not ambiguous.
This assumes that we have already determined the fetcher for the
specific version using ``for_package_version()``
"""
all_optionals = set(a for s in all_strategies for a in s.optional_attrs)
args = pkg.versions[version]
    extra = (set(args) - set(fetcher.optional_attrs) -
             set([fetcher.url_attr, 'no_cache']))
extra.intersection_update(all_optionals)
if extra:
legal_attrs = [fetcher.url_attr] + list(fetcher.optional_attrs)
raise FetcherConflict(
"%s version '%s' has extra arguments: %s"
% (pkg.name, version, comma_and(quote(extra))),
"Valid arguments for a %s fetcher are: \n %s"
% (fetcher.url_attr, comma_and(quote(legal_attrs))))
def _extrapolate(pkg, version):
"""Create a fetcher from an extrapolated URL for this version."""
try:
return URLFetchStrategy(pkg.url_for_version(version),
fetch_options=pkg.fetch_options)
except spack.package.NoURLError:
msg = ("Can't extrapolate a URL for version %s "
"because package %s defines no URLs")
raise ExtrapolationError(msg % (version, pkg.name))
def _from_merged_attrs(fetcher, pkg, version):
"""Create a fetcher from merged package and version attributes."""
if fetcher.url_attr == 'url':
url = pkg.url_for_version(version)
# TODO: refactor this logic into its own method or function
# TODO: to avoid duplication
mirrors = [spack.url.substitute_version(u, version)
for u in getattr(pkg, 'urls', [])[1:]]
attrs = {fetcher.url_attr: url, 'mirrors': mirrors}
else:
url = getattr(pkg, fetcher.url_attr)
attrs = {fetcher.url_attr: url}
attrs['fetch_options'] = pkg.fetch_options
attrs.update(pkg.versions[version])
return fetcher(**attrs)
def for_package_version(pkg, version):
"""Determine a fetch strategy based on the arguments supplied to
version() in the package description."""
# No-code packages have a custom fetch strategy to work around issues
# with resource staging.
if not pkg.has_code:
return BundleFetchStrategy()
check_pkg_attributes(pkg)
if not isinstance(version, spack.version.Version):
version = spack.version.Version(version)
# if it's a commit, we must use a GitFetchStrategy
if version.is_commit and hasattr(pkg, "git"):
# Populate the version with comparisons to other commits
version.generate_commit_lookup(pkg)
fetcher = GitFetchStrategy(git=pkg.git, commit=str(version))
return fetcher
# If it's not a known version, try to extrapolate one by URL
if version not in pkg.versions:
return _extrapolate(pkg, version)
# Set package args first so version args can override them
args = {'fetch_options': pkg.fetch_options}
# Grab a dict of args out of the package version dict
args.update(pkg.versions[version])
# If the version specifies a `url_attr` directly, use that.
for fetcher in all_strategies:
if fetcher.url_attr in args:
_check_version_attributes(fetcher, pkg, version)
return fetcher(**args)
# if a version's optional attributes imply a particular fetch
# strategy, and we have the `url_attr`, then use that strategy.
for fetcher in all_strategies:
if hasattr(pkg, fetcher.url_attr) or fetcher.url_attr == 'url':
optionals = fetcher.optional_attrs
if optionals and any(a in args for a in optionals):
_check_version_attributes(fetcher, pkg, version)
return _from_merged_attrs(fetcher, pkg, version)
# if the optional attributes tell us nothing, then use any `url_attr`
# on the package. This prefers URL vs. VCS, b/c URLFetchStrategy is
# defined first in this file.
for fetcher in all_strategies:
if hasattr(pkg, fetcher.url_attr):
_check_version_attributes(fetcher, pkg, version)
return _from_merged_attrs(fetcher, pkg, version)
raise InvalidArgsError(pkg, version, **args)
def from_url_scheme(url, *args, **kwargs):
"""Finds a suitable FetchStrategy by matching its url_attr with the scheme
in the given url."""
url = kwargs.get('url', url)
parsed_url = urllib_parse.urlparse(url, scheme='file')
scheme_mapping = (
kwargs.get('scheme_mapping') or
{
'file': 'url',
'http': 'url',
'https': 'url',
'ftp': 'url',
'ftps': 'url',
})
scheme = parsed_url.scheme
scheme = scheme_mapping.get(scheme, scheme)
for fetcher in all_strategies:
url_attr = getattr(fetcher, 'url_attr', None)
if url_attr and url_attr == scheme:
return fetcher(url, *args, **kwargs)
raise ValueError(
'No FetchStrategy found for url with scheme: "{SCHEME}"'.format(
SCHEME=parsed_url.scheme))
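# Illustrative (URLs hypothetical): the URL scheme selects the strategy.
#
#     from_url_scheme('https://example.com/x.tgz')  # -> URLFetchStrategy
#     from_url_scheme('s3://bucket/x.tgz')          # -> S3FetchStrategy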
def from_list_url(pkg):
"""If a package provides a URL which lists URLs for resources by
version, this can can create a fetcher for a URL discovered for
the specified package's version."""
if pkg.list_url:
try:
versions = pkg.fetch_remote_versions()
try:
# get a URL, and a checksum if we have it
url_from_list = versions[pkg.version]
checksum = None
# try to find a known checksum for version, from the package
version = pkg.version
if version in pkg.versions:
args = pkg.versions[version]
checksum = next(
(v for k, v in args.items() if k in crypto.hashes),
args.get('checksum'))
# construct a fetcher
return URLFetchStrategy(url_from_list, checksum,
fetch_options=pkg.fetch_options)
except KeyError as e:
tty.debug(e)
tty.msg("Cannot find version %s in url_list" % pkg.version)
except BaseException as e:
# TODO: Don't catch BaseException here! Be more specific.
tty.debug(e)
tty.msg("Could not determine url from list_url.")
class FsCache(object):
def __init__(self, root):
self.root = os.path.abspath(root)
    def store(self, fetcher, relative_dest):
# skip fetchers that aren't cachable
if not fetcher.cachable:
return
# Don't store things that are already cached.
if isinstance(fetcher, CacheURLFetchStrategy):
return
dst = os.path.join(self.root, relative_dest)
mkdirp(os.path.dirname(dst))
fetcher.archive(dst)
    def fetcher(self, target_path, digest, **kwargs):
path = os.path.join(self.root, target_path)
return CacheURLFetchStrategy(path, digest, **kwargs)
    def destroy(self):
shutil.rmtree(self.root, ignore_errors=True)
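# Sketch of typical cache usage (paths and digest are hypothetical):
#
#     cache = FsCache('/var/spack/cache')
#     cache.store(fetcher, 'archive/ab/abc123')  # archive a cachable fetcher
#     cached = cache.fetcher('archive/ab/abc123', 'abc123')
#     # ``cached`` is a CacheURLFetchStrategy pointing at the stored file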
class FetchError(spack.error.SpackError):
    """Superclass for fetcher errors."""
class NoCacheError(FetchError):
"""Raised when there is no cached archive for a package."""
class FailedDownloadError(FetchError):
"""Raised when a download fails."""
def __init__(self, url, msg=""):
super(FailedDownloadError, self).__init__(
"Failed to fetch file from URL: %s" % url, msg)
self.url = url
class NoArchiveFileError(FetchError):
    """Raised when an archive file is expected but none exists."""
class NoDigestError(FetchError):
"""Raised after attempt to checksum when URL has no digest."""
class FetcherConflict(FetchError):
"""Raised for packages with invalid fetch attributes."""
class InvalidArgsError(FetchError):
"""Raised when a version can't be deduced from a set of arguments."""
def __init__(self, pkg=None, version=None, **args):
msg = "Could not guess a fetch strategy"
if pkg:
msg += ' for {pkg}'.format(pkg=pkg)
if version:
msg += '@{version}'.format(version=version)
long_msg = 'with arguments: {args}'.format(args=args)
super(InvalidArgsError, self).__init__(msg, long_msg)
class ChecksumError(FetchError):
"""Raised when archive fails to checksum."""
class NoStageError(FetchError):
"""Raised when fetch operations are called before set_stage()."""
def __init__(self, method):
super(NoStageError, self).__init__(
"Must call FetchStrategy.set_stage() before calling %s" %
method.__name__)