Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
url = link.url.split('#', 1)[0]
# Check for VCS schemes that do not support lookup as web pages.
vcs_scheme = _match_vcs_scheme(url)
if vcs_scheme:
logger.debug('Cannot look at %s URL %s', vcs_scheme, link)
return None
# Tack index.html onto file:// URLs that point to directories
scheme, _, path, _, _, _ = urllib_parse.urlparse(url)
if (scheme == 'file' and os.path.isdir(urllib_request.url2pathname(path))):
# add trailing slash if not present so urljoin doesn't trim
# final segment
if not url.endswith('/'):
url += '/'
url = urllib_parse.urljoin(url, 'index.html')
logger.debug(' file: URL is directory, getting %s', url)
try:
resp = _get_html_response(url, session=session)
except _NotHTTP:
logger.debug(
'Skipping page %s because it looks like an archive, and cannot '
'be checked by HEAD.', link,
)
except _NotHTML as exc:
logger.debug(
'Skipping page %s because the %s request got Content-Type: %s',
link, exc.request_desc, exc.content_type,
)
except HTTPError as exc:
_handle_get_page_fail(link, exc)
url = link.url.split('#', 1)[0]
# Check for VCS schemes that do not support lookup as web pages.
vcs_scheme = _match_vcs_scheme(url)
if vcs_scheme:
logger.debug('Cannot look at %s URL %s', vcs_scheme, link)
return None
# Tack index.html onto file:// URLs that point to directories
scheme, _, path, _, _, _ = urllib_parse.urlparse(url)
if (scheme == 'file' and os.path.isdir(urllib_request.url2pathname(path))):
# add trailing slash if not present so urljoin doesn't trim
# final segment
if not url.endswith('/'):
url += '/'
url = urllib_parse.urljoin(url, 'index.html')
logger.debug(' file: URL is directory, getting %s', url)
try:
resp = _get_html_response(url, session=session)
except _NotHTTP:
logger.debug(
'Skipping page %s because it looks like an archive, and cannot '
'be checked by HEAD.', link,
)
except _NotHTML as exc:
logger.debug(
'Skipping page %s because the %s request got Content-Type: %s',
link, exc.request_desc, exc.content_type,
)
except HTTPError as exc:
_handle_get_page_fail(link, exc)
content_type = _get_content_type(url, session=session)
if content_type.lower().startswith('text/html'):
break
else:
logger.debug(
'Skipping page %s because of Content-Type: %s',
link,
content_type,
)
return
logger.debug('Getting page %s', url)
# Tack index.html onto file:// URLs that point to directories
(scheme, netloc, path, params, query, fragment) = \
urllib_parse.urlparse(url)
if (scheme == 'file' and
os.path.isdir(urllib_request.url2pathname(path))):
# add trailing slash if not present so urljoin doesn't trim
# final segment
if not url.endswith('/'):
url += '/'
url = urllib_parse.urljoin(url, 'index.html')
logger.debug(' file: URL is directory, getting %s', url)
resp = session.get(
url,
headers={
"Accept": "text/html",
# We don't want to blindly return cached data for
# /simple/, because authors generally expect that
# twine upload && pip install will function, but if
content_type,
)
return
logger.debug('Getting page %s', url)
# Tack index.html onto file:// URLs that point to directories
(scheme, netloc, path, params, query, fragment) = \
urllib_parse.urlparse(url)
if (scheme == 'file' and
os.path.isdir(urllib_request.url2pathname(path))):
# add trailing slash if not present so urljoin doesn't trim
# final segment
if not url.endswith('/'):
url += '/'
url = urllib_parse.urljoin(url, 'index.html')
logger.debug(' file: URL is directory, getting %s', url)
resp = session.get(
url,
headers={
"Accept": "text/html",
# We don't want to blindly return cached data for
# /simple/, because authors generally expect that
# twine upload && pip install will function, but if
# they've done a pip install in the last ~10 minutes
# it won't. Thus by setting this to zero we will not
# blindly use any cached data, however the benefit of
# using max-age=0 instead of no-cache, is that we will
# still support conditional requests, so we will still
# minimize traffic sent in cases where the page hasn't
# changed at all, we will just always incur the round
import mock
import pip
from unittest import TestCase
from plugins.applications.db2 import db2_crawler
from plugins.applications.db2.feature import DB2Feature
from plugins.applications.db2.db2_container_crawler \
import DB2ContainerCrawler
from plugins.applications.db2.db2_host_crawler \
import DB2HostCrawler
from utils.crawler_exceptions import CrawlError
from requests.exceptions import ConnectionError
# Make sure the ``ibm_db`` driver is installed before the tests below run.
# ``pip.main`` is not a supported programmatic API and no longer exists in
# pip >= 10, so the install is delegated to ``python -m pip`` running under
# the current interpreter. The exit status is deliberately ignored, matching
# the previous best-effort behaviour.
import subprocess
import sys
subprocess.call([sys.executable, '-m', 'pip', 'install', 'ibm_db'])
class MockedDB2Container1(object):
    """Fake container whose ``inspect`` data carries a ports annotation.

    The ``annotation.io.kubernetes.container.ports`` label holds a JSON-like
    string listing a single ``containerPort`` of ``"50000"``.
    """

    def __init__(self, container_id):
        # ``container_id`` is accepted for signature compatibility only; it
        # is not stored on the instance.
        port_annotation = '[ {"containerPort" : "50000"} ]'
        labels = {"annotation.io.kubernetes.container.ports": port_annotation}
        self.inspect = {
            "State": {"Pid": 1234},
            "Config": {"Labels": labels},
        }
class MockedDB2Container2(object):
    """Fake container whose ``inspect`` data has no ports annotation.

    Its labels contain only a placeholder ``dummy`` entry.
    """

    def __init__(self, container_id):
        # ``container_id`` is accepted for signature compatibility only; it
        # is not stored on the instance.
        state = {"Pid": 1234}
        config = {"Labels": {"dummy": "dummy"}}
        self.inspect = {"State": state, "Config": config}
"""
Returns the correct repository URL and revision by parsing the given
repository URL
"""
error_message = (
"Sorry, '%s' is a malformed VCS url. "
"The format is +://, "
"e.g. svn+http://myrepo/svn/MyApp#egg=MyApp"
)
assert '+' in self.url, error_message % self.url
url = self.url.split('+', 1)[1]
scheme, netloc, path, query, frag = urllib_parse.urlsplit(url)
rev = None
if '@' in path:
path, rev = path.rsplit('@', 1)
url = urllib_parse.urlunsplit((scheme, netloc, path, query, ''))
return url, rev
def get_url_rev_and_auth(cls, url):
# type: (str) -> Tuple[str, Optional[str], AuthInfo]
"""
Parse the repository URL to use, and return the URL, revision,
and auth info to use.
Returns: (url, rev, (username, password)).
"""
scheme, netloc, path, query, frag = urllib_parse.urlsplit(url)
if '+' not in scheme:
raise ValueError(
"Sorry, {!r} is a malformed VCS url. "
"The format is +://, "
"e.g. svn+http://myrepo/svn/MyApp#egg=MyApp".format(url)
)
# Remove the vcs prefix.
scheme = scheme.split('+', 1)[1]
netloc, user_pass = cls.get_netloc_and_auth(netloc, scheme)
rev = None
if '@' in path:
path, rev = path.rsplit('@', 1)
if not rev:
raise InstallationError(
"The URL {!r} has an empty revision (after @) "
"which is not supported. Include a revision after @ "
def filename(self):
    """Return the file name component of ``self.url``.

    The name is the last segment of the URL path (trailing slashes are
    ignored), percent-decoded. When the path has no usable segment the
    network location is used instead. An ``AssertionError`` is raised if
    the result is still empty.
    """
    parts = urllib_parse.urlsplit(self.url)
    host, url_path = parts[1], parts[2]
    # Strip trailing slashes first so a directory-style URL still yields
    # its final segment rather than an empty basename.
    last_segment = posixpath.basename(url_path.rstrip('/'))
    if not last_segment:
        last_segment = host
    name = urllib_parse.unquote(last_segment)
    assert name, ('URL %r produced no filename' % self.url)
    return name
metadata field, specified in PEP 345. This may be specified by
a data-requires-python attribute in the HTML link tag, as
described in PEP 503.
:param yanked_reason: the reason the file has been yanked, if the
file has been yanked, or None if the file hasn't been yanked.
This is the value of the "data-yanked" attribute, if present, in
a simple repository HTML link. If the file has been yanked but
no reason was provided, this should be the empty string. See
PEP 592 for more information and the specification.
"""
# url can be a UNC windows share
if url.startswith('\\\\'):
url = path_to_url(url)
self._parsed_url = urllib_parse.urlsplit(url)
# Store the url as a private attribute to prevent accidentally
# trying to set a new value.
self._url = url
self.comes_from = comes_from
self.requires_python = requires_python if requires_python else None
self.yanked_reason = yanked_reason
super(Link, self).__init__(key=url, defining_class=Link)
def pip_install(package_name, interactive=True):
    """Install the distribution providing *package_name* with pip.

    Only the first component of a dotted name is used, and it is translated
    through ``_package_name_map`` when that mapping has an entry for it.

    :param package_name: module or package name, possibly dotted.
    :param interactive: when true, ask for confirmation (via ``yesno``)
        before installing.
    :returns: ``-1`` if the user declined the install; otherwise pip's exit
        status (``0`` on success). If the install fails and the user agrees
        to retry under another name, the result of that retry is returned.
    """
    # Imported locally so this function does not rely on file-level imports.
    import subprocess
    import sys

    package_name = package_name.split('.')[0]
    if package_name in _package_name_map:
        package_name = _package_name_map[package_name]

    if interactive and not yesno('Missing package: "{0}". Do you want to install?'.format(package_name)):
        return -1

    # ``pip.main`` is not a supported API and was removed in pip 10. Running
    # ``python -m pip`` under the current interpreter is the documented way
    # to drive pip from a program and yields the same integer exit status.
    pip_args = ['-q', 'install', package_name]
    pip_ret = subprocess.call([sys.executable, '-m', 'pip'] + pip_args)

    if pip_ret != 0:
        if yesno('Pip cannot install package, maybe package name differs from module name. Try with another name?'):
            # The retry is non-interactive: the user has just confirmed.
            return pip_install(raw_input('package name: ').strip(), False)
    else:
        # Re-run site initialisation after a successful install, presumably
        # so the newly installed package becomes importable in this process.
        reload(site)
    return pip_ret