"""Best-effort parser for requirements.txt files"""

import urllib.parse
aeb21f6
from pathlib import Path
aeb21f6
import sys
aeb21f6
import os
aeb21f6
import re
aeb21f6
aeb21f6
# `#` starts a comment only at end of line and after whitespace
COMMENT_RE = re.compile(r'(^|\s+)#.*$')

# Assume URLs start with a scheme; don't look for "egg=" URLs otherwise
URL_START_RE = re.compile(r'^[-_+a-zA-Z0-9]+://')

# `${NAME}` environment variable references: group `var` is the whole
# reference (so it can be put back unchanged), group `name` is the name only
ENV_VAR_RE = re.compile(r'(?P<var>\$\{(?P<name>[A-Z0-9_]+)\})')

# Leading run of characters taken as the package name of an "egg=" fragment
PKGNAME_RE = re.compile(r'^[-_a-zA-Z0-9]+')
aeb21f6
aeb21f6
# The requirements.txt format evolved rather organically; expect weirdness.

def convert_requirements_txt(lines, path: Path = None):
    """Convert lines of a requirements file to PEP 440-style requirement strs

    This does NOT handle all of requirements.txt features (only pip can do
    that), but tries its best.

    The resulting requirements might not actually be valid (either because
    they're wrong in the file, or because we missed a special case).

    lines is an iterable of the file's lines (for example an open text file).

    path is the path to the requirements.txt file, used for options like `-r`.
    When given, files included with `-r` are looked up relative to its parent
    directory; otherwise the included path is opened as written.

    Returns a list of requirement strings; the requirements of a file
    included with `-r` are inserted at the position of the `-r` line.

    Raises ValueError for any line starting with `-` other than `-r`.
    Errors from opening an included file are not caught here.
    """
    requirements = []
    lines = combine_logical_lines(lines)
    lines = strip_comments(lines)
    lines = expand_env_vars(lines)
    # Only used to prefix the error message below
    if path:
        filename = path.name
    else:
        filename = '<requirements file>'
    for line in lines:
        if URL_START_RE.match(line):
            # Handle URLs with "egg=..." fragments
            # see https://pip.pypa.io/en/stable/cli/pip_install/#vcs-support
            parsed_url = urllib.parse.urlparse(line)
            parsed_fragment = urllib.parse.parse_qs(parsed_url.fragment)
            if 'egg' in parsed_fragment:
                # Prepend the package name to the URL.
                match = PKGNAME_RE.match(parsed_fragment['egg'][0])
                if match:
                    pkg_name = match[0]
                    requirements.append(f'{pkg_name}@{line}')
                    continue
            # If that didn't work, pass the line on;
            # the caller will deal with invalid requirements
            requirements.append(line)
        elif line.startswith('-r'):
            # Include another requirements file, converting it recursively
            recursed_path = line[2:].strip()
            if path:
                recursed_path = path.parent / recursed_path
            recursed_path = Path(recursed_path)
            with recursed_path.open() as f:
                requirements.extend(convert_requirements_txt(f, recursed_path))
        elif line.startswith('-'):
            raise ValueError(f'{filename}: unsupported requirements file option: {line}')
        else:
            requirements.append(line)
    return requirements
aeb21f6
aeb21f6
def combine_logical_lines(lines):
    """Combine logical lines together (backslash line-continuation)

    lines is an iterable of physical lines, with or without line endings.

    Yields one string per logical line: a physical line ending in a
    backslash is joined with the line that follows it (the backslash itself
    is dropped), and whole-line comments are skipped.

    One final item is always yielded for whatever is still pending at the
    end of input; it is the empty string unless the last line ended in a
    backslash.
    """
    pieces = []
    for line in lines:
        # Strip the whole line ending, so that a continuation backslash is
        # still recognized on lines terminated by "\r\n"
        line = line.rstrip('\r\n')
        # Whole-line comments *only* are removed before line-continuation
        if COMMENT_RE.match(line):
            continue
        if line.endswith('\\'):
            pieces.append(line[:-1])
        else:
            # trailing whitespace is only removed from full logical lines
            pieces.append(line.rstrip())
            yield ''.join(pieces)
            pieces = []
    yield ''.join(pieces)
aeb21f6
aeb21f6
aeb21f6
def strip_comments(lines):
    """Strip comments and surrounding whitespace from lines

    Yields each line with its comment removed (as matched by COMMENT_RE:
    a `#` at the start of the line or after whitespace, through the end of
    the line). Lines that end up empty are dropped.
    """
    for line in lines:
        # Keep what precedes the first comment; the whole line if there is none
        line = COMMENT_RE.split(line, maxsplit=1)[0].strip()
        if line:
            yield line
aeb21f6
aeb21f6
aeb21f6
def expand_env_vars(lines):
    """Expand `${NAME}` environment variable references in lines

    Yields each line with every reference matched by ENV_VAR_RE replaced by
    the value of that environment variable. References to variables that
    are not set are left in the line unchanged.
    """
    def repl(match):
        value = os.getenv(match['name'])
        if value is None:
            # Unset variable: put the original `${NAME}` text back
            return match['var']
        return value

    for line in lines:
        yield ENV_VAR_RE.sub(repl, line)