github.com/apache/beam/sdks/v2@v2.48.2/python/setup.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Apache Beam SDK for Python setup file."""
    19  
    20  import os
    21  import sys
    22  import warnings
    23  from pathlib import Path
    24  
    25  # Pylint and isort disagree here.
    26  # pylint: disable=ungrouped-imports
    27  import setuptools
    28  from pkg_resources import DistributionNotFound
    29  from pkg_resources import get_distribution
    30  from pkg_resources import normalize_path
    31  from pkg_resources import parse_version
    32  from pkg_resources import to_filename
    33  from setuptools import Command
    34  
    35  # pylint: disable=wrong-import-order
    36  # It is recommended to import setuptools prior to importing distutils to avoid
    37  # using legacy behavior from distutils.
    38  # https://setuptools.readthedocs.io/en/latest/history.html#v48-0-0
    39  from distutils.errors import DistutilsError  # isort:skip
    40  
    41  
    42  class mypy(Command):
    43    user_options = []
    44  
    45    def initialize_options(self):
    46      """Abstract method that is required to be overwritten"""
    47  
    48    def finalize_options(self):
    49      """Abstract method that is required to be overwritten"""
    50  
    51    def get_project_path(self):
    52      self.run_command('egg_info')
    53  
    54      # Build extensions in-place
    55      self.reinitialize_command('build_ext', inplace=1)
    56      self.run_command('build_ext')
    57  
    58      ei_cmd = self.get_finalized_command("egg_info")
    59  
    60      project_path = normalize_path(ei_cmd.egg_base)
    61      return os.path.join(project_path, to_filename(ei_cmd.egg_name))
    62  
    63    def run(self):
    64      import subprocess
    65      args = ['mypy', self.get_project_path()]
    66      result = subprocess.call(args)
    67      if result != 0:
    68        raise DistutilsError("mypy exited with status %d" % result)
    69  
    70  
    71  def get_version():
    72    global_names = {}
    73    exec(  # pylint: disable=exec-used
    74        open(os.path.join(
    75            os.path.dirname(os.path.abspath(__file__)),
    76            'apache_beam/version.py')
    77            ).read(),
    78        global_names
    79    )
    80    return global_names['__version__']
    81  
    82  
# Distribution metadata. Each of these values is passed to the
# setuptools.setup() call at the bottom of this file.
PACKAGE_NAME = 'apache-beam'
# Obtained by executing apache_beam/version.py (see get_version()).
PACKAGE_VERSION = get_version()
PACKAGE_DESCRIPTION = 'Apache Beam SDK for Python'
PACKAGE_URL = 'https://beam.apache.org'
PACKAGE_DOWNLOAD_URL = 'https://pypi.python.org/pypi/apache-beam'
PACKAGE_AUTHOR = 'Apache Software Foundation'
PACKAGE_EMAIL = 'dev@beam.apache.org'
PACKAGE_KEYWORDS = 'apache beam'
# Used as the `long_description` of the distribution.
PACKAGE_LONG_DESCRIPTION = '''
Apache Beam is a unified programming model for both batch and streaming
data processing, enabling efficient execution across diverse distributed
execution engines and providing extensibility points for connecting to
different technologies and user communities.
'''
    97  
    98  RECOMMENDED_MIN_PIP_VERSION = '19.3.0'
    99  try:
   100    _PIP_VERSION = get_distribution('pip').version
   101    if parse_version(_PIP_VERSION) < parse_version(RECOMMENDED_MIN_PIP_VERSION):
   102      warnings.warn(
   103          "You are using version {0} of pip. " \
   104          "However, the recommended min version is {1}.".format(
   105              _PIP_VERSION, RECOMMENDED_MIN_PIP_VERSION
   106          )
   107      )
   108  except DistributionNotFound:
   109    # Do nothing if pip is not found. This can happen when using `Poetry` or
   110    # `pipenv` package managers.
   111    pass
   112  
   113  REQUIRED_CYTHON_VERSION = '0.28.1'
   114  try:
   115    _CYTHON_VERSION = get_distribution('cython').version
   116    if parse_version(_CYTHON_VERSION) < parse_version(REQUIRED_CYTHON_VERSION):
   117      warnings.warn(
   118          "You are using version {0} of cython. " \
   119          "However, version {1} is recommended.".format(
   120              _CYTHON_VERSION, REQUIRED_CYTHON_VERSION
   121          )
   122      )
   123  except DistributionNotFound:
   124    # do nothing if Cython is not installed
   125    pass
   126  
   127  try:
   128    # pylint: disable=wrong-import-position
   129    from Cython.Build import cythonize as cythonize0
   130  
   131    def cythonize(*args, **kwargs):
   132      import numpy
   133      extensions = cythonize0(*args, **kwargs)
   134      for e in extensions:
   135        e.include_dirs.append(numpy.get_include())
   136      return extensions
   137  except ImportError:
   138    cythonize = lambda *args, **kwargs: []
   139  
   140  # [BEAM-8181] pyarrow cannot be installed on 32-bit Windows platforms.
   141  if sys.platform == 'win32' and sys.maxsize <= 2**32:
   142    pyarrow_dependency = ''
   143  else:
   144    pyarrow_dependency = 'pyarrow>=3.0.0,<12.0.0'
   145  
   146  # Exclude pandas<=1.4.2 since it doesn't work with numpy 1.24.x.
   147  # Exclude 1.5.0 and 1.5.1 because of
   148  # https://github.com/pandas-dev/pandas/issues/45725
   149  dataframe_dependency = [
   150      'pandas<1.6.0;python_version=="3.7"',
   151      'pandas>=1.4.3,!=1.5.0,!=1.5.1,<1.6;python_version>="3.8"',
   152  ]
   153  
   154  
   155  # We must generate protos after setup_requires are installed.
   156  def generate_protos_first():
   157    try:
   158      # pylint: disable=wrong-import-position
   159      import gen_protos
   160      gen_protos.generate_proto_files()
   161  
   162    except ImportError:
   163      warnings.warn("Could not import gen_protos, skipping proto generation.")
   164  
   165  
   166  def get_portability_package_data():
   167    files = []
   168    portability_dir = Path(__file__).parent / 'apache_beam' / \
   169                      'portability' / 'api'
   170    for ext in ['*.pyi', '*.yaml']:
   171      files.extend(
   172          str(p.relative_to(portability_dir.parent.parent))
   173          for p in portability_dir.rglob(ext))
   174  
   175    return files
   176  
   177  
   178  python_requires = '>=3.7'
   179  
   180  if sys.version_info.major == 3 and sys.version_info.minor >= 12:
   181    warnings.warn(
   182        'This version of Apache Beam has not been sufficiently tested on '
   183        'Python %s.%s. You may encounter bugs or missing features.' %
   184        (sys.version_info.major, sys.version_info.minor))
   185  
# Script entry point: generate protos, then hand the package definition to
# setuptools. Nothing below runs when this file is merely imported.
if __name__ == '__main__':
  # In order to find the tree of proto packages, the directory
  # structure must exist before the call to setuptools.find_packages()
  # executes below.
  generate_protos_first()
  # Keep all dependencies inlined in the setup call, otherwise Dependabot won't
  # be able to parse it.
  setuptools.setup(
      name=PACKAGE_NAME,
      version=PACKAGE_VERSION,
      description=PACKAGE_DESCRIPTION,
      long_description=PACKAGE_LONG_DESCRIPTION,
      url=PACKAGE_URL,
      download_url=PACKAGE_DOWNLOAD_URL,
      author=PACKAGE_AUTHOR,
      author_email=PACKAGE_EMAIL,
      packages=setuptools.find_packages(),
      # Non-Python files shipped inside the apache_beam package: Cython
      # sources and headers, test data, and the portability API files
      # collected by get_portability_package_data().
      package_data={
          'apache_beam': [
              '*/*.pyx',
              '*/*/*.pyx',
              '*/*.pxd',
              '*/*/*.pxd',
              '*/*.h',
              '*/*/*.h',
              'testing/data/*.yaml',
              *get_portability_package_data()
          ]
      },
      # Modules compiled with Cython. cythonize() yields no extensions when
      # Cython is not importable.
      ext_modules=cythonize([
          'apache_beam/**/*.pyx',
          'apache_beam/coders/coder_impl.py',
          'apache_beam/metrics/cells.py',
          'apache_beam/metrics/execution.py',
          'apache_beam/runners/common.py',
          'apache_beam/runners/worker/logger.py',
          'apache_beam/runners/worker/opcounters.py',
          'apache_beam/runners/worker/operations.py',
          'apache_beam/transforms/cy_combiners.py',
          'apache_beam/transforms/stats.py',
          'apache_beam/utils/counters.py',
          'apache_beam/utils/windowed_value.py',
      ], language_level=3),
      install_requires = [
        'crcmod>=1.7,<2.0',
        'orjson<4.0',
        # Dill doesn't have forwards-compatibility guarantees within minor
        # version. Pickles created with a new version of dill may not unpickle
        # using older version of dill. It is best to use the same version of
        # dill on client and server, therefore list of allowed versions is very
        # narrow. See: https://github.com/uqfoundation/dill/issues/341.
        'dill>=0.3.1.1,<0.3.2',
        # It is prudent to use the same version of pickler at job submission
        # and at runtime, therefore bounds need to be tight.
        # To avoid depending on an old dependency, update the minor version on
        # every Beam release, see: https://github.com/apache/beam/issues/23119
        'cloudpickle~=2.2.1',
        'fastavro>=0.23.6,<2',
        'fasteners>=0.3,<1.0',
        'grpcio>=1.33.1,!=1.48.0,<2',
        'hdfs>=2.1.0,<3.0.0',
        'httplib2>=0.8,<0.23.0',
        # numpy can have breaking changes in minor versions.
        # Use a strict upper bound.
        'numpy>=1.14.3,<1.25.0',   # Update build-requirements.txt as well.
        'objsize>=0.6.1,<0.7.0',
        'pymongo>=3.8.0,<5.0.0',
        'proto-plus>=1.7.1,<2',
        # use a tighter upper bound in protobuf dependency
        # to make sure the minor version at job submission
        # does not exceed the minor version at runtime.
        # To avoid depending on an old dependency, update the minor version on
        # every Beam release, see: https://github.com/apache/beam/issues/25590
        'protobuf>=3.20.3,<4.24.0',
        'pydot>=1.2.0,<2',
        'python-dateutil>=2.8.0,<3',
        'pytz>=2018.3',
        'regex>=2020.6.8',
        'requests>=2.24.0,<3.0.0',
        'typing-extensions>=3.7.0',
        'zstandard>=0.18.0,<1',
      # Dynamic dependencies must be specified in a separate list, otherwise
      # Dependabot won't be able to parse the main list. Any dynamic
      # dependencies will not receive updates from Dependabot.
      ] + [pyarrow_dependency],
      python_requires=python_requires,
      # BEAM-8840: Do NOT use tests_require or setup_requires.
      extras_require={
          'docs': [
              'Sphinx>=1.5.2,<2.0',
              # Pinning docutils as a workaround for Sphinx issue:
              # https://github.com/sphinx-doc/sphinx/issues/9727
              'docutils==0.17.1',
              'pandas<2.0.0',
          ],
          'test': [
            'freezegun>=0.3.12',
            'joblib>=1.0.1',
            'mock>=1.0.1,<6.0.0',
            'pandas<2.0.0',
            'parameterized>=0.7.1,<0.10.0',
            'pyhamcrest>=1.9,!=1.10.0,<3.0.0',
            'pyyaml>=3.12,<7.0.0',
            'requests_mock>=1.7,<2.0',
            'tenacity>=8.0.0,<9',
            'pytest>=7.1.2,<8.0',
            'pytest-xdist>=2.5.0,<4',
            'pytest-timeout>=2.1.0,<3',
            'scikit-learn>=0.20.0',
            'sqlalchemy>=1.3,<2.0',
            'psycopg2-binary>=2.8.5,<3.0.0',
            'testcontainers[mysql]>=3.0.3,<4.0.0',
            'cryptography>=36.0.0',
            'hypothesis>5.0.0,<=7.0.0',
          ],
          'gcp': [
            'cachetools>=3.1.0,<6',
            'google-apitools>=0.5.31,<0.5.32',
            # NOTE: Maintainers, please do not require google-auth>=2.x.x
            # Until this issue is closed
            # https://github.com/googleapis/google-cloud-python/issues/10566
            'google-auth>=1.18.0,<3',
            'google-auth-httplib2>=0.1.0,<0.2.0',
            'google-cloud-datastore>=2.0.0,<3',
            'google-cloud-pubsub>=2.1.0,<3',
            'google-cloud-pubsublite>=1.2.0,<2',
            # GCP packages required by tests
            'google-cloud-bigquery>=2.0.0,<4',
            'google-cloud-bigquery-storage>=2.6.3,<3',
            'google-cloud-core>=2.0.0,<3',
            # TODO(https://github.com/apache/beam/issues/26673)
            # 2.18.x breaks unit test
            'google-cloud-bigtable>=2.0.0,<2.18.0',
            'google-cloud-spanner>=3.0.0,<4',
            # GCP Packages required by ML functionality
            'google-cloud-dlp>=3.0.0,<4',
            'google-cloud-language>=2.0,<3',
            'google-cloud-videointelligence>=2.0,<3',
            'google-cloud-vision>=2,<4',
            'google-cloud-recommendations-ai>=0.1.0,<0.11.0'
          ],
          'interactive': [
            'facets-overview>=1.1.0,<2',
            'google-cloud-dataproc>=5.0.0,<6',
            # IPython>=8 is not compatible with Python<=3.7
            'ipython>=7,<8;python_version<="3.7"',
            'ipython>=8,<9;python_version>"3.7"',
            'ipykernel>=6,<7',
            'ipywidgets>=8,<9',
            # Skip version 6.1.13 due to
            # https://github.com/jupyter/jupyter_client/issues/637
            'jupyter-client>=6.1.11,!=6.1.13,<8.2.1',
            'timeloop>=1.0.2,<2',
          ] + dataframe_dependency,
          'interactive_test': [
            # notebook utils
            'nbformat>=5.0.5,<6',
            'nbconvert>=6.2.0,<8',
            # headless chrome based integration tests
            'needle>=0.5.0,<1',
            'chromedriver-binary>=100,<114',
            # use a fixed major version of PIL for different python versions
            'pillow>=7.1.1,<10',
          ],
          'aws': ['boto3>=1.9,<2'],
          'azure': [
            'azure-storage-blob>=12.3.2,<13',
            'azure-core>=1.7.0,<2',
            'azure-identity>=1.12.0,<2',
          ],
          'dataframe': dataframe_dependency,
          'dask': [
            'dask >= 2022.6',
            'distributed >= 2022.6',
          ],
      },
      zip_safe=False,
      # PyPI package information.
      classifiers=[
          'Intended Audience :: End Users/Desktop',
          'License :: OSI Approved :: Apache Software License',
          'Operating System :: POSIX :: Linux',
          'Programming Language :: Python :: 3.7',
          'Programming Language :: Python :: 3.8',
          'Programming Language :: Python :: 3.9',
          'Programming Language :: Python :: 3.10',
          'Programming Language :: Python :: 3.11',
          # When updating version classifiers, also update version warnings
          # above and in apache_beam/__init__.py.
          'Topic :: Software Development :: Libraries',
          'Topic :: Software Development :: Libraries :: Python Modules',
      ],
      license='Apache License, Version 2.0',
      keywords=PACKAGE_KEYWORDS,
      # Registers the custom `mypy` command defined in this file.
      cmdclass={
          'mypy': mypy,
      },
  )