github.com/eth-easl/loader@v0.0.0-20230908084258-8a37e1d94279/pkg/generator/specification_statistical_test.py (about)

     1  #  MIT License
     2  #
     3  #  Copyright (c) 2023 EASL and the vHive community
     4  #
     5  #  Permission is hereby granted, free of charge, to any person obtaining a copy
     6  #  of this software and associated documentation files (the "Software"), to deal
     7  #  in the Software without restriction, including without limitation the rights
     8  #  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     9  #  copies of the Software, and to permit persons to whom the Software is
    10  #  furnished to do so, subject to the following conditions:
    11  #
    12  #  The above copyright notice and this permission notice shall be included in all
    13  #  copies or substantial portions of the Software.
    14  #
    15  #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    16  #  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    17  #  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    18  #  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    19  #  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    20  #  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    21  #  SOFTWARE.
    22  
    23  import matplotlib.pyplot as plt
    24  import numpy as np
    25  import sys
    26  from scipy import stats
    27  
    28  distribution = sys.argv[1]
    29  inputFile = sys.argv[2]
    30  
    31  alpha = 0.05
    32  
    33  f = np.loadtxt(inputFile, dtype=float)
    34  
    35  if distribution == "uniform":
    36      minBoundary = 0
    37      maxBoundary = max(f)
    38  
    39      cdf = stats.uniform(loc=minBoundary, scale=maxBoundary).cdf
    40  elif distribution == "exponential":
    41      maximum = max(f)
    42      totalDuration = float(sys.argv[3])
    43      for i in range(len(f)):
    44          f[i] = f[i] / 60_000_000 * totalDuration
    45  
    46      cdf = stats.expon.cdf
    47  else:
    48      exit(2)  # unsupported distribution
    49  
    50  test = stats.kstest(f, cdf)
    51  
    52  plt.hist(f, density=True, bins=30)
    53  plt.savefig(f"distribution_{distribution}.png")
    54  
    55  print(test)
    56  
    57  if test.pvalue > alpha:
    58      exit(0)  # the sample satisfies the distribution
    59  else:
    60      exit(1)  # the sample does not satisfy the distribution