Tal Einat - Pastebin.com (original) (raw)

Guest User

a guest

Oct 12th, 2010

219

0

Never

Not a member of Pastebin yet? Sign Up, it unlocks many cool features!

  1. """A library for executing running calculations.
  2. A running calculation is an object that can be fed one value at a time. This
  3. allows running several running calculations on a single iterator of values in
  4. parallel. This isn't possible with the built-in variants of most calculations,
  5. such as max() and heapq.nlargest().
  6. """
  7. from math import sqrt
  8. from heapq import heappush, heappushpop
  9. from functools import partial
  10. class RunningCalc(object):
  11. pass
  12. def apply(iterable, *running_calcs):
  13. """Run several running calculations on a single iterable of values."""
  14. feeds = [rcalc.feed for rcalc in running_calcs]
  15. for value in iterable:
  16. for rcalc_feed in running_calcs:
  17. rcalc_feed(value)
  18. return tuple([rcalc.value for rcalc in running_calcs])
  19. class RunningMax(RunningCalc):
  20. def __init__(self):
  21. self.value = None
  22. def feed(self, value):
  23. if self.value is None or value > self.value:
  24. self.value = value
  25. class RunningMin(RunningCalc):
  26. def __init__(self):
  27. self.value = None
  28. def feed(self, value):
  29. if self.value is None or value < self.value:
  30. self.value = value
  31. class RunningCount(RunningCalc):
  32. def __init__(self, initial_value=0):
  33. self.value = initial_value
  34. def feed(self, value):
  35. self.value += 1
  36. class RunningSum(RunningCalc):
  37. def __init__(self, initial_value=0):
  38. self.value = initial_value
  39. def feed(self, value):
  40. self.value += value
  41. class RunningAverage(RunningCalc):
  42. def __init__(self):
  43. self.value = 0.0
  44. self.n = 0
  45. def feed(self, value):
  46. self.n += 1
  47. self.value += (value - self.value) / self.n
  48. class RunningVariance(RunningCalc):
  49. """calculate a running variance using the Welford algorithm"""
  50. def __init__(self):
  51. self.n = 0
  52. self.mean = 0.0
  53. self.M2 = 0.0
  54. def feed(self, value):
  55. self.n += 1
  56. delta = value - mean
  57. self.mean += delta / n
  58. self.M2 += delta * (value - self.mean) # uses the new value of mean!
  59. @property
  60. def populationVariance(self):
  61. return (self.M2 / self.n) if self.n > 0 else 0
  62. value = populationVariance
  63. @property
  64. def sampleVariance(self):
  65. return (self.M2 / (self.n - 1)) if self.n > 1 else 0
  66. def RunningStandardDeviation(RunningCalc):
  67. def __init__(self):
  68. self._running_variance = RunningVariance()
  69. def feed(self, value):
  70. self._running_variance.feed(value)
  71. @property
  72. def populationStandardDeviation(self):
  73. return sqrt(self._running_variance.populationVariance)
  74. value = populationStandardDeviation
  75. @property
  76. def samplepopulationStandardDeviation(self):
  77. return sqrt(self._running_variance.sampleVariance)
  78. class RunningNLargest(RunningCalc):
  79. def __init__(self, N):
  80. self.heap = []
  81. self.count = 0
  82. self.N = N
  83. def feed(self, value):
  84. self.count += 1
  85. if self.count <= self.N:
  86. heappush(self.heap, value)
  87. else:
  88. heappushpop(self.heap, value)
  89. @property
  90. def value(self):
  91. return sorted(self.heap, reversed=True)
  92. class RunningNSmallest(RunningNLargest):
  93. """Only works on negatable values!"""
  94. # Why isn't there a built-in max-heap? :(
  95. def feed(self, value):
  96. RunningNLargest.feed(self, -value) # note the minus!
  97. @property
  98. def value(self):
  99. return sorted([-x for x in self.heap])