# Source code for cawdrey.tally

#!/usr/bin/env python
#
#  tally.py
"""
Subclass of :class:`collections.Counter` with additional methods.

.. versionadded:: 0.3.0
"""
#
#  Copyright © 2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU Lesser General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#  GNU Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#  most_common method based on CPython.
#  Licensed under the Python Software Foundation License Version 2.
#  Copyright © 2001-2020 Python Software Foundation. All rights reserved.
#  Copyright © 2000 BeOpen.com. All rights reserved.
#  Copyright © 1995-2000 Corporation for National Research Initiatives. All rights reserved.
#  Copyright © 1991-1995 Stichting Mathematisch Centrum. All rights reserved.
#

# stdlib
import heapq
from numbers import Real
from operator import itemgetter
from typing import Counter, Dict, Iterable, List, Optional, Tuple, TypeVar, Union, overload

# 3rd party
from typing_extensions import Protocol, runtime_checkable

# this package
from cawdrey.base import KT

# Names exported by a star-import of this module.
__all__ = ["_F", "SupportsMostCommon", "Tally", "Percentage"]

# Type variable constrained to ``float``, ``int`` or ``numbers.Real``.
# Used to type the ``default`` argument (and matching return type) of ``Tally.get_percentage``.
_F = TypeVar("_F", float, int, Real)


@runtime_checkable
class SupportsMostCommon(Protocol[KT]):
    """
    :class:`typing.Protocol` for classes which support a :class:`collections.Counter`-like
    :meth:`collections.Counter.most_common` method.

    The protocol is runtime-checkable, so it may be used with :func:`isinstance`.
    Implementations must provide both :meth:`items` and :meth:`most_common`.
    """  # noqa: D400

    def items(self) -> Iterable[Tuple[KT, float]]:
        """
        Returns an iterator over the mapping's items (as ``(key, value)`` pairs).

        :raises NotImplementedError: always; this is only a protocol stub.
        """  # noqa: D402

        raise NotImplementedError

    def most_common(self, n: Optional[int] = None) -> Union[List[Tuple[KT, float]], List[Tuple[KT, int]]]:
        """
        List the ``n`` most common elements and their counts from the most common to the least.

        If ``n`` is :py:obj:`None` then list all element counts.

        .. code-block:: python

            >>> Counter('abracadabra').most_common(3)
            [('a', 5), ('b', 2), ('r', 2)]

        :param n:

        :raises NotImplementedError: always; this is only a protocol stub.
        """

        raise NotImplementedError
class Tally(Counter[KT]):
    """
    Subclass of :class:`collections.Counter` with additional methods.

    .. versionadded:: 0.3.0

    .. autosummary-widths:: 4/10
    """

    def as_percentage(self) -> "Percentage[KT]":
        """
        Returns the :class:`~.Tally` as a :class:`~.Percentage` dictionary
        comprising the count for each element as a fraction of the sum of all elements.

        Elements are inserted in the order given by :meth:`most_common`.

        .. important::

            The sum of the dictionary's values may not add up to exactly ``1.0``
            due to limitations of floating-point numbers.

        :raises ZeroDivisionError: if the tally has elements but their counts sum to zero.
        """  # noqa: D400

        # Based on https://stackoverflow.com/a/48952797
        # By https://stackoverflow.com/users/271351/cjbarth
        # CC BY-SA 3.0

        # Sum the counts once, rather than once per element.
        total = self.total
        return Percentage((i, count / total) for i, count in self.most_common())

    # NOTE: on Python 3.10+ ``collections.Counter`` has a ``total()`` *method*;
    # this property replaces it on :class:`~.Tally`, so it is read as ``tally.total``.
    @property
    def total(self) -> int:
        """
        Returns the total count for all elements.
        """

        return sum(self.values())

    @overload
    def get_percentage(self, item: KT) -> Optional[float]: ...  # pragma: no cover

    @overload
    def get_percentage(self, item: KT, default: _F) -> Union[_F, float]: ...  # pragma: no cover

    def get_percentage(self, item: KT, default: Optional[_F] = None) -> Union[None, _F, float]:
        """
        Returns the count for ``item``, as a percentage of the sum of all elements.

        :param item:
        :param default: A default percentage (as a :class:`float`) to return
            if ``item`` is not in the dictionary.

        :raises ZeroDivisionError: if ``item`` is present but the counts sum to zero.

        :rtype:

        .. latex:clearpage::
        """

        # ``get`` (rather than indexing) so a missing item yields ``None``.
        value = self.get(item)

        if value is None:
            return default

        return value / self.total

    def most_common(self, n: Optional[int] = None) -> List[Tuple[KT, int]]:
        """
        List the ``n`` most common elements and their counts from the most common to the least.

        If ``n`` is :py:obj:`None` then list all element counts.

        .. code-block:: python

            >>> Tally('abracadabra').most_common(3)
            [('a', 5), ('b', 2), ('r', 2)]

        :param n:
        """

        # Behaviour is inherited unchanged; the override only supplies the docstring and annotations.
        return super().most_common(n)
class Percentage(Dict[KT, float]):
    """
    Provides a dictionary interface, but with :class:`collections.Counter`'s
    :meth:`collections.Counter.most_common` method.

    Represents the return type of :meth:`cawdrey.tally.Tally.as_percentage()`.
    """  # noqa

    def most_common(self, n: Optional[int] = None) -> List[Tuple[KT, float]]:
        """
        List the ``n`` most common elements and their counts from the most common to the least.

        If ``n`` is :py:obj:`None` then list all element counts.
        Elements with equal values are listed in the order they appear in the dictionary.

        .. code-block:: python

            >>> Tally('abracadabra').as_percentage().most_common(3)
            [('a', 0.45454545454545453), ('b', 0.18181818181818182), ('r', 0.18181818181818182)]

        :param n:
        """

        # Emulate Bag.sortedByCount from Smalltalk
        by_value = itemgetter(1)

        if n is None:
            # Full listing: a stable sort keeps ties in dictionary order.
            return sorted(self.items(), key=by_value, reverse=True)

        # Partial listing: a heap avoids sorting every item when only ``n`` are wanted.
        return heapq.nlargest(n, self.items(), key=by_value)