Source code for raccoon.series

"""
Series class
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from bisect import bisect_left, bisect_right
from collections import OrderedDict
from collections.abc import Sequence
from itertools import compress
from typing import Any, Literal, Self, cast, overload

from tabulate import tabulate

from raccoon import DataFrame
from raccoon.sort_utils import sorted_exists, sorted_index, sorted_list_indexes


def _is_non_string_sequence(value: Any) -> bool:
    return isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray))


[docs] class SeriesBase[IndexT, T](ABC): """ Base Series abstract base class that concrete implementations inherit from. Note that the .data and .index property methods in Series are views to the underlying data and not copies. """ # Define slots to make object faster __slots__ = ["_data", "_data_name", "_index", "_index_name", "_sort"] def __init__(self): """ No specific parameters, those are defined in the child classed """ self._index: list[Any] = [] self._index_name: str | tuple | None = None self._data: Any = [] self._data_name: str | tuple | None = None self._sort: bool = False def __len__(self) -> int: return len(self._index) def __repr__(self) -> str: return "object id: %s\ndata:\n%s\nindex:\n%s\n" % (id(self), self._data, self._index) def __str__(self) -> str: return self._make_table() def _make_table(self, index: bool = True, **kwargs: Any) -> str: kwargs["headers"] = "keys" if "headers" not in kwargs.keys() else kwargs["headers"] return tabulate(self.to_dict(ordered=True, index=index), **kwargs)
[docs] def print(self, index: bool = True, **kwargs: Any) -> None: """ Print the contents of the Series. This method uses the tabulate function from the tabulate package. Use the kwargs to pass along any arguments to the tabulate function. :param index: If True then include the indexes as a column in the output, if False ignore the index :param kwargs: Parameters to pass along to the tabulate function :return: output of the tabulate function """ print(self._make_table(index=index, **kwargs))
@property @abstractmethod def data(self) -> Sequence[T]: pass @property @abstractmethod def index(self) -> list[IndexT]: pass @index.setter @abstractmethod def index(self, index_list: list[Any]): pass @property def data_name(self) -> str | tuple | None: return self._data_name @data_name.setter def data_name(self, name: str | tuple | None) -> None: self._data_name = name @property def index_name(self) -> str | tuple | None: return self._index_name @index_name.setter def index_name(self, name: str | tuple | None) -> None: self._index_name = name @property @abstractmethod def sort(self) -> bool: pass @overload def get(self, indexes: list[IndexT] | list[bool], as_list: Literal[True]) -> list[T]: ... @overload def get(self, indexes: list[IndexT] | list[bool], as_list: Literal[False] = False) -> Series[IndexT, T]: ... @overload def get(self, indexes: IndexT, as_list: bool = False) -> T: ...
[docs] def get(self, indexes: Any | list[Any] | list[bool], as_list: bool = False) -> Series[IndexT, T] | list[T] | T: """ Given indexes will return a sub-set of the Series. This method will direct to the specific methods based on what types are passed in for the indexes. The type of the return is determined by the types of the parameters. :param indexes: index value, list of index values, or a list of booleans. :param as_list: if True then return the values as a list, if False return a Series. :return: either Series, list, or single value. The return is a shallow copy """ if isinstance(indexes, list): return self.get_rows(indexes, as_list=True) if as_list else self.get_rows(indexes) else: assert not isinstance(indexes, list) return self.get_cell(indexes)
[docs] def get_cell(self, index: IndexT) -> T: """ For a single index and return the value :param index: index value :return: value """ i = sorted_index(self._index, index) if self._sort else self._index.index(index) return self._data[i]
@overload def get_rows(self, indexes: list[IndexT] | list[bool], as_list: Literal[True]) -> list[T]: ... @overload def get_rows(self, indexes: list[IndexT] | list[bool], as_list: Literal[False] = False) -> Series[IndexT, T]: ...
[docs] def get_rows(self, indexes: list[Any] | list[bool], as_list: bool = False) -> Series[IndexT, T] | list[T]: """ For a list of indexes return the values of the indexes in that column. :param indexes: either a list of index values or a list of booleans with same length as all indexes :param as_list: if True return a list, if False return Series :return: Series if as_list if False, a list if as_list is True """ if indexes and isinstance(indexes[0], bool) and all(isinstance(i, bool) for i in indexes): # boolean list if len(indexes) != len(self._index): raise ValueError("boolean index list must be same size of existing index") if all(indexes): # the entire column data = list(self._data) index = list(self._index) else: data = list(compress(self._data, indexes)) index = list(compress(self._index, indexes)) else: # index values list locations = ( [sorted_index(self._index, x) for x in indexes] if self._sort else [self._index.index(x) for x in indexes] ) data = [self._data[i] for i in locations] index = [self._index[i] for i in locations] return ( data if as_list else Series( data=data, index=index, data_name=self._data_name, index_name=self._index_name, sort=self._sort, ) )
[docs] def get_location(self, location: int) -> dict[Any, IndexT | T]: """ For an index location return a dict of the index and value. This is optimized for speed because it does not need to look up the index location with a search. Also, can accept relative indexing from the end of the SEries in standard python notation [-3, -2, -1] :param location: index location in standard python form of positive or negative number :return: dictionary """ return {self.index_name: self._index[location], self.data_name: self._data[location]}
@overload def get_locations(self, locations: list[int], as_list: Literal[True]) -> list[T]: ... @overload def get_locations(self, locations: list[int], as_list: Literal[False] = False) -> Series[IndexT, T]: ...
[docs] def get_locations(self, locations: list[int], as_list: bool = False) -> Series[IndexT, T] | list[T]: """ For list of locations return a Series or list of the values. :param locations: list of index locations :param as_list: True to return a list of values :return: Series or list """ indexes = [self._index[x] for x in locations] if as_list: return self.get(indexes, as_list=True) return self.get(indexes, as_list=False)
@overload def get_slice( self, start_index: Any = None, stop_index: Any = None, *, as_list: Literal[True], ) -> tuple[list[IndexT], list[T]]: ... @overload def get_slice( self, start_index: Any = None, stop_index: Any = None, *, as_list: Literal[False] = False, ) -> Series[IndexT, T]: ...
[docs] def get_slice( self, start_index: Any = None, stop_index: Any = None, as_list: bool = False, ) -> Series[IndexT, T] | tuple[list[IndexT], list[T]]: """ For sorted Series will return either a Series or list of all the rows where the index is greater than or equal to the start_index if provided and less than or equal to the stop_index if provided. If either the start or stop index is None then will include from the first or last element, similar to standard python slide of [:5] or [:5]. Both end points are considered inclusive. :param start_index: lowest index value to include, or None to start from the first row :param stop_index: highest index value to include, or None to end at the last row :param as_list: if True then return a list of the indexes and values :return: Series or tuple of (index list, values list) """ if not self._sort: raise RuntimeError("Can only use get_slice on sorted Series") start_location = bisect_left(self._index, start_index) if start_index is not None else None stop_location = bisect_right(self._index, stop_index) if stop_index is not None else None index = list(self._index[start_location:stop_location]) data = list(self._data[start_location:stop_location]) if as_list: return index, data else: return Series( data=data, index=index, data_name=self._data_name, index_name=self._index_name, sort=self._sort, )
def _slice_index(self, slicer: slice) -> list[bool]: try: start_index = sorted_index(self._index, slicer.start) if self._sort else self._index.index(slicer.start) except ValueError: raise IndexError("start of slice not in the index") try: end_index = sorted_index(self._index, slicer.stop) if self._sort else self._index.index(slicer.stop) except ValueError: raise IndexError("end of slice not in the index") if end_index < start_index: raise IndexError("end of slice is before start of slice") index_len = len(self._index) return [False] * start_index + [True] * (end_index - start_index + 1) + [False] * (index_len - 1 - end_index) def _validate_index(self, indexes: list[IndexT]) -> None: """ Raises an error if the indexes are not valid :param list indexes: list of indexes :return: nothing """ if not (isinstance(indexes, list) or indexes is None): raise TypeError("indexes must be list or None") if len(indexes) != len(set(indexes)): # noqa raise ValueError("index contains duplicates") if self._data: if len(indexes) != len(self._data): # noqa raise ValueError("index length does not match data length")
[docs] def validate_integrity(self) -> None: """ Validate the integrity of the Series. This checks that the indexes, column names and internal data are not corrupted. Will raise an error if there is a problem. :return: nothing """ self._validate_index(self._index)
[docs] def to_dict(self, index: bool = True, ordered: bool = False) -> dict[Any, Any] | OrderedDict[Any, Any]: """ Returns a dict where the keys are the data and index names and the values are list of the data and index. :param index: If True then include the index in the dict with the index_name as the key :param ordered: If True then return an OrderedDict() to preserve the order of the columns in the Series :return: dict or OrderedDict() """ result = OrderedDict() if ordered else dict() if index: result.update({self._index_name: self._index}) if ordered: data_dict = [(self._data_name, self._data)] else: data_dict = {self._data_name: self._data} result.update(data_dict) return result
[docs] def head(self, rows: int) -> Series[IndexT, T]: """ Return a Series of the first N rows :param rows: number of rows :return: Series """ rows_bool = [True] * min(rows, len(self._index)) rows_bool.extend([False] * max(0, len(self._index) - rows)) return self.get(indexes=rows_bool)
[docs] def tail(self, rows: int) -> Series[IndexT, T]: """ Return a Series of the last N rows :param rows: number of rows :return: Series """ rows_bool = [False] * max(0, len(self._index) - rows) rows_bool.extend([True] * min(rows, len(self._index))) return self.get(indexes=rows_bool)
@overload def select_index(self, compare: Any | tuple, result: Literal["boolean"] = "boolean") -> list[bool]: ... @overload def select_index(self, compare: IndexT | tuple[Any, ...], result: Literal["value"]) -> list[IndexT]: ...
[docs] def select_index( self, compare: IndexT | tuple[Any, ...], result: Literal["boolean", "value"] = "boolean" ) -> list[bool] | list[IndexT]: """ Finds the elements in the index that match the compare parameter and returns either a list of the values that match, of a boolean list the length of the index with True to each index that matches. If the indexes are tuples then the compare is a tuple where None in any field of the tuple will be treated as "*" and match all values. :param compare: value to compare as a singleton or tuple :param result: 'boolean' = returns a list of booleans, 'value' = returns a list of index values that match :return: list of booleans or values """ if isinstance(compare, tuple): booleans = [] for value in self._index: if not isinstance(value, tuple): booleans.append(False) continue booleans.append( all(compare[i] == item if compare[i] is not None else True for i, item in enumerate(value)) ) else: booleans = [False] * len(self._index) if self._sort: booleans[sorted_index(self._index, compare)] = True else: booleans[self._index.index(compare)] = True if result == "boolean": return booleans elif result == "value": return list(compress(self._index, booleans)) else: raise ValueError("only valid values for result parameter are: boolean or value.")
[docs] def isin(self, compare_list: list[Any]) -> list[bool]: """ Returns a boolean list where each element is whether that element in the column is in the compare_list. :param compare_list: list of items to compare to :return: list of booleans """ compare_set = set(compare_list) return [x in compare_set for x in self._data]
[docs] def equality(self, indexes: list[IndexT] | list[bool] | None = None, value: Any = None) -> list[bool]: """ Math helper method. Given a column and optional indexes will return a list of booleans on the equality of the value for that index in the DataFrame to the value parameter. :param indexes: list of index values or list of booleans. If a list of booleans then the list must be the same\ length as the DataFrame :param value: value to compare :return: list of booleans """ indexes = [True] * len(self._index) if indexes is None else indexes compare_list = self.get_rows(indexes, as_list=True) return [x == value for x in compare_list]
[docs] class Series[IndexT, T](SeriesBase[IndexT, T]): """ Series class. The raccoon Series implements a simplified version of the pandas Series with the key objective difference that the raccoon Series is meant for use cases where the size of the Series rows is expanding frequently. This is known to be slow with Pandas due to the use of numpy as the underlying data structure. Raccoon uses native lists as the underlying data structure which is quick to expand and grow the size. The Series can be designated as sort, in which case the rows will be sort by index on construction, and then any addition of a new row will insert it into the Series so that the index remains sort. """ def __init__( self, data: list[T] | None = None, index: Sequence[IndexT] | None = None, data_name: str | tuple | None = "value", index_name: str | tuple | None = "index", sort: bool | None = None, ): """ :param data: (optional) list of values. :param index: (optional) list of index values. If None then the index will be integers starting with zero :param data_name: (optional) name of the data column, or will default to 'value' :param index_name: (optional) name for the index. Default is "index" :param sort: if True then Series will keep the index sort. If True all index values must be of same type. If None then will default to True if no index is provided. """ super().__init__() if index is not None and not _is_non_string_sequence(index): raise TypeError("index must be a non-string sequence") # standard variable setup self._index = [] self._index_name = index_name self._data = [] self._data_name = data_name # setup data list if data is None: self._data = list() if index: # pad out to the number of rows self._pad_data(len(index)) self.index = list(index) else: self.index = list() elif isinstance(data, list): self._data = [x for x in data] # setup index if index: self.index = list(index) else: self.index = list(range(len(self._data))) else: raise TypeError("Not valid data type.") # setup sort if sort is not None: self.sort = sort else: if index: self.sort = False else: self.sort = True def _pad_data(self, index_len: int) -> None: """ Pad the data in Series with [None] to ensure that data is the same length as index :param index_len: length of index to extend data to :return: nothing """ self._data.extend([None] * (index_len - len(self._data))) @property def data(self) -> list[T]: return self._data @property def index(self) -> list[IndexT]: return self._index @index.setter def index(self, index_list: list[Any]) -> None: self._validate_index(index_list) self._index = list(index_list) @property def sort(self) -> bool: return self._sort @sort.setter def sort(self, boolean: bool) -> None: self._sort = boolean if self._sort: self.sort_index()
[docs] def sort_index(self) -> None: """ Sort the Series by the index. The sort modifies the Series inplace :return: nothing """ sort = sorted_list_indexes(self._index) # sort index self._index = [self._index[x] for x in sort] # sort data self._data = [self._data[x] for x in sort]
[docs] def set(self, indexes: Any | list[Any] | list[bool], values: T | list[T] | Any = None) -> None: """ Given indexes will set a sub-set of the Series to the values provided. This method will direct to the below methods based on what types are passed in for the indexes. If the indexes contain values not in the Series then new rows or columns will be added. :param indexes: indexes value, list of indexes values, or a list of booleans. :param values: value or list of values to set. If a list then must be the same length as the index's parameter. :return: nothing """ if isinstance(indexes, list): self.set_rows(indexes, values) else: assert not isinstance(indexes, list) self.set_cell(indexes, values)
def _add_row(self, index: IndexT) -> None: """ Add a new row to the Series :param index: index of the new row :return: nothing """ self._index.append(index) self._data.append(None) def _insert_row(self, i: int, index: IndexT) -> None: """ Insert a new row in the Series. :param i: index location to insert :param index: index value to insert into the index list :return: nothing """ if i == len(self._index): self._add_row(index) else: self._index.insert(i, index) self._data.insert(i, None) def _add_missing_rows(self, indexes: list[Any]) -> None: """ Given a list of indexes, find all the indexes that are not currently in the Series and make a new row for that index by appending to the Series. This does not maintain sorted order for the index. :param indexes: list of indexes :return: nothing """ existing = set(self._index) new_indexes = [x for x in indexes if x not in existing] for x in new_indexes: self._add_row(x) def _insert_missing_rows(self, indexes: list[Any]) -> None: """ Given a list of indexes, find all the indexes that are not currently in the Series and make a new row for that index, inserting into the index. This requires the Series to be sorted=True :param indexes: list of indexes :return: nothing """ existing = set(self._index) new_indexes = [x for x in indexes if x not in existing] for x in new_indexes: self._insert_row(bisect_left(self._index, x), x)
[docs] def set_cell(self, index: IndexT, value: T | Any) -> None: """ Sets the value of a single cell. If the index is not in the current index then a new index will be created. :param index: index value :param value: value to set :return: nothing """ if self._sort: exists, i = sorted_exists(self._index, index) if not exists: self._insert_row(i, index) else: try: i = self._index.index(index) except ValueError: i = len(self._index) self._add_row(index) self._data[i] = value
[docs] def set_rows(self, index: list[Any] | list[bool], values: T | list[T] | Any = None) -> None: """ Set rows to a single value or list of values. If any of the index values are not in the current indexes then a new row will be created. :param index: list of index values or list of booleans. If a list of booleans then the list must be the same\ length as the Series :param values: either a single value or a list. The list must be the same length as the index list if the index\ list is values, or the length of the True values in the index list if the index list is booleans :return: nothing """ if index and isinstance(index[0], bool) and all(isinstance(i, bool) for i in index): # boolean list value_list = list(values) if isinstance(values, list) else [values for x in index if x] if len(index) != len(self._index): raise ValueError("boolean index list must be same size of existing index") if len(value_list) != index.count(True): raise ValueError("length of values list must equal number of True entries in index list") indexes = [i for i, x in enumerate(index) if x] for x, i in enumerate(indexes): self._data[i] = value_list[x] else: # list of index value_list = list(values) if isinstance(values, list) else [values for _ in index] if len(value_list) != len(index): raise ValueError("length of values and index must be the same.") # insert or append indexes as needed if self._sort: exists = [] indexes = [] for x in index: e, i = sorted_exists(self._index, x) exists.append(e) indexes.append(i) if not all(exists): self._insert_missing_rows(index) indexes = [sorted_index(self._index, x) for x in index] else: try: # all index in current index indexes = [self._index.index(x) for x in index] except ValueError: # new rows need to be added self._add_missing_rows(index) indexes = [self._index.index(x) for x in index] for x, i in enumerate(indexes): self._data[i] = value_list[x]
[docs] def set_location(self, location: int, value: Any) -> None: """ For a location set the value :param location: location :param value: value :return: nothing """ self._data[location] = value
[docs] def set_locations(self, locations: list[int], values: list[Any] | Any) -> None: """ For a list of locations set the values. :param locations: list of index locations :param values: list of values or a single value :return: nothing """ indexes = [self._index[x] for x in locations] self.set(indexes, values)
def __setitem__(self, index: Any | list[Any] | slice, value: T | list[T] | Any) -> None: """ Convenience wrapper around the set() method for using srs[] = X Usage... df[1] -- set cell at index=1 df[[0, 3]] -- set index=[0, 3] df[1:2] -- set index slice 1:2 :param index: any of the parameter examples above :param value: single value or list of values :return: nothing """ indexes = self._slice_index(index) if isinstance(index, slice) else index return self.set(indexes=indexes, values=value) @overload def __getitem__(self, index: slice) -> Series[IndexT, T]: ... @overload def __getitem__(self, index: list[IndexT] | list[bool]) -> Series[IndexT, T]: ... @overload def __getitem__(self, index: IndexT) -> T: ... def __getitem__(self, index: IndexT | list[IndexT] | list[bool] | slice) -> Series[IndexT, T] | T: """ Convenience wrapper around the get() method for using srs[] Usage... df[5] -- get cell at index=5 df[[4, 5]] -- get indexes=[4, 5] df[4:10] -- get indexes=[4, 5, 6, 7, 8, 9, 10] can also use a boolean list for anything :param index: any of the parameters above :return: Series of the subset slice """ if isinstance(index, slice): # just a slice of index if self._sort: # faster version for sort=True return self.get_slice(index.start, index.stop, as_list=False) else: return self.get(indexes=self._slice_index(index)) else: # just a single cell or list of cells return self.get(index)
[docs] def append_row(self, index: IndexT, value: T) -> None: """ Appends a row of value to the end of the data. Be very careful with this function as for sorted Series it will not enforce sort order. Use this only for speed when needed, be careful. :param index: index :param value: value :return: nothing """ if index in self._index: raise IndexError("index already in Series") self._index.append(index) self._data.append(value)
[docs] def append_rows(self, indexes: list[IndexT], values: list[T]) -> None: """ Appends values to the end of the data. Be very careful with this function as for sort DataFrames it will not enforce sort order. Use this only for speed when needed, be careful. :param indexes: list of indexes to append :param values: list of values to append :return: nothing """ # check that the values data is less than or equal to the length of the indexes if len(values) != len(indexes): raise ValueError("length of values is not equal to length of indexes") # check the indexes are not duplicates combined_index = self._index + indexes if len(set(combined_index)) != len(combined_index): raise IndexError("duplicate indexes in Series") # append index value self._index.extend(indexes) self._data.extend(values)
[docs] def delete(self, indexes: Any | list[Any] | list[bool]) -> None: """ Delete rows from the DataFrame :param indexes: either a list of values or list of booleans for the rows to delete :return: nothing """ index_list = indexes if isinstance(indexes, list) else [indexes] if ( index_list and isinstance(index_list[0], bool) and all(isinstance(i, bool) for i in index_list) ): # boolean list if len(index_list) != len(self._index): raise ValueError("boolean indexes list must be same size of existing indexes") delete_locations = [i for i, x in enumerate(index_list) if x] else: delete_locations = ( [sorted_index(self._index, x) for x in index_list] if self._sort else [self._index.index(x) for x in index_list] ) delete_locations = sorted(delete_locations, reverse=True) # need to sort and reverse list so deleting works for i in delete_locations: del self._data[i] del self._index[i]
[docs] def reset_index(self) -> None: """ Resets the index of the Series to simple integer list and the index name to 'index'. :return: nothing """ self.index = list(range(self.__len__())) self.index_name = "index"
[docs] class ViewSeries[IndexT, T](SeriesBase[IndexT, T]): """ ViewSeries class. The raccoon ViewSeries implements a view only version of the Series object with the key objective difference that the raccoon ViewSeries is meant for view only use cases where the underlying index and data are modified elsewhere or static. Use this for a view into a single column of a DataFrame. There is no type checking of the data, so it is assumed the data type is list-style. """ def __init__( self, data: Sequence[T] | None = None, index: Sequence[IndexT] | None = None, data_name: str | tuple | None = "value", index_name: str | tuple | None = "index", sort: bool = False, offset: int = 0, ): """ :param data: (optional) sequence of values. :param index: (optional) list of index values. If None then the index will be integers starting with zero :param data_name: (optional) name of the data column, or will default to 'value' :param index_name: (optional) name for the index. Default is "index" :param sort: if True then assumes the index is sorted for faster set/get operations :param offset: integer to add to location to transform to standard python list location index """ super().__init__() # check inputs if index is None: raise ValueError("Index cannot be None.") if not _is_non_string_sequence(index): raise TypeError("index must be a non-string sequence") if data is None: raise ValueError("Data cannot be None.") # standard variable setup self._data = data # direct view, no copy self._data_name = data_name self.index = index if isinstance(index, list) else list(index) self._index_name = index_name self._sort = sort self._offset = offset @property def data(self) -> Sequence[T]: return self._data @property def index(self) -> list[IndexT]: return self._index @index.setter def index(self, index_list: list[Any]) -> None: self._validate_index(index_list) self._index = index_list @property def sort(self) -> bool: return self._sort @property def offset(self) -> int: return self._offset @overload def value(self, indexes: int, int_as_index: bool = False) -> T: ... @overload def value(self, indexes: slice, int_as_index: bool = False) -> list[T]: ... @overload def value(self, indexes: list[int] | list[IndexT] | list[bool], int_as_index: bool = False) -> list[T]: ... @overload def value(self, indexes: object, int_as_index: bool = False) -> T: ...
[docs] def value(self, indexes: int | Any | list[int] | list[Any] | list[bool], int_as_index: bool = False) -> T | list[T]: """ Wrapper function for get. It will return a list, no index. If the indexes are integers it will be assumed that they are locations unless int_as_index = True. If the indexes are locations then they will be rotated to the left by offset number of locations. :param indexes: integer location, single index, list of indexes or list of boolean :param int_as_index: if True then will treat int index values as indexes and not locations :return: value or list of values """ # single integer value if isinstance(indexes, int): if int_as_index: return self.get(cast(IndexT, indexes)) else: indexes = indexes - self._offset return self._data[indexes] # slice elif isinstance(indexes, slice): if isinstance(indexes.start, int) and not int_as_index: # treat as location start = indexes.start - self._offset stop = indexes.stop - self._offset + 1 # to capture the last value # check locations are valid and will not return empty if start > stop: raise IndexError("end of slice is before start of slice") if (start > 0 > stop) or (start < 0 < stop): raise IndexError("slide indexes invalid with given offset:%f" % self._offset) # where end is the last element if (start < 0) and stop == 0: return list(self._data[start:]) return list(self._data[start:stop]) else: # treat as index indexes = self._slice_index(indexes) return self.get(indexes, as_list=True) # list of booleans elif isinstance(indexes, list) and all(isinstance(x, bool) for x in indexes): return self.get(cast(list[IndexT] | list[bool], indexes), as_list=True) # list of values elif isinstance(indexes, list): if int_as_index or not isinstance(indexes[0], int): return self.get(cast(list[IndexT] | list[bool], indexes), as_list=True) else: indexes = [x - self._offset for x in indexes] return self.get_locations(indexes, as_list=True) # just a single value else: assert not isinstance(indexes, list) return self.get_cell(indexes)
@overload def __getitem__(self, index: slice) -> list[T]: ... @overload def __getitem__(self, index: list[int] | list[IndexT] | list[bool]) -> list[T]: ... @overload def __getitem__(self, index: int | IndexT) -> T: ... def __getitem__(self, index: int | IndexT | list[int] | list[IndexT] | list[bool] | slice) -> T | list[T]: """ Convenience wrapper around the value() method for using srs[]. This will treat all integers as locations Usage... df[5] -- get cell at location=5 df[[4, 5]] -- get locations=[4, 5] df[[-1:0]] -- get locations at slices can also use a boolean list for anything :param index: any of the parameters above :return: DataFrame of the subset slice """ return self.value(index, int_as_index=False) # Series creation functions
[docs] @classmethod def from_dataframe(cls, dataframe: DataFrame[IndexT, Any], column: str | tuple | None, offset: int = 0) -> Self: """ Creates and return a Series from a DataFrame and specific column :param dataframe: raccoon DataFrame :param column: column name :param offset: offset value must be provided as there is no equivalent for a DataFrame :return: Series """ return cls( data=dataframe.get_entire_column(column, as_list=True), index=dataframe.index, data_name=column, index_name=dataframe.index_name, sort=dataframe.sort, offset=offset, )
[docs] @classmethod def from_series(cls, series: Series[IndexT, T], offset: int = 0) -> Self: """ Creates and return a Series from a Series :param series: raccoon Series :param offset: offset value must be provided as there is no equivalent for a DataFrame :return: Series """ return cls( data=series.data, index=series.index, data_name=series.data_name, index_name=series.index_name, sort=series.sort, offset=offset, )