Example Usage for DataFrame
===========================

.. code:: ipython3

    # add the repository root to sys.path to use the latest development version
    import sys
    from pathlib import Path
    
    repo_root = Path.cwd()
    if not (repo_root / 'raccoon').exists():
        repo_root = repo_root.parent
    
    sys.path.insert(0, str(repo_root))

.. code:: ipython3

    # import libraries
    import raccoon as rc

Initialize
----------

.. code:: ipython3

    # empty DataFrame
    df = rc.DataFrame()
    df


.. parsed-literal::

    object id: 2167256510720
    columns:
    []
    data:
    []
    index:
    []


.. code:: ipython3

    # with columns and indexes but no data
    df = rc.DataFrame(columns=['a', 'b', 'c'], index=[1, 2, 3])
    df


.. parsed-literal::

    object id: 2167257130512
    columns:
    ['a', 'b', 'c']
    data:
    [[None, None, None], [None, None, None], [None, None, None]]
    index:
    [1, 2, 3]


.. code:: ipython3

    # with data
    df = rc.DataFrame(data={'a': [1, 2, 3], 'b': [4, 5, 6]}, index=[10, 11, 12], columns=['a', 'b'])
    df


.. parsed-literal::

    object id: 2167257128192
    columns:
    ['a', 'b']
    data:
    [[1, 2, 3], [4, 5, 6]]
    index:
    [10, 11, 12]


Print
-----

.. code:: ipython3

    df.print()


.. parsed-literal::

      index    a    b
    -------  ---  ---
         10    1    4
         11    2    5
         12    3    6
    

.. code:: ipython3

    print(df)


.. parsed-literal::

      index    a    b
    -------  ---  ---
         10    1    4
         11    2    5
         12    3    6
    

Setters and Getters
-------------------

.. code:: ipython3

    # columns
    df.columns


.. parsed-literal::

    ['a', 'b']


.. code:: ipython3

    df.columns = ['first', 'second']
    print(df)


.. parsed-literal::

      index    first    second
    -------  -------  --------
         10        1         4
         11        2         5
         12        3         6
    

.. code:: ipython3

    # columns can be renamed with a dict()
    df.rename_columns({'second': 'b', 'first': 'a'})
    df.columns


.. parsed-literal::

    ['a', 'b']


.. code:: ipython3

    # index
    df.index


.. parsed-literal::

    [10, 11, 12]


.. code:: ipython3

    # indexes can be any unique (non-repeating) values
    df.index = ['apple', 'pear', 7.7]
    df.print()


.. parsed-literal::

    index      a    b
    -------  ---  ---
    apple      1    4
    pear       2    5
    7.7        3    6
    

.. code:: ipython3

    df.index = [10, 11, 12]
    print(df)


.. parsed-literal::

      index    a    b
    -------  ---  ---
         10    1    4
         11    2    5
         12    3    6
    

.. code:: ipython3

    # the index can also have a name, by default it is "index"
    df.index_name


.. parsed-literal::

    'index'


.. code:: ipython3

    df.index_name = 'units'
    df.index_name


.. parsed-literal::

    'units'


.. code:: ipython3

    # data is a shallow copy, be careful on how this is used
    df.index_name = 'index'
    df.data


.. parsed-literal::

    [[1, 2, 3], [4, 5, 6]]


Select Index
------------

.. code:: ipython3

    df.select_index(11)


.. parsed-literal::

    [False, True, False]


Set Values
----------

.. code:: ipython3

    # set a single cell
    df.set(10, 'a', 100)
    print(df)


.. parsed-literal::

      index    a    b
    -------  ---  ---
         10  100    4
         11    2    5
         12    3    6
    

.. code:: ipython3

    # set a value outside current range creates a new row and/or column. Can also use [] for setting
    df[13, 'c'] = 9
    df.print()


.. parsed-literal::

      index    a    b    c
    -------  ---  ---  ---
         10  100    4
         11    2    5
         12    3    6
         13              9
    

.. code:: ipython3

    # set column
    df['b'] = 55
    print(df)


.. parsed-literal::

      index    a    b    c
    -------  ---  ---  ---
         10  100   55
         11    2   55
         12    3   55
         13        55    9
    

.. code:: ipython3

    # set a subset of column
    df[[10, 12], 'b'] = 66
    print(df)


.. parsed-literal::

      index    a    b    c
    -------  ---  ---  ---
         10  100   66
         11    2   55
         12    3   66
         13        55    9
    

.. code:: ipython3

    # using boolean list
    df.set([True, False, True, False], 'b', [88, 99])
    print(df)


.. parsed-literal::

      index    a    b    c
    -------  ---  ---  ---
         10  100   88
         11    2   55
         12    3   99
         13        55    9
    

.. code:: ipython3

    # setting with slices
    df[12:13, 'a'] = 33
    print(df)


.. parsed-literal::

      index    a    b    c
    -------  ---  ---  ---
         10  100   88
         11    2   55
         12   33   99
         13   33   55    9
    

.. code:: ipython3

    df[10:12, 'c'] = [1, 2, 3]
    print(df)


.. parsed-literal::

      index    a    b    c
    -------  ---  ---  ---
         10  100   88    1
         11    2   55    2
         12   33   99    3
         13   33   55    9
    

.. code:: ipython3

    # append a row, DANGEROUS as there is no validation checking, but can be used for speed
    df.append_row(14, {'a': 44, 'c': 100, 'd': 99})
    print(df)


.. parsed-literal::

      index    a    b    c    d
    -------  ---  ---  ---  ---
         10  100   88    1
         11    2   55    2
         12   33   99    3
         13   33   55    9
         14   44       100   99
    

.. code:: ipython3

    # append rows, again use caution
    df.append_rows([15, 16], {'a': [55, 56], 'd': [100, 101]})
    print(df)


.. parsed-literal::

      index    a    b    c    d
    -------  ---  ---  ---  ---
         10  100   88    1
         11    2   55    2
         12   33   99    3
         13   33   55    9
         14   44       100   99
         15   55            100
         16   56            101
    

Get Values
----------

.. code:: ipython3

    # get a single cell
    df[10, 'a']


.. parsed-literal::

    100


.. code:: ipython3

    # get an entire column
    df['c'].print()


.. parsed-literal::

      index    c
    -------  ---
         10    1
         11    2
         12    3
         13    9
         14  100
         15
         16
    

.. code:: ipython3

    # get list of columns
    df[['a', 'c']].print()


.. parsed-literal::

      index    a    c
    -------  ---  ---
         10  100    1
         11    2    2
         12   33    3
         13   33    9
         14   44  100
         15   55
         16   56
    

.. code:: ipython3

    # get subset of the index
    df[[11, 12, 13], 'b'].print()


.. parsed-literal::

      index    b
    -------  ---
         11   55
         12   99
         13   55
    

.. code:: ipython3

    # get using slices
    df[11:13, 'b'].print()


.. parsed-literal::

      index    b
    -------  ---
         11   55
         12   99
         13   55
    

.. code:: ipython3

    # get a matrix
    df[10:11, ['a', 'c']].print()


.. parsed-literal::

      index    a    c
    -------  ---  ---
         10  100    1
         11    2    2
    

.. code:: ipython3

    # get a column, return as a list
    df.get(columns='a', as_list=True)


.. parsed-literal::

    [100, 2, 33, 33, 44, 55, 56]


.. code:: ipython3

    # get a row and return as a dictionary
    df.get_columns(index=13, columns=['a', 'b'], as_dict=True)


.. parsed-literal::

    {'a': 33, 'b': 55, 'index': 13}


.. code:: ipython3

    # get a row and return as a namedtuple, excluding the index
    df.get_columns(index=13, columns=['a', 'b'], as_namedtuple=True, name="tuplename", include_index=False)


.. parsed-literal::

    tuplename(a=33, b=55)


Set and Get by Location
-----------------------

Locations are the integer positions within the index, in other words the
offsets from 0 to ``len(index) - 1``. Negative locations count from the end.

.. code:: ipython3

    # get a single cell
    df.get_location(2, 'a')


.. parsed-literal::

    33


.. code:: ipython3

    # get an entire row when the columns is None
    print(df.get_location(2))


.. parsed-literal::

      index    a    b    c  d
    -------  ---  ---  ---  ---
         12   33   99    3
    

.. code:: ipython3

    df.get_location(0, ['b', 'c'], as_dict=True)


.. parsed-literal::

    {'b': 88, 'c': 1, 'index': 10}


.. code:: ipython3

    df.get_location(1, as_namedtuple=True, name="tuplename", index=False)


.. parsed-literal::

    tuplename(a=2, b=55, c=2, d=None)


.. code:: ipython3

    df.get_location(-1).print()


.. parsed-literal::

      index    a  b    c      d
    -------  ---  ---  ---  ---
         16   56            101
    

.. code:: ipython3

    df.get_locations(locations=[0, 2]).print()


.. parsed-literal::

      index    a    b    c  d
    -------  ---  ---  ---  ---
         10  100   88    1
         12   33   99    3
    

.. code:: ipython3

    df.set_locations(locations=[0, 2], column='a', values=-9)
    df.print()


.. parsed-literal::

      index    a    b    c    d
    -------  ---  ---  ---  ---
         10   -9   88    1
         11    2   55    2
         12   -9   99    3
         13   33   55    9
         14   44       100   99
         15   55            100
         16   56            101
    

Head and Tail
-------------

.. code:: ipython3

    df.head(2).print()


.. parsed-literal::

      index    a    b    c  d
    -------  ---  ---  ---  ---
         10   -9   88    1
         11    2   55    2
    

.. code:: ipython3

    df.tail(2).print()


.. parsed-literal::

      index    a  b    c      d
    -------  ---  ---  ---  ---
         15   55            100
         16   56            101
    

Delete columns and rows
------------------------

.. code:: ipython3

    df.delete_rows([10, 13])
    print(df)


.. parsed-literal::

      index    a    b    c    d
    -------  ---  ---  ---  ---
         11    2   55    2
         12   -9   99    3
         14   44       100   99
         15   55            100
         16   56            101
    

.. code:: ipython3

    df.delete_columns('b')
    print(df)


.. parsed-literal::

      index    a    c    d
    -------  ---  ---  ---
         11    2    2
         12   -9    3
         14   44  100   99
         15   55       100
         16   56       101
    

Convert
-------

.. code:: ipython3

    # return a dict
    df.to_dict()


.. parsed-literal::

    {'index': [11, 12, 14, 15, 16],
     'a': [2, -9, 44, 55, 56],
     'c': [2, 3, 100, None, None],
     'd': [None, None, 99, 100, 101]}


.. code:: ipython3

    # exclude the index
    df.to_dict(index=False)


.. parsed-literal::

    {'a': [2, -9, 44, 55, 56],
     'c': [2, 3, 100, None, None],
     'd': [None, None, 99, 100, 101]}


.. code:: ipython3

    # return an OrderedDict()
    df.to_dict(ordered=True)


.. parsed-literal::

    OrderedDict([('index', [11, 12, 14, 15, 16]),
                 ('a', [2, -9, 44, 55, 56]),
                 ('c', [2, 3, 100, None, None]),
                 ('d', [None, None, 99, 100, 101])])


.. code:: ipython3

    # return a list of just one column
    df['c'].to_list()


.. parsed-literal::

    [2, 3, 100, None, None]


.. code:: ipython3

    # convert to JSON
    string = df.to_json()
    print(string)


.. parsed-literal::

    {"data": {"a": [2, -9, 44, 55, 56], "c": [2, 3, 100, null, null], "d": [null, null, 99, 100, 101]}, "index": [11, 12, 14, 15, 16], "meta_data": {"index_name": "index", "columns": ["a", "c", "d"], "sort": false}}
    

.. code:: ipython3

    # construct DataFrame from JSON
    df_from_json = rc.DataFrame.from_json(string)
    print(df_from_json)


.. parsed-literal::

      index    a    c    d
    -------  ---  ---  ---
         11    2    2
         12   -9    3
         14   44  100   99
         15   55       100
         16   56       101
    

Sort by Index and Column
------------------------

.. code:: ipython3

    df = rc.DataFrame({'a': [4, 3, 2, 1], 'b': [6, 7, 8, 9]}, index=[25, 24, 23, 22])
    print(df)


.. parsed-literal::

      index    a    b
    -------  ---  ---
         25    4    6
         24    3    7
         23    2    8
         22    1    9
    

.. code:: ipython3

    # sort by index. Sorts are inplace
    df.sort_index()
    print(df)


.. parsed-literal::

      index    a    b
    -------  ---  ---
         22    1    9
         23    2    8
         24    3    7
         25    4    6
    

.. code:: ipython3

    # sort by column
    df.sort_columns('b')
    print(df)


.. parsed-literal::

      index    a    b
    -------  ---  ---
         25    4    6
         24    3    7
         23    2    8
         22    1    9
    

.. code:: ipython3

    # sort by column in reverse order
    df.sort_columns('b', reverse=True)
    print(df)


.. parsed-literal::

      index    a    b
    -------  ---  ---
         22    1    9
         23    2    8
         24    3    7
         25    4    6
    

.. code:: ipython3

    # sorting with a key function is available, see tests for examples

Append
------

.. code:: ipython3

    df1 = rc.DataFrame({'a': [1, 2], 'b': [5, 6]}, index=[1, 2])
    df1.print()


.. parsed-literal::

      index    a    b
    -------  ---  ---
          1    1    5
          2    2    6
    

.. code:: ipython3

    df2 = rc.DataFrame({'b': [7, 8], 'c': [11, 12]}, index=[3, 4])
    print(df2)


.. parsed-literal::

      index    b    c
    -------  ---  ---
          3    7   11
          4    8   12
    

.. code:: ipython3

    df1.append(df2)
    print(df1)


.. parsed-literal::

      index    a    b    c
    -------  ---  ---  ---
          1    1    5
          2    2    6
          3         7   11
          4         8   12
    

Math Methods
------------

.. code:: ipython3

    df = rc.DataFrame({'a': [1, 2, 3], 'b': [2, 8, 9]})

.. code:: ipython3

    # test for equality
    df.equality('a', value=3)


.. parsed-literal::

    [False, False, True]


.. code:: ipython3

    # all math methods can operate on a subset of the index
    df.equality('b', indexes=[1, 2], value=2)


.. parsed-literal::

    [False, False]


.. code:: ipython3

    # add two columns
    df.add('a', 'b')


.. parsed-literal::

    [3, 10, 12]


.. code:: ipython3

    # subtract
    df.subtract('b', 'a')


.. parsed-literal::

    [1, 6, 6]


.. code:: ipython3

    # multiply
    df.multiply('a', 'b', [0, 2])


.. parsed-literal::

    [2, 27]


.. code:: ipython3

    # divide
    df.divide('b', 'a')


.. parsed-literal::

    [2.0, 4.0, 3.0]


Multi-Index
-----------

Raccoon does not have true hierarchical multi-index capabilities like
Pandas, but attempts to mimic some of the capabilities with the use of
tuples as the index. Raccoon does not provide any checking to make sure
the indexes are all the same length or any other integrity checking.

.. code:: ipython3

    tuples = [('a', 1, 3), ('a', 1, 4), ('a', 2, 3), ('b', 1, 4), ('b', 2, 1), ('b', 3, 3)]
    df = rc.DataFrame({'a': [1, 2, 3, 4, 5, 6]}, index=tuples)
    print(df)


.. parsed-literal::

    index          a
    -----------  ---
    ('a', 1, 3)    1
    ('a', 1, 4)    2
    ('a', 2, 3)    3
    ('b', 1, 4)    4
    ('b', 2, 1)    5
    ('b', 3, 3)    6
    

The ``select_index`` method works with tuples by allowing ``None`` to act as a
wild card for matching.

.. code:: ipython3

    compare = ('a', None, None)
    df.select_index(compare)


.. parsed-literal::

    [True, True, True, False, False, False]


.. code:: ipython3

    compare = ('a', None, 3)
    df.select_index(compare, 'boolean')


.. parsed-literal::

    [True, False, True, False, False, False]


.. code:: ipython3

    compare = (None, 2, None)
    df.select_index(compare, 'value')


.. parsed-literal::

    [('a', 2, 3), ('b', 2, 1)]


.. code:: ipython3

    compare = (None, None, 3)
    df.select_index(compare, 'value')


.. parsed-literal::

    [('a', 1, 3), ('a', 2, 3), ('b', 3, 3)]


.. code:: ipython3

    compare = (None, None, None)
    df.select_index(compare)


.. parsed-literal::

    [True, True, True, True, True, True]


Reset Index
-----------

.. code:: ipython3

    df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'])
    print(df)


.. parsed-literal::

      index    a    b
    -------  ---  ---
          0    1    4
          1    2    5
          2    3    6
    

.. code:: ipython3

    df.reset_index()
    df


.. parsed-literal::

    object id: 2167256509840
    columns:
    ['a', 'b', 'index_0']
    data:
    [[1, 2, 3], [4, 5, 6], [0, 1, 2]]
    index:
    [0, 1, 2]


.. code:: ipython3

    df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=['x', 'y', 'z'], index_name='jelo')
    print(df)


.. parsed-literal::

    jelo      a    b
    ------  ---  ---
    x         1    4
    y         2    5
    z         3    6
    

.. code:: ipython3

    df.reset_index()
    print(df)


.. parsed-literal::

      index    a    b  jelo
    -------  ---  ---  ------
          0    1    4  x
          1    2    5  y
          2    3    6  z
    

.. code:: ipython3

    df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'],
                      index=[('a', 10, 'x'), ('b', 11, 'y'), ('c', 12, 'z')], index_name=('melo', 'helo', 'gelo'))
    print(df)


.. parsed-literal::

    ('melo', 'helo', 'gelo')      a    b
    --------------------------  ---  ---
    ('a', 10, 'x')                1    4
    ('b', 11, 'y')                2    5
    ('c', 12, 'z')                3    6
    

.. code:: ipython3

    df.reset_index()
    print(df)


.. parsed-literal::

      index    a    b  melo      helo  gelo
    -------  ---  ---  ------  ------  ------
          0    1    4  a           10  x
          1    2    5  b           11  y
          2    3    6  c           12  z
    

.. code:: ipython3

    df = rc.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, columns=['a', 'b'], index=['x', 'y', 'z'], index_name='jelo')
    print(df)


.. parsed-literal::

    jelo      a    b
    ------  ---  ---
    x         1    4
    y         2    5
    z         3    6
    

.. code:: ipython3

    df.reset_index(drop=True)
    print(df)


.. parsed-literal::

      index    a    b
    -------  ---  ---
          0    1    4
          1    2    5
          2    3    6
    

Iterators
---------

.. code:: ipython3

    df = rc.DataFrame({'a': [1, 2, 'c'], 'b': [5, 6, 'd']}, index=[1, 2, 3])

.. code:: ipython3

    for row in df.iterrows():
        print(row)


.. parsed-literal::

    {'index': 1, 'a': 1, 'b': 5}
    {'index': 2, 'a': 2, 'b': 6}
    {'index': 3, 'a': 'c', 'b': 'd'}
    

.. code:: ipython3

    for row in df.itertuples():
        print(row)


.. parsed-literal::

    Raccoon(index=1, a=1, b=5)
    Raccoon(index=2, a=2, b=6)
    Raccoon(index=3, a='c', b='d')
    

Sorted DataFrames
-----------------

DataFrames will be set to sorted by default if no index is given at
initialization. If an index is given at initialization then the
parameter ``sort`` must be set to ``True`` for the DataFrame to be sorted.

.. code:: ipython3

    df = rc.DataFrame({'a': [3, 5, 4], 'b': [6, 8, 7]}, index=[12, 15, 14], sort=True)

When ``sort=True`` on initialization the data will be sorted by index to
start.

.. code:: ipython3

    df.print()


.. parsed-literal::

      index    a    b
    -------  ---  ---
         12    3    6
         14    4    7
         15    5    8
    

.. code:: ipython3

    df[16, 'b'] = 9
    print(df)


.. parsed-literal::

      index    a    b
    -------  ---  ---
         12    3    6
         14    4    7
         15    5    8
         16         9
    

.. code:: ipython3

    df.set(indexes=13, values={'a': 3.5, 'b': 6.5})
    print(df)


.. parsed-literal::

      index    a    b
    -------  ---  ---
         12  3    6
         13  3.5  6.5
         14  4    7
         15  5    8
         16       9