Example Usage for Series

# remove comment to use latest development version
import sys; sys.path.insert(0, '../')
# import libraries
import raccoon as rc

Initialize

# empty Series
srs = rc.Series()
srs
object id: 1891392163736
data:
[]
index:
[]
# with indexes but no data
srs = rc.Series(index=[1, 2, 3])
srs
object id: 1891392163568
data:
[None, None, None]
index:
[1, 2, 3]
# with data
srs = rc.Series(data=[4, 5, 6], index=[10, 11, 12])
srs
object id: 1891392217440
data:
[4, 5, 6]
index:
[10, 11, 12]

Print

srs.show()
  index    value
-------  -------
     10        4
     11        5
     12        6
print(srs)
  index    value
-------  -------
     10        4
     11        5
     12        6

Setters and Getters

# data_name
srs.data_name
'value'
srs.data_name = 'new_data'
print(srs)
  index    new_data
-------  ----------
     10           4
     11           5
     12           6
# index
srs.index
[10, 11, 12]
# indexes can be any non-repeating (unique) values
srs.index = ['apple', 'pear', 7.7]
srs.show()
index      new_data
-------  ----------
apple             4
pear              5
7.7               6
srs.index = [10, 11, 12]
print(srs)
  index    new_data
-------  ----------
     10           4
     11           5
     12           6
# the index can also have a name; by default it is "index"
srs.index_name
'index'
srs.index_name = 'units'
srs.index_name
'units'
# data is a shallow copy, be careful on how this is used
srs.index_name = 'index'
srs.data
[4, 5, 6]

Select Index

srs.select_index(11)
[False, True, False]

Set Values

# set a single cell
srs.set(10, 100)
print(srs)
  index    new_data
-------  ----------
     10         100
     11           5
     12           6
# setting a value outside the current range creates a new row. Can also use [] for setting
srs[13] = 9
srs.show()
  index    new_data
-------  ----------
     10         100
     11           5
     12           6
     13           9
# set a subset of rows
srs[[10, 12]] = 66
print(srs)
  index    new_data
-------  ----------
     10          66
     11           5
     12          66
     13           9
# using boolean list
srs.set([True, False, True, False], [88, 99])
print(srs)
  index    new_data
-------  ----------
     10          88
     11           5
     12          99
     13           9
# setting with slices
srs[12:13] = 33
print(srs)
  index    new_data
-------  ----------
     10          88
     11           5
     12          33
     13          33
srs[10:12] = [1, 2, 3]
print(srs)
  index    new_data
-------  ----------
     10           1
     11           2
     12           3
     13          33
# set a location
srs.set_location(1, 22)
print(srs)
  index    new_data
-------  ----------
     10           1
     11          22
     12           3
     13          33
# set multiple locations
srs.set_locations([0, 2], [11, 27])
print(srs)
  index    new_data
-------  ----------
     10          11
     11          22
     12          27
     13          33
# append a row, DANGEROUS as there is no validation checking, but can be used for speed
srs.append_row(14, 99)
print(srs)
  index    new_data
-------  ----------
     10          11
     11          22
     12          27
     13          33
     14          99
# append multiple rows, again no sort check
srs.append_rows([15, 16], [100, 110])
print(srs)
  index    new_data
-------  ----------
     10          11
     11          22
     12          27
     13          33
     14          99
     15         100
     16         110

Get Values

# get a single cell
srs[10]
11
# get subset of the index
srs[[11, 12, 13]].show()
  index    new_data
-------  ----------
     11          22
     12          27
     13          33
# get using slices
srs[11:13].show()
  index    new_data
-------  ----------
     11          22
     12          27
     13          33
# return as a list
srs.get([11, 12, 13], as_list=True)
[22, 27, 33]

Set and Get by Location

Locations are the positions within the index, in other words the integer offsets from 0 to len(index) - 1. Negative locations count back from the end, so -1 is the last row.

print(srs.get_location(2))
{'index': 12, 'new_data': 27}
srs.get_location(-1)
{'index': 16, 'new_data': 110}
srs.get_locations(locations=[0, 2]).show()
  index    new_data
-------  ----------
     10          11
     12          27
srs.get_locations(locations=[0, 2], as_list=True)
[11, 27]
srs.set_locations([-1, -2], values=[10, 9])
print(srs)
  index    new_data
-------  ----------
     10          11
     11          22
     12          27
     13          33
     14          99
     15           9
     16          10

Head and Tail

srs.head(2).show()
  index    new_data
-------  ----------
     10          11
     11          22
srs.tail(2).show()
  index    new_data
-------  ----------
     15           9
     16          10

Delete rows

srs.delete([10, 13])
print(srs)
  index    new_data
-------  ----------
     11          22
     12          27
     14          99
     15           9
     16          10

Convert

# return a dict
srs.to_dict()
{'index': [11, 12, 14, 15, 16], 'new_data': [22, 27, 99, 9, 10]}
# exclude the index
srs.to_dict(index=False)
{'new_data': [22, 27, 99, 9, 10]}
# return an OrderedDict()
srs.to_dict(ordered=True)
OrderedDict([('index', [11, 12, 14, 15, 16]),
             ('new_data', [22, 27, 99, 9, 10])])

Sort by Index

srs = rc.Series([6, 7, 8, 9], index=[25, 24, 23, 22])
print(srs)
  index    value
-------  -------
     25        6
     24        7
     23        8
     22        9
# sort by index. Sorts are in place
srs.sort_index()
print(srs)
  index    value
-------  -------
     22        9
     23        8
     24        7
     25        6

Math Methods

srs = rc.Series([1, 2, 3])
# test for equality
srs.equality(value=3)
[False, False, True]
# all math methods can operate on a subset of the index
srs.equality(indexes=[1, 2], value=2)
[True, False]

Multi-Index

Raccoon does not have true hierarchical multi-index capabilities like Pandas, but attempts to mimic some of the capabilities with the use of tuples as the index. Raccoon does not provide any checking to make sure the indexes are all the same length or any other integrity checking.

tuples = [('a', 1, 3), ('a', 1, 4), ('a', 2, 3), ('b', 1, 4), ('b', 2, 1), ('b', 3, 3)]
srs = rc.Series([1, 2, 3, 4, 5, 6], index=tuples)
print(srs)
index          value
-----------  -------
('a', 1, 3)        1
('a', 1, 4)        2
('a', 2, 3)        3
('b', 1, 4)        4
('b', 2, 1)        5
('b', 3, 3)        6

The select_index method works with tuples by allowing None to act as a wildcard for matching.

compare = ('a', None, None)
srs.select_index(compare)
[True, True, True, False, False, False]
compare = ('a', None, 3)
srs.select_index(compare, 'boolean')
[True, False, True, False, False, False]
compare = (None, 2, None)
srs.select_index(compare, 'value')
[('a', 2, 3), ('b', 2, 1)]
compare = (None, None, 3)
srs.select_index(compare, 'value')
[('a', 1, 3), ('a', 2, 3), ('b', 3, 3)]
compare = (None, None, None)
srs.select_index(compare)
[True, True, True, True, True, True]

Reset Index

srs = rc.Series([1, 2, 3], index=[9, 10, 11])
print(srs)
  index    value
-------  -------
      9        1
     10        2
     11        3
srs.reset_index()
srs
object id: 1891392288752
data:
[1, 2, 3]
index:
[0, 1, 2]
srs = rc.Series([1, 2, 3], index=[9, 10, 11], index_name='new name')
print(srs)
  new name    value
----------  -------
         9        1
        10        2
        11        3
srs.reset_index()
print(srs)
  index    value
-------  -------
      0        1
      1        2
      2        3

Sorted Series

A Series is sorted by default if no index is given at initialization. If an index is given at initialization, the parameter sort must be set to True to get a sorted Series.

srs = rc.Series([3, 5, 4], index=[12, 15, 14], sort=True)

When sort=True on initialization, the data will be sorted by index to start

srs.show()
  index    value
-------  -------
     12        3
     14        4
     15        5
srs[16] = 9
print(srs)
  index    value
-------  -------
     12        3
     14        4
     15        5
     16        9
srs.set(indexes=13, values=3.5)
print(srs)
  index    value
-------  -------
     12      3
     13      3.5
     14      4
     15      5
     16      9

List or BList

The underlying data structure can be either blist (default) or list

# Construct with use_blist=True, the default
srs_blist = rc.Series([1, 2, 3], index=[5, 6, 7], use_blist=True)
# see that the data structures are all blists
srs_blist.data
blist([1, 2, 3])
srs_blist.index
blist([5, 6, 7])
# now construct with use_blist=False and they are all lists
srs_list = rc.Series([1, 2, 3], index=[5, 6, 7], use_blist=False)
srs_list.data
[1, 2, 3]
srs_list.index
[5, 6, 7]