This post describes a simple pandas-style DataFrame implementation in JavaScript, with the array calculations performed in WebAssembly.
The code for this post can be found here .
The DataFrame and Series Implementations The series and data frame implementations are taken from my previous post with minor modifications.
Type awareness The series (and therefore the data frame) will need to be aware of the type of data they contain. For simplicity I have limited the types to 'int', 'double' and 'object'.
A series is constructed as follows.
// A series is built from a name, the backing array and the element type.
const s1 = new Series('height', [1.82, 1.73, 1.69, 1.92], 'double')
The code for the series now looks like this.
import arrayMethods from './array-methods'

/**
 * A named, typed column of values.
 *
 * The constructor returns a Proxy around the instance, so a series can be used
 * both as an object (`name`, `array`, `type`, `toString`) and as if it were
 * the underlying array (`series[0]`, `series.length`, `series.map(...)`).
 */
export class Series {
  /**
   * @param {string} name - Column name.
   * @param {Array} array - Backing array holding the values.
   * @param {string} type - Element type tag; the post uses 'int', 'double'
   *   and 'object'.
   */
  constructor (name, array, type) {
    this.name = name
    this.array = array
    this.type = type

    return new Proxy(this, {
      get: (obj, prop, receiver) => {
        if (prop in obj) {
          // Own and inherited members of the Series itself always win.
          return Reflect.get(obj, prop, receiver)
        } else if (arrayMethods.has(prop)) {
          // Registered array methods receive the raw series followed by the
          // call arguments; their return value is spread into the remaining
          // Series constructor arguments (array, type) of a new unnamed series.
          return (...args) => new Series('', ...arrayMethods.get(prop)(obj, ...args))
        } else {
          // Anything else (indices, length, array methods) is served by the
          // backing array.
          return Reflect.get(obj.array, prop, receiver.array)
        }
      },
      set: (obj, prop, value, receiver) => {
        if (prop in obj) {
          return Reflect.set(obj, prop, value, receiver)
        } else {
          // Unknown properties (typically indices) are written to the array.
          return Reflect.set(obj.array, prop, value, receiver.array)
        }
      },
      apply: (target, thisArgument, argumentList) => {
        return Reflect.apply(target, thisArgument, argumentList)
      },
      // The remaining traps simply forward to the default behaviour.
      defineProperty: Reflect.defineProperty,
      getOwnPropertyDescriptor: Reflect.getOwnPropertyDescriptor,
      deleteProperty: Reflect.deleteProperty,
      getPrototypeOf: Reflect.getPrototypeOf,
      setPrototypeOf: Reflect.setPrototypeOf,
      isExtensible: Reflect.isExtensible,
      preventExtensions: Reflect.preventExtensions,
      has: Reflect.has,
      ownKeys: Reflect.ownKeys
    })
  }

  /**
   * @returns {string} The series rendered as `(name;type): v0, v1, ...`.
   */
  toString () {
    return `(${this.name};${this.type}): ${this.array.join(', ')}`
  }
}
Comparing this to the previous implementation we can see three changes (plus a minor change to toString).
The constructor takes in an extra type
parameter.
The operations are no longer defined in the class; they are now provided by the array-methods
module.
1 import arrayMethods from './array-methods'
The last change is to modify the way the proxy finds the operator methods with arrayMethods.has(prop)
.
1 2 3 4 5 6 7 8 9 10 11 12 return new Proxy (this , { get : (obj, prop, receiver ) => { if (prop in obj) { return Reflect .get (obj, prop, receiver) } else if (arrayMethods.has (prop)) { return (...args ) => new Series ('' , ...arrayMethods.get (prop)(obj, ...args)) } else { return Reflect .get (obj.array , prop, receiver.array ) } }, ... }
Here is the implementation of the DataFrame.
import { Series } from './Series'

/**
 * A collection of named Series (columns).
 *
 * The constructor returns a Proxy around the instance so that columns can be
 * read and assigned directly on the frame: `df['col1']`, `df['col3'] = series`.
 */
export class DataFrame {
  /**
   * @param {Iterable<Series>} series - The columns; each is stored under its
   *   own `name`.
   */
  constructor (series) {
    this.series = {}
    for (const item of series) {
      this.series[item.name] = item
    }

    return new Proxy(this, {
      get: (obj, prop, receiver) => {
        // Members of the frame itself win; everything else is a column lookup.
        return prop in obj
          ? Reflect.get(obj, prop, receiver)
          : Reflect.get(obj.series, prop, receiver.series)
      },
      set: (obj, prop, value, receiver) => {
        if (prop in obj) {
          // The trap must report success: a falsy result makes the assignment
          // throw a TypeError in strict (and therefore module) code.
          return Reflect.set(obj, prop, value, receiver)
        }
        // Assigning an unknown property adds (or replaces) a column; the
        // assigned series is renamed to the property it is stored under.
        value.name = prop
        return Reflect.set(obj.series, prop, value, receiver.series)
      },
      // No `apply` trap: the proxied target is not callable, so such a trap
      // could never run.
      // The remaining traps simply forward to the default behaviour.
      defineProperty: Reflect.defineProperty,
      getOwnPropertyDescriptor: Reflect.getOwnPropertyDescriptor,
      deleteProperty: Reflect.deleteProperty,
      getPrototypeOf: Reflect.getPrototypeOf,
      setPrototypeOf: Reflect.setPrototypeOf,
      isExtensible: Reflect.isExtensible,
      preventExtensions: Reflect.preventExtensions,
      has: Reflect.has,
      ownKeys: Reflect.ownKeys
    })
  }

  /**
   * Builds a frame from an array of row objects.
   *
   * @param {Array<Object>} data - One object per row, keyed by column name.
   *   Rows may omit columns; the corresponding cells are left unset.
   * @param {Object<string, string>} [types={}] - Type tag for each column name.
   * @returns {DataFrame} A frame with one series per distinct column.
   */
  static fromObject (data, types = {}) {
    // A Map keeps the bookkeeping safe for column names such as 'toString'
    // or 'constructor', which `in` would find on a plain object's prototype.
    const columns = new Map()
    for (let i = 0; i < data.length; i++) {
      for (const column in data[i]) {
        if (!columns.has(column)) {
          columns.set(column, new Series(column, new Array(data.length), types[column]))
        }
        columns.get(column)[i] = data[i][column]
      }
    }

    return new DataFrame([...columns.values()])
  }

  /**
   * @returns {string} A header line of column names, one line per row
   *   (missing cells are rendered empty) and a final line of column types,
   *   each terminated by a newline.
   */
  toString () {
    const columns = Object.getOwnPropertyNames(this.series)
    const rowCount = Object.values(this.series)
      .map(x => x.length)
      .reduce((longest, length) => Math.max(longest, length), 0)

    const lines = [columns.join(', ')]
    for (let i = 0; i < rowCount; i++) {
      const row = columns.map(column =>
        i < this.series[column].length ? this.series[column][i] : null
      )
      lines.push(row.join(', '))
    }
    lines.push(columns.map(column => this.series[column].type).join(', '))

    return lines.join('\n') + '\n'
  }
}
The only changes are in the fromObject
helper method which takes an extra argument for the types and the toString
to print the types. A DataFrame is now constructed as follows.
// Rows come first, followed by the type tag of every column.
const df = DataFrame.fromObject(
  [
    { col0: 'a', col1: 5, col2: 8.1 },
    { col0: 'b', col1: 6, col2: 3.2 }
  ],
  { col0: 'object', col1: 'int', col2: 'double' }
)
Array Methods A new file has been added to create an ArrayMethods
singleton. This is used as a central store of array methods.
/**
 * Central registry of array methods, keyed by name (a string or a symbol such
 * as `Symbol.for('+')`).
 *
 * The class is a singleton: every `new ArrayMethods()` returns the first
 * instance that was created.
 */
class ArrayMethods {
  constructor () {
    if (!ArrayMethods.instance) {
      // A null-prototype dictionary, so that `has` and `get` only ever see
      // registered methods and never inherited names such as 'toString'.
      this._methods = Object.create(null)
      ArrayMethods.instance = this
    }

    return ArrayMethods.instance
  }

  /**
   * Registers (or replaces) a method.
   * @param {string|symbol} name
   * @param {Function} method
   */
  set (name, method) {
    this._methods[name] = method
  }

  /**
   * @param {string|symbol} name
   * @returns {boolean} Whether a method has been registered under `name`.
   */
  has (name) {
    return name in this._methods
  }

  /**
   * @param {string|symbol} name
   * @returns {Function|undefined} The registered method, if any.
   */
  get (name) {
    return this._methods[name]
  }
}

const instance = new ArrayMethods()
// Freezing is shallow: the instance cannot gain or lose properties, but
// methods can still be registered in its dictionary.
Object.freeze(instance)

export default instance
The WebAssembly Calculations The calculations have been written in C following the methods described in this post, and this post.
Here is the code for addition.
/*
 * Adds two int arrays element by element.
 *
 * Returns a newly allocated array of `length` elements obtained from
 * allocateMemory, or 0 if `length` is negative or the allocation fails.
 * The caller is responsible for releasing the result.
 */
__attribute__((used)) int* addInt32Arrays(int* array1, int* array2, int length)
{
  /* A negative length would wrap to a huge size once multiplied by sizeof. */
  if (length < 0)
    return 0;

  int* result = (int*) allocateMemory(length * sizeof(int));
  if (result == 0)
    return 0;

  for (int i = 0; i < length; ++i) {
    result[i] = array1[i] + array2[i];
  }

  return result;
}

/*
 * Adds two double arrays element by element.
 *
 * Returns a newly allocated array of `length` elements obtained from
 * allocateMemory, or 0 if `length` is negative or the allocation fails.
 * The caller is responsible for releasing the result.
 */
__attribute__((used)) double* addFloat64Arrays(double* array1, double* array2, int length)
{
  /* A negative length would wrap to a huge size once multiplied by sizeof. */
  if (length < 0)
    return 0;

  double* result = (double*) allocateMemory(length * sizeof(double));
  if (result == 0)
    return 0;

  for (int i = 0; i < length; ++i) {
    result[i] = array1[i] + array2[i];
  }

  return result;
}
Note how we need one function for integers and another for doubles.
There is a makefile to build the wasm.
Marshalling JavaScript to WebAssembly I have tidied up the marshalling between JavaScript and WebAssembly. I created a class to manage the wasm functions.
/**
 * Marshals plain JavaScript arrays to and from functions exported by a
 * WebAssembly module that work on arrays held in the module's memory.
 *
 * Inputs are copied into memory obtained from `allocateMemory`, the wasm
 * function is called with byte offsets, its result is copied out into a
 * regular array, and every allocation is released through `freeMemory`.
 *
 * Allocations are tracked as byte offsets rather than typed-array views:
 * growing a WebAssembly.Memory detaches its previous ArrayBuffer, so a view
 * is only created at the moment it is needed.
 */
export class WasmFunctionManager {
  /**
   * @param {WebAssembly.Memory} memory - Memory the wasm functions operate on.
   * @param {function(number): number} allocateMemory - Takes a byte count and
   *   returns a byte offset; 0 is treated as an allocation failure.
   * @param {function(number): void} freeMemory - Releases a byte offset
   *   previously returned by `allocateMemory` or by a wasm function.
   */
  constructor (memory, allocateMemory, freeMemory) {
    this.memory = memory
    this.allocateMemory = allocateMemory
    this.freeMemory = freeMemory
  }

  /**
   * Allocates room for `length` elements and returns a view over it.
   *
   * @param {Function} typedArrayType - Typed array constructor, e.g. Int32Array.
   * @param {number} length - Number of elements.
   * @returns {Object} A `typedArrayType` view; release its `byteOffset` with
   *   `freeMemory` when done.
   * @throws {RangeError} If the allocation fails.
   */
  createTypedArray (typedArrayType, length) {
    // Allocate first and read `memory.buffer` afterwards: the allocation may
    // grow the memory and thereby replace the buffer.
    const byteOffset = this._allocate(typedArrayType, length)
    return this._view(typedArrayType, byteOffset, length)
  }

  /**
   * Calls `func(inputOffset, length)` on a copy of `array`.
   *
   * @param {function(number, number): number} func - Returns the byte offset
   *   of a result array of the same length, or 0 on failure.
   * @param {Array<number>} array - Input values.
   * @param {Function} typedArrayType - Typed array constructor matching the
   *   element type `func` expects.
   * @returns {Array<number>} The result copied into a regular array.
   * @throws {RangeError} If any allocation fails.
   */
  invokeUnaryFunction (func, array, typedArrayType) {
    const length = array.length
    if (length === 0) {
      // Nothing to compute; avoids depending on how zero-byte allocations behave.
      return []
    }

    let inputOffset = 0
    let outputOffset = 0
    try {
      inputOffset = this._allocate(typedArrayType, length)
      this._view(typedArrayType, inputOffset, length).set(array)

      outputOffset = func(inputOffset, length)
      if (!outputOffset) {
        throw new RangeError('Failed to allocate memory')
      }

      return Array.from(this._view(typedArrayType, outputOffset, length))
    } finally {
      this._release(inputOffset, outputOffset)
    }
  }

  /**
   * Calls `func(lhsOffset, rhsOffset, length)` on copies of `lhs` and `rhs`.
   *
   * @param {function(number, number, number): number} func - Returns the byte
   *   offset of a result array of the same length, or 0 on failure.
   * @param {Array<number>} lhs - Left-hand input values.
   * @param {Array<number>} rhs - Right-hand input values.
   * @param {Function} typedArrayType - Typed array constructor matching the
   *   element type `func` expects.
   * @returns {Array<number>} The result copied into a regular array.
   * @throws {RangeError} If the inputs differ in length or an allocation fails.
   */
  invokeBinaryFunction (func, lhs, rhs, typedArrayType) {
    if (lhs.length !== rhs.length) {
      throw new RangeError('Arrays must the the same length')
    }

    const length = lhs.length
    if (length === 0) {
      // Nothing to compute; avoids depending on how zero-byte allocations behave.
      return []
    }

    let input1Offset = 0
    let input2Offset = 0
    let outputOffset = 0
    try {
      // Perform both allocations before creating any view, so that a memory
      // growth triggered by the second allocation cannot invalidate the first.
      input1Offset = this._allocate(typedArrayType, length)
      input2Offset = this._allocate(typedArrayType, length)
      this._view(typedArrayType, input1Offset, length).set(lhs)
      this._view(typedArrayType, input2Offset, length).set(rhs)

      outputOffset = func(input1Offset, input2Offset, length)
      if (!outputOffset) {
        throw new RangeError('Failed to allocate memory')
      }

      return Array.from(this._view(typedArrayType, outputOffset, length))
    } finally {
      this._release(input1Offset, input2Offset, outputOffset)
    }
  }

  /** Allocates room for `length` elements; returns the byte offset or throws. */
  _allocate (typedArrayType, length) {
    const byteOffset = this.allocateMemory(length * typedArrayType.BYTES_PER_ELEMENT)
    if (!byteOffset) {
      throw new RangeError('Unable to allocate memory for typed array')
    }
    return byteOffset
  }

  /** Creates a view over the memory's current buffer. */
  _view (typedArrayType, byteOffset, length) {
    return new typedArrayType(this.memory.buffer, byteOffset, length)
  }

  /** Frees every offset that was actually allocated (0 means "never allocated"). */
  _release (...byteOffsets) {
    for (const byteOffset of byteOffsets) {
      if (byteOffset) {
        this.freeMemory(byteOffset)
      }
    }
  }
}
This follows the ideas presented in the previous posts, but adds some error checking, and a try ... finally
clause to prevent memory leaks.
Setting up the operators To set up the operators we first need to decide whether to use the 'int'
,'double'
or 'object'
functions.
/**
 * Picks the element type in which a binary operation should be carried out.
 *
 * @param {string} lhsType - Type tag of the left operand.
 * @param {string} rhsType - Type tag of the right operand.
 * @returns {string} 'int' when both operands are 'int'; 'double' when both are
 *   numeric and at least one is 'double'; 'object' for everything else.
 */
function chooseBestType (lhsType, rhsType) {
  const isNumeric = type => type === 'int' || type === 'double'

  if (lhsType === 'int' && rhsType === 'int') {
    return 'int'
  }
  if (isNumeric(lhsType) && isNumeric(rhsType)) {
    // Covers int/double, double/int and double/double.
    return 'double'
  }
  return 'object'
}
Once we have chosen the type we need to build a wrapper function to invoke the appropriate method. Here is the helper that makes a binary operation.
/**
 * Builds a binary operation that dispatches on the operand types.
 *
 * @param {Object} wasmFunctionManager - Provides
 *   `invokeBinaryFunction(func, lhsArray, rhsArray, typedArrayType)`.
 * @param {Function} intFunc - Wasm function used when the best type is 'int'.
 * @param {Function} doubleFunc - Wasm function used when the best type is 'double'.
 * @param {function(Object, Object): Array} defaultFunc - Fallback for every
 *   other type; it receives the two series themselves, not their arrays.
 * @returns {function(Object, Object): Array} A function of `(lhs, rhs)` that
 *   returns the pair `[resultArray, resultType]`.
 */
function makeBinaryOperation (wasmFunctionManager, intFunc, doubleFunc, defaultFunc) {
  // The wasm function and matching typed-array constructor for each type that
  // is handled in WebAssembly.
  const wasmKernels = new Map([
    ['int', { func: intFunc, typedArrayType: Int32Array }],
    ['double', { func: doubleFunc, typedArrayType: Float64Array }]
  ])

  return (lhs, rhs) => {
    const bestType = chooseBestType(lhs.type, rhs.type)
    const kernel = wasmKernels.get(bestType)

    const result = kernel === undefined
      ? defaultFunc(lhs, rhs)
      : wasmFunctionManager.invokeBinaryFunction(
        kernel.func,
        lhs.array,
        rhs.array,
        kernel.typedArrayType
      )

    return [result, bestType]
  }
}
Lastly we create the wasm instance and register the types. Here is an edited version which demonstrates registering the addition operator.
/**
 * Instantiates the WebAssembly module and registers the array operators that
 * it backs. Must complete before any operator is used on a series.
 *
 * @returns {Promise<void>} Resolves once the operators have been registered.
 */
export async function setupWasm () {
  // The path is resolved relative to the current working directory.
  const buf = fs.readFileSync('./src-wasm/data-frame.wasm')
  const res = await WebAssembly.instantiate(buf, {})

  const {
    memory,
    allocateMemory,
    freeMemory,
    addInt32Arrays,
    addFloat64Arrays
  } = res.instance.exports

  const wasmFunctionManager = new WasmFunctionManager(memory, allocateMemory, freeMemory)

  // Addition: wasm for int and double operands, plain JavaScript otherwise.
  arrayMethods.set(
    Symbol.for('+'),
    makeBinaryOperation(
      wasmFunctionManager,
      addInt32Arrays,
      addFloat64Arrays,
      (lhs, rhs) => lhs.array.map((value, index) => value + rhs.array[index])
    )
  )
}
Running the code We can run the code as follows.
import { DataFrame } from './DataFrame'
import { setupWasm } from './setup-wasm'

/**
 * Builds a small frame, adds an int column to a double column and prints the
 * frame before and after.
 *
 * NOTE(review): the `+` on two series presumably relies on a build-time
 * operator-overloading transform that is switched on by the directive below
 * and dispatches to the method registered under Symbol.for('+') — confirm
 * against the build configuration.
 */
function example () {
  'operator-overloading enabled'

  const df = DataFrame.fromObject(
    [
      { col0: 'a', col1: 5, col2: 8.1 },
      { col0: 'b', col1: 6, col2: 3.2 }
    ],
    { col0: 'object', col1: 'int', col2: 'double' }
  )
  console.log(df.toString())

  df['col3'] = df['col1'] + df['col2']
  console.log(df.toString())
}

/**
 * Registers the wasm-backed operators, then runs the example.
 */
async function main () {
  await setupWasm()
  example()
}

main().then(() => console.log('Done')).catch(error => console.error(error))
Thoughts Clearly the DataFrame needs a lot of work. There is no concept of grouping, indices, dates and times, etc. However we have a working proof of concept that provides a syntactically elegant and efficient implementation.