
Table operations cheat sheet

Create tables#

Empty tables#

from deephaven.TableTools import emptyTable

result = emptyTable(5)

# Empty tables are often followed with a formula
result1 = result.update("X = 5")

New tables#

New tables are created with newTable. Columns are created using column-type methods such as intCol and stringCol:

from deephaven.TableTools import newTable, intCol, stringCol

result = newTable(
    intCol("IntegerColumn", 1, 2, 3),
    stringCol("Strings", "These", "are", "Strings"),
)

Time tables#

The following code makes a timeTable that adds a new row every second.

from deephaven.TableTools import timeTable
result = timeTable("00:00:01")

Filter#

tip

You should filter your data before performing other operations to optimize performance. Less data generally means better, faster queries.

where and whereOneOf#

tip

For SQL developers: in Deephaven, filter your data with where and whereOneOf before joining. Deephaven is optimized for filtering rather than matching.

from deephaven.TableTools import newTable, stringCol, intCol, doubleCol
from deephaven.conversion_utils import NULL_INT

source = newTable(
    stringCol("Letter", "A", "C", "F", "B", "E", "D", "A"),
    intCol("Number", NULL_INT, 2, 1, NULL_INT, 4, 5, 3),
    stringCol("Color", "red", "blue", "orange", "purple", "yellow", "pink", "blue"),
    intCol("Code", 12, 13, 11, NULL_INT, 16, 14, NULL_INT),
)

resultSingleFilter = source.where("Color = `blue`")
resultOR = source.whereOneOf("Color = `blue`", "Number > 2")  # OR operation - result meets either criterion
resultAND = source.where("Color = `blue`", "Number > 2")  # AND operation - result meets both criteria

To filter results based on a filterTable:

filterTable = newTable(
    stringCol("Colors", "blue", "red", "purple", "white"),
    intCol("Codes", 12, 13, 11, 16),  # Codes column added so the multi-column whereIn example below can run
)

# returns a new table containing rows from the source table
result = source.whereIn(filterTable, "Color = Colors")
result = source.whereIn(filterTable, "Color = Colors", "Code = Codes")  # AND operation - result will have both criteria
result = source.whereNotIn(filterTable, "Color = Colors")

head and tail#

Used to reduce the number of rows:

result = source.tail(5)        # returns last 5 rows
result = source.tailPct(0.25)  # returns last 25% of rows
result = source.headPct(0.75)  # returns first 75% of rows
result = source.head(2)        # returns first 2 rows

Join data#

See our guide Choose the right join for more details.

tip

For SQL developers: in Deephaven, joins are normally used to enrich a data set, not filter. Use the where or whereOneOf functions to filter your data instead of using a join.

Joins for close matches (time)#

aj (As-Of Join)#

aj

As-of joins aj find the exact match of the key or, if none exists, the record just before it. For timestamp aj keys, this means that time or the most recent record before it.

result = leftTable.aj(rightTable, columnsToMatch, columnsToAdd)

from deephaven.TableTools import newTable, stringCol, intCol, doubleCol, dateTimeCol
from deephaven.DBTimeUtils import convertDateTime

trades = newTable(
    stringCol("Ticker", "AAPL", "AAPL", "AAPL", "IBM", "IBM"),
    dateTimeCol("Timestamp", convertDateTime("2021-04-05T09:10:00 NY"), convertDateTime("2021-04-05T09:31:00 NY"), convertDateTime("2021-04-05T16:00:00 NY"), convertDateTime("2021-04-05T16:00:00 NY"), convertDateTime("2021-04-05T16:30:00 NY")),
    doubleCol("Price", 2.5, 3.7, 3.0, 100.50, 110),
    intCol("Size", 52, 14, 73, 11, 6),
)

quotes = newTable(
    stringCol("Ticker", "AAPL", "AAPL", "IBM", "IBM", "IBM"),
    dateTimeCol("Timestamp", convertDateTime("2021-04-05T09:11:00 NY"), convertDateTime("2021-04-05T09:30:00 NY"), convertDateTime("2021-04-05T16:00:00 NY"), convertDateTime("2021-04-05T16:30:00 NY"), convertDateTime("2021-04-05T17:00:00 NY")),
    doubleCol("Bid", 2.5, 3.4, 97, 102, 108),
    intCol("BidSize", 10, 20, 5, 13, 23),
    doubleCol("Ask", 2.5, 3.4, 105, 110, 111),
    intCol("AskSize", 83, 33, 47, 15, 5),
)

result = trades.aj(quotes, "Ticker, Timestamp")

raj (Reverse As-Of Join)#

raj

Reverse as-of joins raj find the exact match of the key or, if none exists, the record just after it. For timestamp reverse-aj keys, this means that time or the next record after it.

result = leftTable.raj(rightTable, columnsToMatch, columnsToAdd)

result = trades.raj(quotes, "Ticker, Timestamp", "Bid, Offer = Ask")

Joins with exact match#

naturalJoin (Natural Join)#

naturalJoin

  • Returns all the rows of the left table, along with up to one matching row from the right table.
  • If there is no match in the right table for a given row, nulls will appear for that row in the columns from the right table.
  • If there are multiple matches in the right table for a given row, the query will fail.

leftTable.naturalJoin(rightTable, columnsToMatch, columnsToAdd)

note

The right table of the join must have at most one matching row for each key.

result = leftTable.naturalJoin(rightTable, "DeptID", "DeptName, DeptTelephone = Telephone")
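For context, here is a minimal self-contained sketch of the call above. The department tables below are hypothetical data invented for illustration; only the naturalJoin call itself comes from this cheat sheet.

from deephaven.TableTools import newTable, stringCol, intCol

# hypothetical left table: one row per employee
leftTable = newTable(
    stringCol("LastName", "Rafferty", "Jones", "Steiner", "Robins", "Smith"),
    intCol("DeptID", 31, 33, 33, 34, 35),
)

# hypothetical right table: at most one row per DeptID, as naturalJoin requires
rightTable = newTable(
    intCol("DeptID", 31, 33, 34),
    stringCol("DeptName", "Sales", "Engineering", "Clerical"),
    stringCol("Telephone", "(212) 555-0101", "(212) 555-0102", "(212) 555-0103"),
)

# DeptID 35 has no match, so its DeptName and DeptTelephone values are null
result = leftTable.naturalJoin(rightTable, "DeptID", "DeptName, DeptTelephone = Telephone")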

join#

Similar to SQL inner join, join returns all rows that match between the left and right tables, potentially with duplicates.

  • Returns only matching rows.
  • Multiple matches will have duplicate values, which can result in a long table (see the sketch below).
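The general form is leftTable.join(rightTable, columnsToMatch, columnsToAdd). Below is a minimal sketch with hypothetical data chosen to show the duplication; the table contents are invented for illustration.

from deephaven.TableTools import newTable, stringCol, intCol

leftTable = newTable(
    stringCol("LastName", "Rafferty", "Jones", "Robins"),
    intCol("DeptID", 31, 33, 35),
)

rightTable = newTable(
    intCol("DeptID", 31, 33, 33),
    stringCol("DeptName", "Sales", "Engineering", "Engineering Support"),
)

# Rafferty matches once, Jones matches twice (two output rows), and Robins has no match (dropped)
result = leftTable.join(rightTable, "DeptID", "DeptName")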

leftJoin#

leftJoin

  • Returns all rows of leftTable.
  • If there are no matching keys, appended row values are NULL.
  • If row A on the left matches rows B and C on the right, the result has a single row "A, [B, C]": the matching right-table values are grouped together into an array, as shown in the sketch below.
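The general form is leftTable.leftJoin(rightTable, columnsToMatch, columnsToAdd). A minimal sketch, again with hypothetical data; the appended DeptName column holds a group (array) of matches per left row.

from deephaven.TableTools import newTable, stringCol, intCol

leftTable = newTable(
    stringCol("LastName", "Rafferty", "Jones", "Robins"),
    intCol("DeptID", 31, 33, 35),
)

rightTable = newTable(
    intCol("DeptID", 31, 33, 33),
    stringCol("DeptName", "Sales", "Engineering", "Engineering Support"),
)

# Jones keeps one row whose DeptName group contains both matching values;
# Robins keeps a row with an empty/null group because DeptID 35 has no match
result = leftTable.leftJoin(rightTable, "DeptID", "DeptName")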

exactJoin#

exactJoin

  • Returns all rows of leftTable.
  • If there are no matching keys, the join will fail.
  • Multiple matches will also fail (see the sketch below).
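The general form is leftTable.exactJoin(rightTable, columnsToMatch, columnsToAdd). A minimal sketch with hypothetical data in which every left key has exactly one right-table match:

from deephaven.TableTools import newTable, stringCol, intCol

leftTable = newTable(
    stringCol("LastName", "Rafferty", "Jones", "Robins"),
    intCol("DeptID", 31, 33, 34),
)

rightTable = newTable(
    intCol("DeptID", 31, 33, 34),
    stringCol("DeptName", "Sales", "Engineering", "Clerical"),
)

# every left DeptID has exactly one match; a missing or duplicated key on the right would make this fail
result = leftTable.exactJoin(rightTable, "DeptID", "DeptName")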

Merge tables#

Create a new table made of all of table 1, followed by all of table 2, etc. All tables must have the same column names (schema) when merged.

from deephaven.TableTools import merge, newTable, col
source1 = newTable(col("Letter", "A", "B", "D"), col("Number", 1, 2, 3))
source2 = newTable(col("Letter", "C", "D", "E"), col("Number", 14, 15, 16))
source3 = newTable(col("Letter", "E", "F", "A"), col("Number", 22, 25, 27))
tableArray = [source1, source2, source3]
result = merge(tableArray)

View table metadata#

Useful to make sure schemas match before merging. Shows the column names, data types, partitions, and groups for the table.

seeMetadata = source.getMeta()

Sort#

Sort in a single direction, sort on multiple columns or directions, or reverse the order of rows in a table:

from deephaven import SortPair
from deephaven.TableTools import newTable, stringCol, intCol
from deephaven.conversion_utils import NULL_INT

source = newTable(
    stringCol("Letter", "A", "C", "F", "B", "E", "D", "A"),
    intCol("Number", NULL_INT, 2, 1, NULL_INT, 4, 5, 3),
    stringCol("Color", "red", "blue", "orange", "purple", "yellow", "pink", "blue"),
    intCol("Code", 12, 13, 11, NULL_INT, 16, 14, NULL_INT),
)

resultMultiDirection = source.sort(SortPair.ascending("Letter"), SortPair.descending("Number"))
resultMultiSort = source.sort("Letter", "Number")  # Letter then Number
sortDesc = source.sortDescending("Number")  # highest to lowest
sortAsc = source.sort("Number")  # lowest to highest
reverseTable = source.reverse()  # HEAVILY USED! Very cheap to support GUIs

Select and create new columns#

Option 1:  Choose and add new columns - calculate and write to memory

Use select and update when data is expensive to calculate or accessed frequently. Results are saved in RAM for faster access, at the cost of more memory.

from deephaven.TableTools import newTable, stringCol, intCol
from deephaven.conversion_utils import NULL_INT

source = newTable(
    stringCol("Letter", "A", "C", "F", "B", "E", "D", "A"),
    intCol("Number", NULL_INT, 2, 1, NULL_INT, 4, 5, 3),
    stringCol("Color", "red", "blue", "orange", "purple", "yellow", "pink", "blue"),
    intCol("Code", 12, 13, 11, NULL_INT, 16, 14, NULL_INT),
)

selectColumns = source.select("Letter", "Number")  # constrain to only those 2 columns, write to memory

selectAddCol = source.select("Letter", "Number", "New = Number - 5")  # constrain and add a new calculated column

selectAndUpdateCol = source.select("Letter", "Number").update("New = Number - 5")  # add a new calculated column - logically equivalent to previous example

Option 2:  Choose and add new columns - reference a formula and calculate on the fly

Use view and updateView when the formula is quick to calculate or only a portion of the data is used at a time. This minimizes the RAM used.

viewColumns = source.view("Letter", "Number")  # similar to select(), but uses an on-demand formula

viewAddCol = source.updateView("Letter", "Number", "New = Number - 5")  # view set and add a column with an on-demand formula

viewAndUpdateViewCol = source.view("Letter", "Number").updateView("New = Number - 5")  # logically equivalent to previous example

Option 3:  Add new columns - reference a formula and calculate on the fly

Use lazyUpdate when there are a small number of unique values. On-demand formula results are stored in cache and re-used.

lazyUpdateEx = source.lazyUpdate("Letter", "Number", "New = Number - 5")

Manipulate columns#

uniqueValues = source.selectDistinct("Letter")  # show unique set
# works on all data types - be careful with doubles, longs

renameStuff = source.renameColumns("NewLetter = Letter", "NewNumber = Number")
dropColumn = source.dropColumns("Number")  # drop one or many

putColsAtStart = source.moveUpColumns("Number")  # make Number the first column(s)
putColsWherever = source.moveColumns(1, "Number")  # make Number the second column

colDescription = source.withColumnDescription("Letter", "Underlying Letter")  # hover on column header to see description

Group#

See How to group and ungroup data for more details.

groupToArrays1 = source.by("Letter")  # one row per key; all other columns are arrays
multipleKeys = source.by("Letter", "Number")  # one row for each key combination

Ungroup#

Expands each row so that each value in any array inside that row becomes its own new row.

aggByKey = source.by("Letter")  # one row per Letter; other fields are arrays from source

ungroupThatOutput = aggByKey.ungroup()  # no arguments usually
# each array value becomes its own row
# in this case, turns the grouped table back into source

Aggregate#

See our guides for more details.

# IMPORTANT: Any columns not in the parentheses of the whateverBy("Col1", "Col2") statement
# need to be an appropriate type for that aggregation method,
# i.e., sums need all non-key columns to be numbers.
firstByKey = source.firstBy("Number")
firstByTwoKeys = source.firstBy("Number", "Letter")  # all methods below work with multiple keys
countOfEntireTable = source.countBy("Letter")  # single argument returns total count
countOfGroup = source.countBy("Number", "Letter")

firstOfGroup = source.firstBy("Letter")
lastOfGroup = source.lastBy("Letter")
sumOfGroup = source.view("Letter", "Number").sumBy("Letter")  # non-key fields must be numerical
avgOfGroup = source.view("Letter", "Number").avgBy("Letter")
stdOfGroup = source.view("Letter", "Number").stdBy("Letter")
varOfGroup = source.view("Letter", "Number").varBy("Letter")
medianOfGroup = source.view("Letter", "Number").medianBy("Letter")
minOfGroup = source.view("Letter", "Number").minBy("Letter")
maxOfGroup = source.view("Letter", "Number").maxBy("Letter")

## Combined Aggregations
# combine aggregations in a single method (using the same key-grouping)
from deephaven import ComboAggregateFactory as caf

combinationAgg = source.updateView("Number = Number * Code").by(
    caf.AggCombo(
        caf.AggLast("LastNumber = Number", "LastLetter = Number"),
        caf.AggCount("Number"),
        caf.AggSum("Sum = Number", "Code"),
        caf.AggFirst("First = Number"),
        caf.AggMax("Max = Number"),
        caf.AggMin("Min = Number"),
        caf.AggAvg("AvgNumber = Number"),
        caf.AggWAvg("Number", "WtdAvgNumber = Number"),
        caf.AggVar("VarNumber = Number"),
        caf.AggStd("StdNumber = Number"),
        caf.AggMed("MedianNumber = Number"),
        caf.AggPct(0.75, "Perc75Number = Number"),
    ),
    "Code", "Letter",
)

Other useful methods#

note

Copy and paste these working examples into the console.

Reduce ticking frequency#

Uses snapshot to reduce the ticking frequency.

from deephaven.TableTools import timeTable
import random

source = timeTable("00:00:00.5").update("X = (int) random.randint(0, 100)", "Y = sqrt(X)")
trigger = timeTable("00:00:05").renameColumns("TriggerTimestamp = Timestamp")
result = trigger.snapshot(source)

Capture the history of ticking tables#

Uses snapshotHistory to capture the history of ticking tables.

from deephaven.TableTools import timeTable
import random

source = timeTable("00:00:00.01").update("X = i%2 == 0 ? `A` : `B`", "Y = (int) random.randint(0, 100)", "Z = sqrt(Y)").lastBy("X")
trigger = timeTable("00:00:01").renameColumns("TriggerTimestamp = Timestamp")
result = trigger.snapshotHistory(source)

Use DynamicTableWriter and Pandas#

See our guide How to write data to an in-memory, real-time table.

import os
os.system("pip install pycoingecko")
from pycoingecko import CoinGeckoAPI
from deephaven.DBTimeUtils import secondsToTime, millisToTime
from deephaven.TableTools import merge
from deephaven import DynamicTableWriter, dataFrameToTable  # dataFrameToTable converts a pandas DataFrame to a table (used in the history pull below)
import deephaven.Types as dht
from time import sleep, time
import pandas as pd
import threading

# minutes to query crypto prices
timeToWatch = 20

# secondsToSleep should be 10 or higher. If too fast, will hit request limit.
secondsToSleep = 10

getHistory = False
# if getHistory = True, the days to pull
daysHistory = 90

# coins to get data for
ids = ['bitcoin', 'ethereum', 'litecoin', 'dogecoin', 'tether', 'binancecoin', 'cardano', 'ripple', 'polkadot']

# array to store tables for current and previous data
tableArray = []
tableWriter = DynamicTableWriter(["coin", "dateTime", "price", "marketCap", "totalVolume"], [dht.string, dht.datetime, dht.double, dht.double, dht.double])
tableArray.append(tableWriter.getTable())
cg = CoinGeckoAPI()
# get historical data
if getHistory:
    for names in ids:
        coin_data_hist = cg.get_coin_market_chart_by_id(names, vs_currency = "usd", days = daysHistory)
        sub = pd.DataFrame(coin_data_hist)
        tableArray.append(dataFrameToTable(sub).view("dateTime = millisToTime((long)prices_[i][0])", "coin = names", "price = prices_[i][1]", "marketCap = market_caps_[i][1]", "totalVolume = total_volumes_[i][1]").moveUpColumns("dateTime", "coin"))

# add each coin's data to the master table
result = merge(tableArray).selectDistinct("dateTime", "coin", "price", "marketCap", "totalVolume").sortDescending("dateTime")
def thread_func():
    cg.get_coins_markets(vs_currency = 'usd')
    for i in range(int(timeToWatch*60/secondsToSleep)):
        coin_data = cg.get_price(ids, vs_currencies = 'usd', include_market_cap = True, include_24hr_vol = True, include_24hr_change = False, include_last_updated_at = True)

        for id in ids:
            # Add new data to the dynamic table
            tableWriter.logRow(id, secondsToTime(int(coin_data[id]['last_updated_at'])), float(coin_data[id]['usd']), coin_data[id]['usd_market_cap'], coin_data[id]['usd_24h_vol'])
        sleep(secondsToSleep)
thread = threading.Thread(target = thread_func)
thread.start()