Skip to main content
Version: Python

Ultimate cheat sheet

Neccesary data

The named tables in these lines are used throughout this page and should be run first.

Create static tables

from deephaven import empty_table
from deephaven.time import to_datetime
import random
import string

letters_upp = string.ascii_uppercase

def get_random_string(length):
# choose from all lowercase letter
letters = string.ascii_letters
result_str = ''.join(random.choice(letters) for i in range(length))
return(result_str)

static_source_1 = empty_table(26300)\
.update(['X = ii', 'Double1 = randomDouble(1, 100)', 'Int1 = randomInt(1, 100)', 'Timestamp = convertDateTime(`2016-01-01T01:00 NY`) + i * HOUR', 'Date = Timestamp.toDateString()', 'String1 = (java.lang.String)random.choice(letters_upp)', 'String2 = (java.lang.String)get_random_string(random.randint(1,4))'])

static_source_2 = empty_table(26300)\
.update(['X = ii', 'Double2 = randomDouble(1, 80)', 'Int2 = randomInt(1, 100)', \
'Timestamp = convertDateTime(`2019-01-01T01:00 NY`) + i * HOUR', 'String3 = (java.lang.String)random.choice(letters_upp)', 'String4 = (java.lang.String)get_random_string(random.randint(1,4))'])

Create ticking tables

from deephaven import time_table

ticking_source = time_table("00:00:01")\
.update(['X = ii', 'Double3 = randomDouble(1, 100)', 'Int3 = randomInt(1, 100)', \
'Date = Timestamp.toDateString()', 'String5 = (java.lang.String)random.choice(letters_upp)', 'String6 = (java.lang.String)get_random_string(random.randint(1,4))'])

Merge tables

To merge tables, the schema must be identical: same column names, same column data types. This applies to both static and updating tables.

from deephaven import merge

copy1 = static_source_1
merge_same_static = merge([static_source_1, copy1])

copy_updating_1 = ticking_source
copy_updating_2 = ticking_source
merge_same_dynamic = merge([copy_updating_1, copy_updating_2])

# one can merge static and ticking tables
static_source_1v2 = static_source_1.view(["Date", "Timestamp", "SourceRowNum = X"])
ticking_source_v2 = ticking_source.view(["Date", "Timestamp", "SourceRowNum = X"])
merge_static_and_dynamic = merge([static_source_1v2, ticking_source_v2])

View metadata of table

Metadata shows the column names, data types, partitions, and groups for the table.

static_1_meta = static_source_1.meta_table

Filter

Most queries benefit by starting with filters. Less data generally means better performance.

For SQL developers: in Deephaven, Joins are not a primary operation for filtering. Use where, where_in, and where_not_in.

note

Backticks [`] are used for strings and single quotes ['] are used for timestamps and characters

Date & Time examples

from deephaven.time import to_datetime

todays_data_1 = ticking_source.where(["Date = currentDay()"]) # Date for this is a String
todays_data_2 = ticking_source.where(["Date = currentDateNy()"]) # popular for NY
todays_data_3 = ticking_source.where(["Date = currentDate(TZ_SYD)"]) # sydney timezone

single_string_date = static_source_1.where(["Date = `2017-01-03`"]) # HEAVILY USED!
less_than_date = static_source_1.where(["Date < `2017-01-03`"]) # use >=, etc. as well
one_month_string = static_source_1.where(["Date.startsWith(`2017-02`)"])

single_db_date = static_source_2.where(["formatDate(Timestamp, TZ_NY) = `2020-03-01`"])
after_db_datetime = static_source_1.where(["Timestamp > '2017-01-05T10:30:00 NY'"])

just_biz_time = static_source_1.where(["isBusinessTime(Timestamp)"]) # HEAVILY USED!
just_august_datetime = static_source_2.where(["monthOfYear(Timestamp, TZ_NY) = 8"])
just_10am_hour = static_source_1.where(["hourOfDay(Timestamp, TZ_NY) = 10"])
just_tues = static_source_2.where(["dayOfWeek(Timestamp).getValue() = 2"]) # Tuesdays

time1 = to_datetime('2017-08-21T09:45:00 NY')
time2 = to_datetime('2017-08-21T10:10:00 NY')
trades = static_source_1.where(["inRange(Timestamp, time1, time2)"])

# Experienced Python users might prefer to use Python3 methods for casting instead
time1 = '2017-08-21T09:45:00 NY'
time2 = '2017-08-21T12:10:00 NY'
trades = static_source_1.where([f"inRange(Timestamp, '{time1}', '{time2}')"])

String examples

one_string_match = static_source_1.where(["String1 = `A`"]) # match filter

string_set_match = static_source_1.where(["String1 in `A`, `M`, `G`"])
case_insensitive = static_source_1.where(["String1 icase in `a`, `m`, `g`"])
not_in_example = static_source_1.where(["String1 not in `A`, `M`, `G`"]) # see "not"

contains_example = static_source_1.where(["String2.contains(`P`)"])
not_contains_example = static_source_1.where(["!String2.contains(`P`)"])
starts_with_example = static_source_1.where(["String2.startsWith(`A`)"])
ends_with_example = static_source_1.where(["String2.endsWith(`M`)"])

Number examples

equals_example = static_source_2.where(["round(Int2) = 44"])
less_than_example = static_source_2.where(["Double2 < 8.42"])
some_manipulation = static_source_2.where(["(Double2 - Int2) / Int2 > 0.05"])
mod_example_1 = static_source_2.where(["i % 10 = 0"]) # every 10th row
mod_example_2 = static_source_2.where(["String4.length() % 2 = 0"])
# even char-count Tickers

Multiple filters

conjunctive_comma = static_source_1.where(["Date = `2017-08-23`", "String1 = `A`"])
# HEAVILY USED!
conjunctive_ampersand = static_source_1.where(["Date = `2017-08-23` && String1 = `A`"])

disjunctive_same_col = static_source_1.where(["Date = `2017-08-23`|| Date = `2017-08-25`"])
disjunctive_diff_col = static_source_1.where(["Date = `2017-08-23` || String1 = `A`"])

range_lame_way = static_source_1.where(["Date >= `2017-08-21`", "Date <= `2017-08-23`"])
in_range_best = static_source_1.where(["inRange(Date, `2017-08-21`, `2017-08-23`)"])
# HEAVILY USED!

in_range_best_string = static_source_1.where("inRange(String2.substring(0,1), `A`, `D`)")
# starts with letters A - D

where_in/where_not_in

# run these two queries together
string_set_driver = static_source_1.rename_columns(cols=["String4 = String2"]).head(10)
where_in_example_1 = static_source_2.where_in(string_set_driver, ["String4"]) # HEAVILY USED!

where_in_example_2 = static_source_2.where_in(static_source_1, ["String4 = String2"])
# Ticker in static_source_2 within list of USym of static_source_1

where_not_in_example = static_source_2.where_not_in(static_source_1, ["String4 = String2"])
# see the "not"

Nulls and NaNs

null_example = static_source_1.where(["isNull(Int1)"]) # all data types supported
not_null_example = static_source_1.where(["!isNull(Int1)"])

nan_example = static_source_1.where(["isNaN(Int1)"])
not_nan_example = static_source_1.where(["!isNaN(Int1)"])

Head and Tail

first_10k_rows = static_source_2.head(10_000)
ticking_last_rows = ticking_source.tail(100)

first_15_percent = ticking_source.reverse().head_pct(0.15) # the first 15% of rows
last_15_percent = static_source_2.tail_pct(0.15) # the last 15% of rows

first_10_by_key = static_source_1.head_by(10, ["String1"]) # first 20 rows for each String1-key
last_10_by_key = static_source_1.tail_by(20, ["String1"]) # first 20 rows for each String1-key

Sort

# works for all data types

sort_time_v1 = static_source_1.sort(order_by=['Timestamp']) # sort ascending
sort_time_v2 = static_source_1.sort("Double1")

sort_numbers = static_source_1.sort_descending(['Int1']) # sort descending
sort_strings = static_source_1.sort_descending(['String2'])

from deephaven import SortDirection
sort_time_v1 = static_source_1.sort_descending(order_by=['String1', 'Timestamp']) # multiple
sort_multi = static_source_1.sort("Int1", "String1")

sort_multiple_col = [
SortDirection.ASCENDING,
SortDirection.DESCENDING
]

multi_sort = static_source_1.sort(['Int1', 'Double1'], sort_multiple_col) # Int ascending then Double descending
tip

Reversing tables is faster than sorting, and ften used in UIs for seeing appending rows at top of table.

reverse_table = static_source_1.reverse()
reverse_updating = ticking_source.reverse()

Select and create new columns

Option 1: Choose and add new columns - Calculate and write to memory

Use select and update when it is expensive to caperform a calculation or in cases where the result will be accessed frequently.

select_columns = static_source_2.select(["Timestamp", "Int2", "Double2", "String3"])
# constrain to only those 4 columns, write to memory
select_add_col = static_source_2.select(["Timestamp", "Int2", "Double2", "String3", "Difference = Double2 - Int2"])
# constrain and add a new column calculating

select_and_update_col = static_source_2.select(["Timestamp", "Int2", "Double2", "String3"])\
.update("difference = Double2 - Int2")
# add a new column calculating - logically equivalent to previous example

Option 2: Choose and add new columns - Reference a formula and calculate on the fly

Use view and update_view when formula is quick or only a portion of the data is used at a time. Minimizes RAM used.

view_columns = static_source_2.view(["Timestamp", "Int2", "Double2", "String3"])
# similar to select(), though uses on-demand formula
view_add_col = static_source_2.view(["Timestamp", "Int2", "Double2", "String3", "Difference = Double2 - Int2"])
# view set and add a column, though with an on-demand formula

view_and_update_view_col = static_source_2\
.view(["Timestamp", "Int2", "Double2", "String3"])\
.update_view(["Difference = Double2 - Int2"])
# logically equivalent to previous example

Option 3: Add new columns - Reference a formula and calculate on the fly

Use lazy_update when there are a small-ish number of unique values; on-demand formula results are stored in cache and re-used.

lazy_update_example = static_source_2.lazy_update(["Timestamp", "Int2", "Double2", "String3", "Difference = Double2 - Int2"])

Getting the row number

get_row_num = static_source_2.update_view(["RowNum_int = i", "RowNum_long = ii"])

Do math

#should use .update() when working with random numbers
simple_math = static_source_2.update(["RandomNumber = Math.random()", \
"RandomInt100 = new Random().nextInt(100)", \
"Arithmetic = Int2 * Double2", \
"SigNum = Math.signum(RandomNumber - 0.5)", \
"Signed = SigNum * Arithmetic", \
"AbsDlrVolume = abs(Signed)", \
"Power = Math.pow(i, 2)", \
"Exp = Double2 * 1E2", \
"Log = Double2 * log(2.0)", \
"Round = round(Double2)", \
"Floor = floor(Double2)", \
"Mod = RandomInt100 % 5", \
"CastInt = (int)AbsDlrVolume", \
"CastLong = (long)AbsDlrVolume"])

Handle arrays

array_examples = static_source_2.update_view(["RowNumber = i", \
"PrevRowReference = Double2_[i-1]", \
"MultipleRowRef = Double2_[i-2] - Double2_[i-1]", \
"SubArray = Double2_.subVector(i-2, i+1)", \
"SumSubArray = sum(Double2_.subVector(i-2, i+1))", \
"ArraySize = SubArray.size()", \
"ArrayMin = min(SubArray)", \
"ArrayMax = max(SubArray)", \
"ArrayMedian = median(SubArray)", \
"ArrayAvg = avg(SubArray)", \
"ArrayStd = std(SubArray)", \
"ArrayVar = var(SubArray)", \
"ArrayLast = last(SubArray)", \
"ArrayIndex = SubArray[1]", \
"InArray = in(45.71, SubArray)"])

Create a slice or sub-vector

slice_and_rolling = static_source_2.update(formulas=["SubVector = Int2_.subVector(i-2,i+1)",\
"RollingMean = avg(SubVector)", \
"RollingMedian =median(SubVector)", \
"RollingSum = sum(SubVector)"])

Manipulate time and calenders

time_stuff = static_source_2.view(["Timestamp", \
"CurrentTime = currentTime()", \
"CurrentDateDefault = currentDay()", \
"CurrentDateNy = currentDateNy()", \
"CurrentDateLon = currentDate(TZ_LON)", \
"LastBizDateNy = lastBusinessDateNy()", \
"IsAfter = CurrentTime > Timestamp", \
"IsBizDay = isBusinessDay(CurrentDateLon)", \
"StringDT = format(Timestamp, TZ_NY)", \
"StringDate = formatDate(Timestamp, TZ_NY)", \
"StringToTime = convertDateTime(StringDate + `T12:00 NY`)", \
"AddTime = Timestamp + '05:11:04.332'", \
"PlusHour = Timestamp + HOUR", \
"LessTenMins = Timestamp - 10 * MINUTE", \
"DiffYear = diffYear(Timestamp, currentTime())", \
"DiffDay = diffDay(Timestamp, CurrentTime)", \
"DiffNanos = PlusHour - Timestamp", \
"DayWeek = dayOfWeek(Timestamp, TZ_NY)", \
"HourDay = hourOfDay(Timestamp, TZ_NY)", \
"DateAtMidnight = dateAtMidnight(Timestamp, TZ_NY)", \
"IsBizDayString = isBusinessDay(StringDate)", \
"IsBizDayDatetime = isBusinessDay(Timestamp)", \
"IsBizTime = isBusinessDay(Timestamp)", \
"FracBizDayDone = fractionOfBusinessDayComplete(currentTime())", \
"FracBizDayOpen = fractionOfBusinessDayRemaining(currentTime())", \
"NextNonBizDay = nextNonBusinessDay()", \
"NextBizDayCurrent = nextBusinessDay()", \
"NextBizDayString = nextBusinessDay(StringDate)", \
"NextBizDayDatetime = nextBusinessDay(Timestamp)", \
"PlusFiveBizDayCurrent = nextBusinessDay(5)", \
"PlusFBizDayString = nextBusinessDay(StringDate, 5)", \
"PlusFBizDayDatetime = nextBusinessDay(Timestamp, 5)", \
"PrevBizDayCurrent = previousBusinessDay()", \
"PrevBizDayString = previousBusinessDay(StringDate)", \
"PrevBizDayDatetime = previousBusinessDay(Timestamp)", \
"MinusFiveBizDayCurrent = previousBusinessDay(5)", \
"MinusFiveBizDayString = previousBusinessDay(StringDate, 5)", \
"MinusFiveBizDayDatetime = previousBusinessDay(Timestamp, 5)", \
"BizDayArray = businessDaysInRange(Timestamp, currentTime())", \
"NonBizDayArray = nonBusinessDaysInRange(Timestamp, currentTime())", \
"DiffBizDaysCount = numberOfBusinessDays(Timestamp, currentTime())", \
"DiffBizDaysExact = diffBusinessDay(Timestamp, currentTime())", \
"DiffBizDaysString = numberOfBusinessDays(MinusFiveBizDayString, StringDate)", \
"StandardBizDayNanos = standardBusinessDayLengthNanos()", \
"DiffBizSecs = diffBusinessNanos(CurrentTime, CurrentTime + 5 * DAY) / 1E9", \
"LastBizOfMonth = isLastBusinessDayOfMonth(StringDate)", \
"LastBizOfWeek = isLastBusinessDayOfWeek(currentTime())"])

Bin data

Binning is a great pre-step for aggregating to support the down-sampling or other profiling of data.

bins = ticking_source.update_view(["PriceBin = upperBin(Double3, 5)", \
"SizeBin = lowerBin(Int3, 10)", \
"TimeBin = upperBin(Timestamp, '00:02:00')"]).reverse()
agg_bin = bins.view(["TimeBin", "Total = Int3 * Double3"]).sum_by(["TimeBin"])

Manipulate strings

string_stuff = static_source_2.view(["StringDate = formatDate(Timestamp, TZ_NY)", \
"String4","Double2", \
"NewString = `new_string_example_`", \
"ConcatV1 = NewString + String4", \
"ConcatV2 = NewString + `Note_backticks!!`", \
"ConcatV3 = NewString.concat(String4)", \
"ConcatV4 = NewString.concat(`Note_backticks!!`)", \
"StartBool = String4.startsWith(`M`)", \
"NoEndBool = !String4.endsWith(`G`)", \
"ContainedBool = String4.contains(`AA`)", \
"NoContainBool = !String4.contains(`AA`)", \
"FirstChar = String4.substring(0,1)", \
"LengthString = String4.length()", \
"CharIndexPos = ConcatV1.charAt(19)", \
"SubstringEx = ConcatV1.substring(11,20)", \
"FindIt = NewString.indexOf(`_`)", \
"FindItMiddle = NewString.indexOf(`_`, FindIt + 1)", \
"FindLastOf = NewString.lastIndexOf(`_`)", \
"SplitToArrays = NewString.split(`_`)", \
"SplitWithMax = NewString.split(`_`, 2)", \
"SplitIndexPos = NewString.split(`_`)[1]", \
"LowerCase = String4.toLowerCase()", \
"UpperCase = NewString.toUpperCase()", \
"DoubleToStringv1 = Double2 + ``", \
"DoubleToStringv2 = String.valueOf(Double2)", \
"DoubleToStringv3 = Double.toString(Double2)", \
"StringToDoublev1 = Double.valueOf(DoubleToStringv1)"])

Use Ternaries (If-Thens)

if_then_example_1 = static_source_2\
.update_view(["SimpleTernary = Double2 < 50 ? `smaller` : `bigger`"])

if_then_example_2 = static_source_2\
.update_view(["TwoLayers = Int2 <= 20 ? `small` : Int2 < 50 ? `medium` : `large`"])

Create and use a custom function

 ## Create and Use a custom function
def mult(a,b):
return a*b
py_func_example = static_source_2.update(["M_object = mult(Double2,Int2)", "M_double = (double)mult(Double2,Int2)"])

Manipulate columns

unique_values = static_source_2.select_distinct("String4") # show unique set
# works on all data types - careful with doubles, longs
unique_values_combo = static_source_2.select_distinct(["Timestamp", "String4"])
# unique combinations of Time with String4

rename_stuff = static_source_2.rename_columns(["Symbol = String4", "Price = Double2"])
drop_column = static_source_2.drop_columns(["X"]) # same for drop one or many
drop_columns = static_source_2.drop_columns(["X", "Int2", "String3"])

put_cols_at_start = static_source_2.move_columns_up(["String4", "Int2"]) # Makes 1st col(s)
put_cols_wherever = static_source_2.move_columns(1, ["String4", "Int2"])
# makes String4 the 2nd and Int2 the 3rd column

Group and aggregate

Simple grouping

group_to_arrays = static_source_1.group_by(["String2"])
# one row per key (i.e. String2), all other columns are arrays
group_multiple_keys = static_source_1.group_by(["String1", "Date"])
# one row for each key-combination (i.e. String1-Date pairs)

Ungrouping

# Run these lines together
agg_by_key = static_source_1.group_by(["Date"])
# one row per Date, other fields are arrays from static_source_1
ungroup_that_output = agg_by_key.ungroup() # no arguments usually
# each array value becomes its own row
# in this case turns aggByDatetimeKey back into static_source_1

Aggregations

count_of_entire_table = static_source_1.count_by("Count") # single arg returns tot count, you choose name of column output
count_of_group = static_source_1.count_by("N", ["String1"]) # N is naming choice

first_of_group = static_source_1.first_by(["String1"])
last_of_group = static_source_1.last_by(["String1"])

sum_of_group = static_source_1.view(["String1", "Double1"]).sum_by(["String1"])
# non-key field(s) must be numerical
avg_of_group = static_source_1.view(["String1", "Double1", "Int1"]).avg_by(["String1"])
std_of_group = static_source_1.view(["String1", "Double1", "Int1"]).std_by(["String1"])
var_of_group = static_source_1.view(["String1", "Double1", "Int1"]).var_by(["String1"])
median_of_group = static_source_1.view(["String1", "Double1", "Int1"]).median_by(["String1"])
min_of_group = static_source_1.view(["String1", "Double1", "Int1"]).min_by(["String1"])
max_of_group= static_source_1.view(["String1", "Double1", "Int1"]).max_by(["String1"])
# combine aggregations in a single method (using the same key-grouping)

Join

For SQL developers: Joins in Deephaven are used to extend result sets by joining data from other tables, not as much for filtering.

Joins that get used a lot

Natural Join

important

The right table of the join needs to have only one match based on the key(s).

# run these together
last_by_key_table = static_source_1.view(["String1", "Int1", "Timestamp"])\
.last_by(["String1"])\
.rename_columns(["Renamed = Int1", "RenamedTime = Timestamp"])

# creates 3-column table of String1 + last record of the other 2 cols
join_with_last_price = static_source_1.natural_join(last_by_key_table, ["String1"])
# arguments are (rightTableOfJoin, "JoinKey(s)")
# will have same number of rows as static_source_1 (left table)
# brings all non-key columns from last_price_table to static_source_1
# HEAVILY USED!

specify_cols_from_r = static_source_1.natural_join(last_by_key_table, ["String1"], ["Renamed"])
# nearly identical to joinWithLastPrice example
# args are (rightTableOfJoin, "JoinKey(s)", "Cols from R table")

rename_cols_on_join = static_source_1\
.natural_join(last_by_key_table, ["String1"], ["RenamedAgain = Renamed, RenamedTime"])
# nearly identical to specify_cols_from_r example
# can rename column on the join
# sometimes this is necessary…
# if there would be 2 cols of same name
# Note the placement of quotes

keys_of_diff_names = static_source_2.view(["String3", "Double2"])\
.natural_join(last_by_key_table, ["String3 = String1"])
# note that String2 in the L table is mapped as the key to String1 in R
# this result has many null records, as there are many String2
# without matching String1

Multiple Keys

# run these together
last_price_two_keys = static_source_1.view(["Date", "String1", "Int1"])\
.last_by(["Date", "String1"])
# creates 3-column table of LastDouble for each Date-String1 pair
nat_join_two_keys = static_source_1\
.natural_join(last_price_two_keys, ["Date, String1", "Int1"])
# arguments are (rightTableOfJoin, "JoinKey1, JoinKey2", "Col(s)")
# Note the placement of quotes

AJ (As-Of Join)

As-of joins are time series joins. They can also be used with other ordered numerics as the join key(s). It is often wise to make sure the Right-table is sorted (based on the key). aj is designed to find "the exact match" of the key or "the record just before". For timestamp aj-keys, this means "that time or the record just before".

Reverse As-of joins raj find "the exact match" of the key or "the record just after". For timestamp reverse aj-keys, this means "that time or the record just after".

from deephaven import time_table
ticking_source_2 = time_table("00:00:10.500")\
.update(['X = ii', 'Double4 = randomDouble(1, 100)', 'Int4 = randomInt(1, 100)', \
'Date = Timestamp.toDateString()', 'String7 = (java.lang.String)random.choice(letters_upp)', 'String8 = (java.lang.String)get_random_string(random.randint(1,4))'])

aj_join = ticking_source_2.aj(table=ticking_source, on=["Timestamp"],joins=["String5", "String6", "JoinTime = Timestamp"])

raj_join = ticking_source_2.raj(table=ticking_source, on=["Timestamp"],joins=["String5", "String6", "JoinTime = Timestamp"])

raj_join = ticking_source_2.raj(table=ticking_source, on=["String7 = String5", "Timestamp"],joins=["String6", "JoinTime = Timestamp"])

Less common joins

# exact joins require precisely one match in the right table for each key
last_string_1aug23 = static_source_1.where(["Date = `2017-08-23`"]).last_by(["String1"])
last_string_1aug24 = static_source_1.where(["Date = `2017-08-24`"]).last_by(["String1"])\
.where_in(last_string_1aug23, ["String1"])
# filters for String1 in last_string_1aug23
exact_join_ex = last_string_1aug24.exact_join(last_string_1aug23, ["String1"], ["RenamedDouble = Double1"])


# join will find all matches from left in the right table
# if a match is not found, that row is omitted from the result
# if the Right-table has multiple matches, then multiple rows are included in result
last_ss_1 = static_source_1.last_by(["String1"]).view(["String1", "Double1"])
last_ss_2 = static_source_2.last_by(["String3"])\
.view(["String1 = String3", "RenamedDouble = Double2"])
first_ss_2 = static_source_2.first_by(["String3"])\
.view(["String1 = String3", "RenamedDouble = Double2"])

from deephaven import merge
merge_first_and_last_ss_2 = merge([first_ss_2, last_ss_2])
# the table has two rows per String1
join_example = last_ss_1.join(merge_first_and_last_ss_2, ["String1"], ["RenamedDouble"])
# note that the resultant set has two rows per String1

Use columns as arrays and cells as variables

get_a_column = static_source_1.j_object.getColumnSource("String1")
print(get_a_column)
get_a_cell = get_a_column.get(0)
print(get_a_cell)

Read and write files

It is very easy to import files and to export any table to a CSV via the UI.

CSVs and Parquety files imported from a server-side directory should be done via the script below - these are NOT working examples. The code below provides syntax, but the filepaths are unknown.

# CSV
from deephaven import read_csv

# import CSV from a URL
csv_imported = read_csv("https://media.githubusercontent.com/media/deephaven/examples/main/Iris/csv/iris.csv")

# import headerless CSV
csv_headerless = read_csv("https://media.githubusercontent.com/media/deephaven/examples/main/DeNiro/csv/deniro_headerless.csv", headless=True)

# import headerless CSV then add headings
import deephaven.dtypes as dht
header = {"Year": dht.int_, "Rotten Tom Score": dht.int_, "Title": dht.string}
csv_manual_header = read_csv("https://media.githubusercontent.com/media/deephaven/examples/main/DeNiro/csv/deniro_headerless.csv", header=header, headless=True)

# !!! WRITE CSV !!!
# export / write CSV to local /data/ directory
from deephaven import write_csv
write_csv(csv_manual_header, "/data/outputFile.csv")

# import the local CSV you just exported
csv_local_read = read_csv("/data/outputFile.csv")

# full syntax
# csv_full_monty = read_csv(path: str,
# header: Dict[str, DataType]=None,
# headless: bool=False,
# delimiter: str=",",
# quote: str="\"",
# ignore_surrounding_spaces: bool = True,
# trim: bool = False,
# charset: str = "utf-8")


# PARQUET

# write Parquet
from deephaven.parquet import write
table_sample = static_source_1.head(100)
write(table_sample, "/data/output_generic.parquet")

write(table_sample, "/data/output_gzip.parquet", compression_codec_name="GZIP")
# other compression codes are available

# read Parquet
from deephaven.parquet import read
parquet_read = read("/data/output_generic.parquet")
parquet_ok_if_zipped = read("/data/output_gzip.parquet")

Do Cum-Sum and Rolling Average

def has_others(array, target):
for x in array:
if x != target:
return True
return False
###
make_arrays= static_source_2.sort("String3")\
.update_view(["String3Array10 = String3_.subVector(i-5, i-1)", \
"Int2Array10 = Int2_.subVector(i-5, i-1)", \
"Double2Array10 = Double2_.subVector(i-5, i-1)"])
###
cum_and_roll_10 = make_arrays.update(["ArrayHasOtherString3s = \
has_others.call(String3Array10.toArray(), String3)", \
"CumSum10 = ArrayHasOtherString3s = false ? \
(int)sum(Double2Array10) : NULL_INT", \
"RollAvg10 = ArrayHasOtherString3s = false ? \
avg(Int2Array10) : NULL_DOUBLE"])

Another example of creating a rolling sum

def rolling_sum(rows, values):

# Calculate a rolling sum from a java int array
# param rows: size of the rolling sum (i.e. number of rows to sum over)
# param values:
# return:

calculations = jpy.array('int', values.size()) # create an array of integers for our rolling sum value
sum_ = 0 # our running sum ("_" at end to avoid builtin symbol sum)

for i in range(values.size()):
sum_ += values.get(i)
if i >= rows:
# subtract from the rolling sum when needed
sum_ -= values.get(i - rows)
calculations[i] = sum_

return calculations


trades = static_source_1.where("Date >= `2017-08-25`")\
.view(["String1", "Int1"])\
.head_by(100, ["String1"])

sym_groups = trades.group_by(["String1"])
sums = sym_groups.update_view(["RollingSum=(int[]) rolling_sum.call(20, Int1)"])
ungroup = sums.ungroup()

Set up an EMA

from deephaven.experimental.ema import ByEmaSimple

ema30sec = ByEmaSimple('BD_SKIP','BD_SKIP','TIME',30,'SECONDS', type='LEVEL')

ema_data = ticking_source.view(formulas=["EMA_data = ema30sec.update(Timestamp, Int3)"]).last_by()
ema_data_grouped = ticking_source\
.view(formulas=["String5", "EMA_data = ema30sec.update(Timestamp, Int3, String5)"])\
.last_by(by=["String5"]).sort(order_by=["String5"])

Use Pandas

import deephaven.pandas as dhpd
pandas_df = dhpd.to_pandas(static_source_1)

# do Pandas stuff to prove it is a Pandas Dataframe
print(type(pandas_df), pandas_df.dtypes, pandas_df.describe())

# Pandas to dh table
dh_table_again = dhpd.to_table(pandas_df)

Format tables

Datetime formatting

time_formatting = static_source_1.view(["Time1 = Timestamp", "Time2 = Timestamp", "Time3 = Timestamp", "Time4 = Timestamp", "Time5 = Timestamp", "Time6 = Timestamp"])\
.format_columns(["Time1=Date(`yyyy_MM_dd'T'HH-mm-ss.S z`)", \
"Time2 = Date(`yyyy-MM-dd'T'HH:mm t`)", \
"Time3 = Date(`HH:mm:ss`)", \
"Time4 = Date(`HH:mm:ss.SSSSSSSSS`)", \
"Time5 = Date(`EEE dd MMM yy HH:MM:ss`)", \
"Time6 = Date(`yyyy-MM-dd`)"])\
.update_view(["Time7_string = formatDate(Time6, TZ_NY)"])

Number formatting

number_formatting = static_source_1\
.view(["Double1", \
"BigNum1 = Double1 * 1000", "BigNum2 = Double1 * 1000", \
"BigNum3 = Double1 * 1000", "Price1 = Double1", "Price2 = Double1", \
"Price3 = Double1", \
"SmallNum1 = Double1 / 1000", "SmallNum2 = Double1 / 1000", \
"SmallNum3 = Double1 / 1000", "TinyNum = Double1 / 1_000_000_000"])\
.format_columns(["BigNum1 = Decimal(`###,#00`)", \
"BigNum2 = Decimal(`##0.00`)", \
"BigNum3 = Decimal(`0.####E0`)", \
"Price1 = Decimal(`###,###.##`)", \
"Price2 = Decimal(`$###,##0.00`)", \
"SmallNum1 = Decimal(`##0.000000`)", \
"SmallNum2 = Decimal(`##0.00%`)", \
"SmallNum3 = Decimal(`##0%`)", \
"TinyNum = Decimal(`0.00E0`)"])

Color formatting

# set a background color for an entire table
background_format = static_source_1\
.format_columns("* = bgfga(colorRGB(0,0,128))")

# set a color column by column
just_colors = static_source_1\
.format_columns(["X = `#90F060`", \
"Double1 = LIGHTBLUE", \
"Int1 = colorRGB(247,204,0)", \
"Timestamp = colorRGB(247,204,204)", \
"Date = bgfg(colorRGB(57,43,128),colorRGB(243,247,122))", \
"String1 = bgfga(MEDIUMVIOLETRED)", \
"String2 = colorHSL(0, 24, 36)"])

#column formatting
column_colors = static_source_1\
.update_view(["RowMod10 = i % 10"])\
.format_column_where("String1", "Int1 > 35", "DEEP_RED")\
.format_column_where("Double1", "Double1 > Double1_[i-1]", "LIMEGREEN")\
.format_columns(["Int1 = (X % 5 !=0) ? ORANGE : BLACK", \
"Date = (String1 = `A`) ? colorRGB(253, 31, 203) : colorRGB(171, 254, 42)", \
"RowMod10 = heatmap(RowMod10, 1.0, 10.0, BRIGHT_GREEN, BRIGHT_RED)"])

#row formatting
row_colors = static_source_1\
.format_row_where("Int1 > 45"," PALE_BLUE")

# one can format_columns() for numbers and colors together
number_and_color = static_source_1\
.format_columns(["Double1 = Decimal(`##0.00`)", \
"Double1 = (Double1 > Double1_[i-1]) ? FUCHSIA : GREY"])

You will find an itemization of stock colors in the docs.

Plot programmatically

Substantial documentation about plotting exists. The following example intends to show the basics, and everything about styling and labeling is omitted.

Time series plots

from deephaven.plot.figure import Figure

figure = Figure()
time_series = figure.plot_xy(series_name="Time_series", t=static_source_1.head(100), x = "Timestamp", y="Int1")\
.show()

time_series_2axis = figure.plot_xy(series_name="Int1", t=static_source_1.where(["X < 100", "Int1 < 70 "]), x="Timestamp", y="Int1")\
.x_twin()\
.plot_xy(series_name="Double1", t=static_source_1.where(["X < 100", "Double1 > 30"]), x="Timestamp", y="Double1")\
.show()

time_series_3axis = figure.plot_xy(series_name="Int1", t=static_source_1.where(["X < 25"]), x="Timestamp", y="Int1")\
.x_twin()\
.plot_xy(series_name="Double1", t=static_source_1.where(["X < 25"]), x="Timestamp", y="Double1")\
.x_twin()\
.plot_xy(series_name="DoubleN", t=static_source_1.where(["X < 25"]).update(["DoubleN = pow(Double1,2)"]), x="Timestamp", y="DoubleN")\
.show()

Bar chart

from deephaven.plot import Figure, PlotStyle

figure = Figure()
bar_chart = figure.axes(plot_style=PlotStyle.BAR)\
.plot_xy(series_name="Int1", t=static_source_1.where('X < 41'), x="Timestamp", y="Int1").show()

Plot-by-some key

from deephaven.plot import Figure

figure = Figure()
t_pb = empty_table(100).update_view(formulas = ["X=i", "Z=i%3", "Y=X*X + Z * 10000", "Series_Id = `series_` + Z"]).move_columns(idx=0, cols=["Series_Id"])

table_3_keys = static_source_1.head(2000).where("String1 in `A`, `B`, `C`")\
.sort("Int1").update(["New_Row = i", "Y = Int1 * (String1 = `A` ? 2 : String1 = `B` ? 0.5 : 1.0)"])
p = figure.plot_xy(series_name="partitioned", t=table_3_keys, by=["String1"], x="New_Row", y="Y").show()

Histogram

from deephaven.plot import Figure

figure = Figure()
histogram = figure.plot_xy_hist(series_name="Histogram Values", t=static_source_1.where('X < 41'), x="Int1", nbins=10)\
.chart_title(title="Histogram of Values")\
.show()

Area graph

from deephaven.plot import PlotStyle, Figure

figure = Figure()
area_graph = figure.axes(plot_style=PlotStyle.AREA)\
.plot_xy(series_name="Int_col", t=static_source_1.where('X < 41'), x="Timestamp", y="Int1").show()

Stacked Area

from deephaven.plot import PlotStyle, Figure
figure = Figure()
stacked_area_graph = figure.axes(plot_style=PlotStyle.AREA)\
.plot_xy(series_name="Series_1", t=static_source_1.where(["X < 1000", "String1 = `A`"]), x="Timestamp", y="Double1")\
.plot_xy(series_name="Series_2", t=static_source_1.where(["X < 1000", "String1 = `B`"]), x="Timestamp", y="Double1")\
.show()

Scatter

from deephaven.plot.figure import Figure
from deephaven.plot import PlotStyle
from deephaven.plot import Color, Colors
from deephaven.plot import font_family_names, Font, FontStyle, Shape

figure = Figure()
plotXYScatter = figure\
.plot_xy(series_name="Double1", t=static_source_1.head(50), x="Timestamp", y="Double1")\
.axes(plot_style=PlotStyle.SCATTER)\
.point(shape=Shape.SQUARE, size=10, label="Big Point", color=Colors.RED)\
.x_twin()\
.axes(plot_style=PlotStyle.SCATTER)\
.plot_xy(series_name="ii", t=static_source_1.head(50), x="Timestamp", y="X")\
.point(shape=Shape.DIAMOND, size=16, label="Big Triangle",color=Colors.BLUE )\
.show()