In my previous blog post, https://stborden.wordpress.com/2021/08/27/enabling-and-use-of-the-oracle-goldengate-big-data-adapter-integrated-heartbeat/, I showed how to enable the integrated heartbeat for the GoldenGate Big Data Adapter. In this post, we will extend that facility by using Python to read the heartbeat data and report lag for the replication path over different time periods.
Python is a perfect language for iterating over the JSON generated by the integrated heartbeat. For ease, I have posted the script in the code block below, but it is also available on my GitHub site:
https://github.com/shane-borden/goldengate-scripts/blob/main/ogg_big_data_heartbeat_report.py
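For context, the heartbeat history files are named like *-hb-YYYY-MM-DD.json and contain a top-level "records" array. Trimmed down to just the keys the script reads (the timestamp values below are made up for illustration, and the real files carry additional fields), a record looks roughly like this:

{
  "records": [
    {
      "incomingExtract": "E_HR_CL",
      "incomingRoutingPath": "P_HR_CL",
      "incomingReplicat": "RKFJSCON",
      "incomingHeartbeatTs": "2021-09-01 12:34:56.123456789",
      "outgoingReplicatTs": "2021-09-01 12:35:02.123456789"
    }
  ]
}

With that shape in mind, here is the script: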
#!/bin/python3
# Python program to read ogg heartbeat history json file
import json
import time
import datetime
import os
import glob
import sys, getopt
def main(argv):
    # Initialize Variables
    vLagJsonDir = ''
    try:
        opts, args = getopt.getopt(argv, "hj:", ["jsondir="])
        if len(opts) == 0:
            print('Script Usage: ogg_big_data_heartbeat_report.py -j <json dir>')
            sys.exit(1)
    except getopt.error as err:
        print('Script Usage: ogg_big_data_heartbeat_report.py -j <json dir>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('Script Usage: ogg_big_data_heartbeat_report.py -j <json dir>')
            sys.exit()
        elif opt in ("-j", "--jsondir"):
            vLagJsonDir = arg
    vTotLag = 0
    vTotJsonRecords = 0
    vTotLag_1hour = 0
    vTotJsonRecords_1hour = 0
    vTotLag_4hour = 0
    vTotJsonRecords_4hour = 0
    vTotLag_8hour = 0
    vTotJsonRecords_8hour = 0
    vTotLag_24hour = 0
    vTotJsonRecords_24hour = 0
    now = time.mktime(datetime.datetime.now().timetuple())
    if vLagJsonDir == "":
        vLagJsonDir = "/u01/app/oracle/product/oggBd/19.1/gg_1/dirtmp/"
        print('JSON Dir defaulted to: ' + str(vLagJsonDir))
    else:
        print('JSON Dir is: ' + str(vLagJsonDir))
    lag_records = []
    heartbeat_timestamp_records = []
    replication_path_records = []
    # Open each daily heartbeat history file found in the JSON directory
    for filename in glob.glob(vLagJsonDir + '/*-hb-[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9].json'):
        #print(filename)
        f = open(filename)
        # json.load returns the JSON object as a dictionary
        data = json.load(f)
        # Iterate through the records in the json list
        for i in data['records']:
            vIncomingTs = time.mktime(datetime.datetime.strptime(i['incomingHeartbeatTs'][:-3], "%Y-%m-%d %H:%M:%S.%f").timetuple())
            vOutgoingTs = time.mktime(datetime.datetime.strptime(i['outgoingReplicatTs'][:-3], "%Y-%m-%d %H:%M:%S.%f").timetuple())
            vIncomingHeartbeatTs = datetime.datetime.strptime(i['incomingHeartbeatTs'][:-3], "%Y-%m-%d %H:%M:%S.%f").strftime('%Y-%m-%d %H:%M')
            heartbeat_timestamp_records.append(vIncomingHeartbeatTs)
            #print(str(now - vOutgoingTs))
if (now - vOutgoingTs) " + i['incomingRoutingPath'] + " => " + i['incomingReplicat'] + " | " + vIncomingHeartbeatTs + " | " + str(vOutgoingTs - vIncomingTs))
replication_path_records.append(i['incomingExtract'] + " => " + i['incomingRoutingPath'] + " => " + i['incomingReplicat'])
elif (now - vOutgoingTs) <= 14400:
vTotLag_4hour = vTotLag_4hour + (vOutgoingTs - vIncomingTs)
vTotJsonRecords_4hour = (vTotJsonRecords_4hour + 1)
elif (now - vOutgoingTs) <= 28800:
vTotLag_8hour = vTotLag_8hour + (vOutgoingTs - vIncomingTs)
vTotJsonRecords_8hour = (vTotJsonRecords_8hour + 1)
            elif (now - vOutgoingTs) <= 86400:
                vTotLag_24hour = vTotLag_24hour + (vOutgoingTs - vIncomingTs)
                vTotJsonRecords_24hour = (vTotJsonRecords_24hour + 1)
            # Accumulate lag totals across the entire dataset
            vTotLag = vTotLag + (vOutgoingTs - vIncomingTs)
            vTotJsonRecords = (vTotJsonRecords + 1)
            lag_records.append(vOutgoingTs - vIncomingTs)
        f.close()
    if vTotJsonRecords == 0:
        print('No heartbeat json files found in: ' + str(vLagJsonDir))
        sys.exit(1)
    # Span of the dataset in days, from the oldest to the newest heartbeat
    vMinTs = time.mktime(datetime.datetime.strptime(min(heartbeat_timestamp_records), '%Y-%m-%d %H:%M').timetuple())
    vMaxTs = time.mktime(datetime.datetime.strptime(max(heartbeat_timestamp_records), '%Y-%m-%d %H:%M').timetuple())
    vTimeDiff = round((vMaxTs - vMinTs) / 86400, 1)
    # Report the distinct replication paths seen in the dataset
    print("Replication Paths:")
    for vPath in sorted(set(replication_path_records)):
        print(vPath)
    print("Combined Lag Data for Replication Paths:")
    if vTotJsonRecords_1hour > 0:
print("Average Lag over the past hour: " + str(vTotLag_1hour // vTotJsonRecords_1hour) + " seconds")
if vTotJsonRecords_4hour > 0:
print("Average Lag over the past 4 hours: " + str(vTotLag_4hour // vTotJsonRecords_4hour) + " seconds")
if vTotJsonRecords_8hour > 0:
print("Average Lag over the past 8 hours: " + str(vTotLag_8hour // vTotJsonRecords_8hour) + " seconds")
if vTotJsonRecords_24hour > 0:
print("Average Lag over the past 24 hours: " + str(vTotLag_24hour // vTotJsonRecords_24hour) + " seconds")
print("Average Lag over the dataset (" + str(vTimeDiff) + " Days): " + str(vTotLag // vTotJsonRecords) + " seconds")
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print('Script Usage: ogg_big_data_heartbeat_report.py -j <json dir>')
        sys.exit(2)
    else:
        main(sys.argv[1:])
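One parsing detail worth calling out: the script trims the last three characters off each timestamp with [:-3] before handing it to strptime. Python's %f directive accepts at most six fractional digits (microseconds), and the trim suggests the heartbeat timestamps carry nine (nanoseconds). A minimal sketch with a made-up timestamp value:

import datetime

# Hypothetical heartbeat timestamp with 9 fractional digits (nanoseconds)
ts = "2021-09-01 12:34:56.123456789"

# %f parses at most 6 fractional digits, so drop the last 3 before parsing
parsed = datetime.datetime.strptime(ts[:-3], "%Y-%m-%d %H:%M:%S.%f")
print(parsed)  # 2021-09-01 12:34:56.123456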
Now, executing the script is quite easy: just tell it where the JSON files are, and it will aggregate the results over the past hour, 4 hours, 8 hours, 24 hours, and the entire dataset:
oracle@orcl-vbox2:/home/oracle $ ogg_big_data_heartbeat_report.py -j /u01/app/oracle/product/oggBd/19.1/gg_1/dirtmp
JSON Dir is: /u01/app/oracle/product/oggBd/19.1/gg_1/dirtmp
Replication Paths:
E_HR_CL => P_HR_CL => RKFJSCON
Combined Lag Data for Replication Paths:
Average Lag over the past hour: 6.0 seconds
Average Lag over the past 4 hours: 6.0 seconds
Average Lag over the past 8 hours: 6.0 seconds
Average Lag over the past 24 hours: 6.0 seconds
Average Lag over the dataset (18.0 Days): 443446.0 seconds
I've only had my VM up for the past day or so, so the average for the entire dataset is skewed, as there was a several-day period where the VM was down.
Now you can extend this even further if you like: execute the script on a regular basis from OEM, or from any other scheduling tool you might have, so that historical data can be kept (see the cron sketch below). Enjoy!
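For example, a minimal cron sketch that runs the report hourly and appends the output to a history log. The schedule, script location, and log path are placeholders for your environment, and it assumes the script has been made executable:

0 * * * * /home/oracle/ogg_big_data_heartbeat_report.py -j /u01/app/oracle/product/oggBd/19.1/gg_1/dirtmp >> /home/oracle/ogg_heartbeat_lag_history.log 2>&1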
