%pylab inline

Populating the interactive namespace from numpy and matplotlib

figsize(15,10)

import pandas as pd

from datetime import datetime
from datetime import datetime as date_time

from scipy.stats.stats import pearsonr

his_columns = ['date_time', 'tp', 'dirp', 'sprp', 'tz', 'hm0', 'ti', 't1',
               'tc', 'tdw2', 'tdw1', 'tpc', 'nu','eps','qp','ss','tref','tsea',
               'bat']

hiw_columns = ['date_time','% no reception errors','hmax','tmax','h1_10',
               't1_10','h1_3','t1_3','Hav','Tav','Eps','#Waves']

def apply_date_time_index(df):
    df = df[pd.notnull(df.date_time)]
    date_times = []
    for index,date_time_string in enumerate(df['date_time'].values):
        if type(date_time_string) == type(''):
            date_time = datetime.strptime(date_time_string[:-5],
                                          "%Y-%m-%dT%H:%M:%S")
            date_times.append(date_time)
    df.index = pd.DatetimeIndex(date_times)
    return df

def process_hist(hist_df):
    hist_df = apply_date_time_index(hist_df)
    hist_df = hist_df.drop('date_time', axis = 1)
    hist_df = hist_df.resample('30Min')
    return hist_df

def read_hist_csv_and_time_index(filepath, columns= his_columns):
    his_df = pd.read_csv(filepath, names = columns)
    return process_hist(his_df)

# Reads in csv files output from Matlab DIWASP toolbox
# and outputs timestamped Pandas Dataframes

def read_diwasp_csv(file_path, date_format="%Y-%m-%dT%Hh%MZ"):
    """Function for reading csv files created by the write_csv.m matlab file
       which output file_names, Hm0 and peak period
    """
    df = pd.read_csv(file_path)
    date_time_array = []
    for date_time_string in df['file_name'].values:
        date_time_array.append(date_time.strptime(date_time_string[-21:-4],
                                                 date_format))

    df.index = pd.DatetimeIndex(date_time_array)
    return df

bragar_february_2013_rfbuoy_his = read_his_csv_and_time_index('D:\\Datawell\\Bragar_HebMarine2\\2013\\February\\Bragar_HebMarine2$}2013-02.his')
bragar_february_2013_waves_21_his = read_his_csv_and_time_index('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\W@ves21_exported_save_all_books\\Bragar_HebMarine2$}2013-02.his')

The above plot shows that there are spikes of up to 5 metres in this case where using the default his files would indicate a higher hm0.

There are also points at which the w@ves 21 exported his indicates a higher hm0 this could be an effect of time shifting.

plt.figure()
bragar_february_2013_waves_21_his.tp.plot()
bragar_february_2013_rfbuoy_his.tp.plot()
(bragar_february_2013_waves_21_his.tp - bragar_february_2013_rfbuoy_his.tp).plot()
plt.legend(['W@ve21 HIS Tp','RFBuoy HIS Tp','Tp Difference'])
plt.title('Bragar February 2013 - Peak period W@ves21 ( Save All Books ) historical spectra against RFBuoy HIS ( 10 minute offset )')
plt.ylabel('Seconds')

<matplotlib.text.Text at 0x1bfb3208>

Some of the spikes can be accounted for by the offset, where there are corresponding spikes in both directions. This could be accounted for by taking the timeseries in pandas and outputting modified raw files which match the timestamps which are provided for in the RFBuoy HIS file.

waves_21_hiw = read_hist_csv_and_time_index('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\Bragar_HebMarine2}2013-02.hiw',
                                            hiw_columns)

rfbuoy_hiw = read_hist_csv_and_time_index('D:\\Datawell\\Bragar_HebMarine2\\2013\\February\\Bragar_HebMarine2}2013-02.hiw',
                                          hiw_columns)

plt.figure()
rfbuoy_hiw.hmax.plot()
waves_21_hiw.hmax.plot()
(waves_21_hiw.hmax - rfbuoy_hiw.hmax).plot()
plt.legend(['RFBuoy hmax','W@ves21 hmax','Difference'])
plt.title('Hmax February 2013 - RFBuoy W@ves21 hiw comparison')

<matplotlib.text.Text at 0x1f33af60>

plt.figure()
rfbuoy_hiw.h1_3.plot()
waves_21_hiw.h1_3.plot()
(waves_21_hiw.h1_3 - rfbuoy_hiw.h1_3).plot()
plt.legend(['RFBuoy hm0','W@ves21 hm0','Difference'])
plt.title('hm0 February 2013 - RFBuoy W@ves21 hiw comparison')

<matplotlib.text.Text at 0x1fb1a320>

bragar_2013_feb_diwasp = read_diwasp_csv('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\small_freq_complex_masked_file_paramters.csv')
#diwasp_output = read_csv('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\siadar_complex_masked_file_parameters.csv')

plt.figure(figsize(13,8))
(bragar_2013_feb_diwasp.Hm0*100).plot()
bragar_february_2013_waves_21_his.hm0.plot()
(bragar_february_2013_waves_21_his.hm0 - (bragar_2013_feb_diwasp.Hm0 * 100)).plot()
plt.legend(['DIWASP Output','W@ves 21 HIS','W@ves21-DIWASP'])
plt.title('Hm0 HIS DIWASP vs W@ves21')

<matplotlib.text.Text at 0x203c5c50>

The diwasp output under predicts against the W@ves21 output aside from certain data points at the peak of the storm. What artefacts cause this? The data for that timestamp

bragar_2013_feb_diwasp_Hm0_centimetres = bragar_2013_feb_diwasp.Hm0 * 100
print "Largest negative difference of Significant Wave Height February Bragar 2013 - W@ves21 minus DIWASP"
(bragar_february_2013_waves_21_his.hm0 - bragar_2013_feb_diwasp_Hm0_centimetres)[(waves_21_his.hm0 - bragar_2013_feb_diwasp_Hm0_centimetres)==(waves_21_his.hm0 - bragar_2013_feb_diwasp_Hm0_centimetres).min()]

Largest negative difference of Significant Wave Height February Bragar 2013 - W@ves21 minus DIWASP

2013-02-05 19:30:00   -525.7892
dtype: float64

print "Scatter Index - RMS over mean - "
np.sqrt((bragar_february_2013_waves_21_his.hm0 - bragar_2013_feb_diwasp_Hm0_centimetres) ** 2).mean()/bragar_february_2013_waves_21_his.hm0.mean()

Scatter Index - RMS over mean -

0.12798443283898586

print "Pearson values from resampled Bragar 2013 Significant wave heights"
print pearsonr(bragar_february_2013_waves_21_his.hm0.resample('30Min'), bragar_2013_feb_diwasp_Hm0.resample('30Min'))
print np.corrcoef(bragar_february_2013_waves_21_his.hm0.resample('30Min').values, bragar_2013_feb_diwasp_Hm0.resample('30Min').values)

Pearson values from resampled Bragar 2013 Significant wave heights
(nan, 1.0)
[[ nan  nan]
 [ nan   1.]]

august_2012_diwasp_output = read_diwasp_csv('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\bragar_august_2012.csv')
august_2012_bragar_waves_21_his = read_hist_csv_and_time_index('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\Bragar_HebMarine2$}2012-08.his')

august_2012_bragar_waves_21_his.hm0.plot()
(august_2012_diwasp_output * 100 ).Hm0.plot()
plt.legend(['Waves','DIWASP'])
plt.title('Bragar August 2012 - Significant Wave Height (centimetres)')

<matplotlib.text.Text at 0x1a6efa20>

def rms_hm0(diwasp,waves):
    diwasp_hm0 = diwasp.Hm0 * 100
    return np.sqrt((waves.hm0 - diwasp_hm0) ** 2).mean()/waves.hm0.mean()

rms_hm0(august_2012_diwasp_output, august_2012_bragar_waves_21_his)

0.23580569297412493

august_2012_diwasp_output.index.difference(august_2012_bragar_waves_21_his.index)

<class 'pandas.tseries.index.DatetimeIndex'>
[2012-08-27 17:44:00]
Length: 1, Freq: None, Timezone: None

august_2012_diwasp_output.ix[datetime(2012,8,27,17):datetime(2012,8,27,18)]

	file_name	Hm0	peak_period	mean_dir_pp	energy_period
2012-08-27 17:00:00	Bragar_HebMarine2}2012-08-27T17h00Z.raw	2.635557	12.500000	233.610155	9.322102
2012-08-27 17:30:00	Bragar_HebMarine2}2012-08-27T17h30Z.raw	3.066969	13.333333	242.614520	9.681821
2012-08-27 17:44:00	Bragar_HebMarine2}2012-08-27T17h44Z.raw	3.280173	10.526316	235.184029	9.352168
2012-08-27 18:00:00	Bragar_HebMarine2}2012-08-27T18h00Z.raw	3.044029	10.526316	240.575615	9.293356

august_2012_bragar_waves_21_his.ix[datetime(2012,8,27,17):datetime(2012,8,27,18)]

	tp	dirp	sprp	tz	hm0	ti	t1	tc	tdw2	tdw1	tpc	nu	eps	qp	ss
2012-08-27 17:00:00	12.50	233.5	19.7	6.73	313	9.81	7.69	3.76	8.44	10.34	12.52	0.555	0.829	1.829	0.044
2012-08-27 17:30:00	10.53	231.2	26.3	6.74	360	9.46	7.58	3.95	8.24	10.04	11.81	0.515	0.811	1.980	0.051
2012-08-27 18:00:00	10.53	236.7	26.1	6.89	353	9.59	7.72	4.09	8.36	9.82	11.91	0.502	0.805	1.848	0.048

If there are multiple files in one 30 minute period, as above for 17:44, W@ves21 only provides one value which I believe only consists of one of those files the last is my guess

august_2012_diwasp_output_resampled = august_2012_diwasp_output.resample('30Min')

siadar_2013_august_waves_21_his = read_hist_csv_and_time_index('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\Bragar_HebMarine2$}2012-08.his')

siadar_2013_august_waves_21_his.ix[datetime(2012,8,9):datetime(2012,8,9,1)]

	date_time	tp	dirp	sprp	tz	hm0	ti	t1	tc	tdw2	tdw1	tpc	nu	eps	qp	ss
2012-08-09 00:00:00	2012-08-09T00:00:00.000	10.00	296.4	31.6	6.42	111	8.78	7.31	3.40	7.85	10.35	10.56	0.541	0.849	2.464	0.017
2012-08-09 00:30:00	2012-08-09T00:30:00.000	10.53	276.6	21.6	5.59	92	8.58	6.61	2.86	7.32	10.24	11.13	0.630	0.859	2.414	0.019
2012-08-09 01:00:00	2012-08-09T01:00:00.000	10.00	286.2	27.0	5.43	95	8.40	6.43	2.84	7.14	9.99	10.97	0.633	0.852	2.311	0.021

bragar_his_august_2012_rfbuoy = read_hist_csv_and_time_index('D:\\Datawell\\Bragar_HebMarine2\\2012\\August\\Bragar_HebMarine2$}2012-08.his')
siadar_august_2012_diwasp_output = read_diwasp_csv('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\siadar_august_2012.csv')
siadar_his_august_2012_rfbuoy = read_hist_csv_and_time_index('D:\\Datawell\\Siadar_HebMarine1\\2012\\August\\Siadar_HebMarine1$}2012-08.his')
siadar_august_2012_waves21 = read_hist_csv_and_time_index('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\Siadar_HebMarine1$}2012-08.his')

siadar_2012 = pd.read_pickle('C:\Users\le12jm\David_2012_Siadar_WAFO')

siadar_2012_august_wafo = siadar_2012.ix[datetime(2012,8,1,0):datetime(2012,9,1,0)]

plt.figure()
(siadar_his_august_2012_rfbuoy/100.0).hm0.plot()
siadar_august_2012_diwasp_output.Hm0.plot()
siadar_2012_august_wafo.Hs.plot()
(siadar_august_2012_waves21.hm0/100.0).plot()
plt.legend(['RFBuoy','DIWASP','WAFO','W@ves21'])
plt.title('August 2012 Siadar Hs')

<matplotlib.text.Text at 0x19665860>

diff_siadar_august_his_diwasp = (siadar_his_august_2012_rfbuoy/100.0).hm0 - siadar_august_2012_diwasp_output.Hm0
diff_siadar_august_wafo_diwasp = siadar_2012_august_wafo.Hs - siadar_august_2012_diwasp_output.Hm0
diff_siadar_august_wafo_his = siadar_2012_august_wafo.Hs - (siadar_his_august_2012_rfbuoy/100.0).hm0
diff_siadar_august_wafo_waves21 =  siadar_2012_august_wafo.Hs - (siadar_august_2012_waves21/100.0).hm0

print "Difference between Significant Sidar August 2012"
his_wafo_diwasp_descriptions = pd.DataFrame([diff_siadar_august_his_diwasp.describe(), diff_siadar_august_wafo_diwasp.describe(),
                                             diff_siadar_august_wafo_his.describe(), diff_siadar_august_wafo_waves21.describe()])
his_wafo_diwasp_descriptions.index =  ['HIS - DIWASP', 'WAFO - DIWASP', 'WAFO - HIS', 'WAFO - W@VES21']
print his_wafo_diwasp_descriptions

Difference between Significant Sidar August 2012
                count      mean       std       min       25%       50%  HIS - DIWASP     1455  0.001650  0.069195 -0.503595 -0.028592  0.002158
WAFO - DIWASP     484 -0.003648  0.075439 -0.378972 -0.035252 -0.004494
WAFO - HIS        484 -0.001221  0.066235 -0.244505 -0.038938 -0.003037
WAFO - W@VES21    484  0.000281  0.002864 -0.004980 -0.002100  0.000575

                     75%       max
HIS - DIWASP    0.033824  0.335032
WAFO - DIWASP   0.030124  0.427863
WAFO - HIS      0.032448  0.399695
WAFO - W@VES21  0.002747  0.004920

def scatter_index(time_series1,time_series2):
    return np.sqrt((time_series1 - time_series2) ** 2).mean()/time_series2.mean()

rms_results = [ ('RFBuoy - DIWASP', scatter_index((siadar_his_august_2012_rfbuoy/100.0).hm0, siadar_august_2012_diwasp_output.Hm0)),
                ('WAFO - DIWASP', scatter_index(siadar_2012_august_wafo.Hs, siadar_august_2012_diwasp_output.Hm0)),
                ('WAFO - RfBuoy', scatter_index(siadar_2012_august_wafo.Hs, (siadar_his_august_2012_rfbuoy/100.0).hm0)),
                ('W@ves21 - RfBuoy', scatter_index((siadar_august_2012_waves21.hm0/100.0), (siadar_his_august_2012_rfbuoy/100.0).hm0)),
                ('W@ves21 - WAFO', scatter_index((siadar_august_2012_waves21.hm0/100.0), siadar_2012_august_wafo.Hs)),
                ('W@ves21 - DIWASP', scatter_index((siadar_august_2012_waves21.hm0/100.0), siadar_august_2012_diwasp_output.Hm0))]
rms_df = pd.DataFrame(rms_results, columns=['Title','Scatter Index'])

print "Siadar August 2012 significant wave heights scatter indexes"
rms_df

Siadar August 2012 significant wave heights scatter indexes

	Title	Scatter Index
0	RFBuoy - DIWASP	0.033071
1	WAFO - DIWASP	0.035339
2	WAFO - RfBuoy	0.032917
3	W@ves21 - RfBuoy	0.031497
4	W@ves21 - WAFO	0.001738
5	W@ves21 - DIWASP	0.033966

siadar_2012_january_wafo = siadar_2012.ix[datetime(2012,1,1,0):datetime(2012,2,1,0)]

# Import historical his files produced by RFBuoy and Exported from RAW files in W@ves21
siadar_january_2012_waves21 = read_his_csv_and_time_index('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\Siadar_HebMarine1$}2012-01.his')
siadar_january_2012_rfbuoy = read_his_csv_and_time_index('D:\\Datawell\\Siadar_HebMarine1\\2012\\January\\Siadar_HebMarine1$}2012-01.his')

siadar_january_2012_waves21.dirp.plot()
siadar_january_2012_rfbuoy.dirp.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x1fb0fac8>

siadar_2012_january_wafo.columns

Index([u'Hs', u'Tz'], dtype='object')

siadar_2012_df = pd.read_csv('F:\\David_Dirp_Spread_peak_frequency_2012.csv', names=['date_time','direction','spread'],
                             parse_dates=True, infer_datetime_format=True, index_col=0)

siadar_2012_df.direction.plot(title="Direction over time)

<matplotlib.axes._subplots.AxesSubplot at 0x24534c88>

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

his_comparison.rst

his_comparison.rst

Files

his_comparison.rst

Latest commit

History

his_comparison.rst

File metadata and controls