Skip to content

Latest commit

 

History

History
774 lines (552 loc) · 21.4 KB

his_comparison.rst

File metadata and controls

774 lines (552 loc) · 21.4 KB
%pylab inline
Populating the interactive namespace from numpy and matplotlib
figsize(15,10)
import pandas as pd
from datetime import datetime
from datetime import datetime as date_time
from scipy.stats.stats import pearsonr
his_columns = ['date_time', 'tp', 'dirp', 'sprp', 'tz', 'hm0', 'ti', 't1',
               'tc', 'tdw2', 'tdw1', 'tpc', 'nu','eps','qp','ss','tref','tsea',
               'bat']

hiw_columns = ['date_time','% no reception errors','hmax','tmax','h1_10',
               't1_10','h1_3','t1_3','Hav','Tav','Eps','#Waves']
def apply_date_time_index(df):
    df = df[pd.notnull(df.date_time)]
    date_times = []
    for index,date_time_string in enumerate(df['date_time'].values):
        if type(date_time_string) == type(''):
            date_time = datetime.strptime(date_time_string[:-5],
                                          "%Y-%m-%dT%H:%M:%S")
            date_times.append(date_time)
    df.index = pd.DatetimeIndex(date_times)
    return df
def process_hist(hist_df):
    hist_df = apply_date_time_index(hist_df)
    hist_df = hist_df.drop('date_time', axis = 1)
    hist_df = hist_df.resample('30Min')
    return hist_df
def read_hist_csv_and_time_index(filepath, columns= his_columns):
    his_df = pd.read_csv(filepath, names = columns)
    return process_hist(his_df)
# Reads in csv files output from Matlab DIWASP toolbox
# and outputs timestamped Pandas Dataframes

def read_diwasp_csv(file_path, date_format="%Y-%m-%dT%Hh%MZ"):
    """Function for reading csv files created by the write_csv.m matlab file
       which output file_names, Hm0 and peak period
    """
    df = pd.read_csv(file_path)
    date_time_array = []
    for date_time_string in df['file_name'].values:
        date_time_array.append(date_time.strptime(date_time_string[-21:-4],
                                                 date_format))

    df.index = pd.DatetimeIndex(date_time_array)
    return df
bragar_february_2013_rfbuoy_his = read_his_csv_and_time_index('D:\\Datawell\\Bragar_HebMarine2\\2013\\February\\Bragar_HebMarine2$}2013-02.his')
bragar_february_2013_waves_21_his = read_his_csv_and_time_index('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\W@ves21_exported_save_all_books\\Bragar_HebMarine2$}2013-02.his')

The above plot shows that there are spikes of up to 5 metres in this case where using the default his files would indicate a higher hm0.

There are also points at which the w@ves 21 exported his indicates a higher hm0 this could be an effect of time shifting.

plt.figure()
bragar_february_2013_waves_21_his.tp.plot()
bragar_february_2013_rfbuoy_his.tp.plot()
(bragar_february_2013_waves_21_his.tp - bragar_february_2013_rfbuoy_his.tp).plot()
plt.legend(['W@ve21 HIS Tp','RFBuoy HIS Tp','Tp Difference'])
plt.title('Bragar February 2013 - Peak period W@ves21 ( Save All Books ) historical spectra against RFBuoy HIS ( 10 minute offset )')
plt.ylabel('Seconds')
<matplotlib.text.Text at 0x1bfb3208>

output_12_1.png

Some of the spikes can be accounted for by the offset, where there are corresponding spikes in both directions. This could be accounted for by taking the timeseries in pandas and outputting modified raw files which match the timestamps which are provided for in the RFBuoy HIS file.

waves_21_hiw = read_hist_csv_and_time_index('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\Bragar_HebMarine2}2013-02.hiw',
                                            hiw_columns)
rfbuoy_hiw = read_hist_csv_and_time_index('D:\\Datawell\\Bragar_HebMarine2\\2013\\February\\Bragar_HebMarine2}2013-02.hiw',
                                          hiw_columns)
plt.figure()
rfbuoy_hiw.hmax.plot()
waves_21_hiw.hmax.plot()
(waves_21_hiw.hmax - rfbuoy_hiw.hmax).plot()
plt.legend(['RFBuoy hmax','W@ves21 hmax','Difference'])
plt.title('Hmax February 2013 - RFBuoy W@ves21 hiw comparison')
<matplotlib.text.Text at 0x1f33af60>

output_16_1.png

plt.figure()
rfbuoy_hiw.h1_3.plot()
waves_21_hiw.h1_3.plot()
(waves_21_hiw.h1_3 - rfbuoy_hiw.h1_3).plot()
plt.legend(['RFBuoy hm0','W@ves21 hm0','Difference'])
plt.title('hm0 February 2013 - RFBuoy W@ves21 hiw comparison')
<matplotlib.text.Text at 0x1fb1a320>

output_17_1.png

bragar_2013_feb_diwasp = read_diwasp_csv('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\small_freq_complex_masked_file_paramters.csv')
#diwasp_output = read_csv('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\siadar_complex_masked_file_parameters.csv')
plt.figure(figsize(13,8))
(bragar_2013_feb_diwasp.Hm0*100).plot()
bragar_february_2013_waves_21_his.hm0.plot()
(bragar_february_2013_waves_21_his.hm0 - (bragar_2013_feb_diwasp.Hm0 * 100)).plot()
plt.legend(['DIWASP Output','W@ves 21 HIS','W@ves21-DIWASP'])
plt.title('Hm0 HIS DIWASP vs W@ves21')
<matplotlib.text.Text at 0x203c5c50>

output_19_1.png

The diwasp output under predicts against the W@ves21 output aside from certain data points at the peak of the storm. What artefacts cause this? The data for that timestamp

bragar_2013_feb_diwasp_Hm0_centimetres = bragar_2013_feb_diwasp.Hm0 * 100
print "Largest negative difference of Significant Wave Height February Bragar 2013 - W@ves21 minus DIWASP"
(bragar_february_2013_waves_21_his.hm0 - bragar_2013_feb_diwasp_Hm0_centimetres)[(waves_21_his.hm0 - bragar_2013_feb_diwasp_Hm0_centimetres)==(waves_21_his.hm0 - bragar_2013_feb_diwasp_Hm0_centimetres).min()]
Largest negative difference of Significant Wave Height February Bragar 2013 - W@ves21 minus DIWASP
2013-02-05 19:30:00   -525.7892
dtype: float64
print "Scatter Index - RMS over mean - "
np.sqrt((bragar_february_2013_waves_21_his.hm0 - bragar_2013_feb_diwasp_Hm0_centimetres) ** 2).mean()/bragar_february_2013_waves_21_his.hm0.mean()
Scatter Index - RMS over mean -
0.12798443283898586
print "Pearson values from resampled Bragar 2013 Significant wave heights"
print pearsonr(bragar_february_2013_waves_21_his.hm0.resample('30Min'), bragar_2013_feb_diwasp_Hm0.resample('30Min'))
print np.corrcoef(bragar_february_2013_waves_21_his.hm0.resample('30Min').values, bragar_2013_feb_diwasp_Hm0.resample('30Min').values)
Pearson values from resampled Bragar 2013 Significant wave heights
(nan, 1.0)
[[ nan  nan]
 [ nan   1.]]
august_2012_diwasp_output = read_diwasp_csv('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\bragar_august_2012.csv')
august_2012_bragar_waves_21_his = read_hist_csv_and_time_index('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\Bragar_HebMarine2$}2012-08.his')
august_2012_bragar_waves_21_his.hm0.plot()
(august_2012_diwasp_output * 100 ).Hm0.plot()
plt.legend(['Waves','DIWASP'])
plt.title('Bragar August 2012 - Significant Wave Height (centimetres)')
<matplotlib.text.Text at 0x1a6efa20>

output_25_1.png

def rms_hm0(diwasp,waves):
    diwasp_hm0 = diwasp.Hm0 * 100
    return np.sqrt((waves.hm0 - diwasp_hm0) ** 2).mean()/waves.hm0.mean()
rms_hm0(august_2012_diwasp_output, august_2012_bragar_waves_21_his)
0.23580569297412493
august_2012_diwasp_output.index.difference(august_2012_bragar_waves_21_his.index)
<class 'pandas.tseries.index.DatetimeIndex'>
[2012-08-27 17:44:00]
Length: 1, Freq: None, Timezone: None
august_2012_diwasp_output.ix[datetime(2012,8,27,17):datetime(2012,8,27,18)]
file_name Hm0 peak_period mean_dir_pp sig_qual_sum energy_period
2012-08-27 17:00:00 Bragar_HebMarine2}2012-08-27T17h00Z.raw 2.635557 12.500000 233.610155 0 9.322102
2012-08-27 17:30:00 Bragar_HebMarine2}2012-08-27T17h30Z.raw 3.066969 13.333333 242.614520 0 9.681821
2012-08-27 17:44:00 Bragar_HebMarine2}2012-08-27T17h44Z.raw 3.280173 10.526316 235.184029 0 9.352168
2012-08-27 18:00:00 Bragar_HebMarine2}2012-08-27T18h00Z.raw 3.044029 10.526316 240.575615 0 9.293356
august_2012_bragar_waves_21_his.ix[datetime(2012,8,27,17):datetime(2012,8,27,18)]
tp dirp sprp tz hm0 ti t1 tc tdw2 tdw1 tpc nu eps qp ss tref tsea bat
2012-08-27 17:00:00 12.50 233.5 19.7 6.73 313 9.81 7.69 3.76 8.44 10.34 12.52 0.555 0.829 1.829 0.044 0 0 0
2012-08-27 17:30:00 10.53 231.2 26.3 6.74 360 9.46 7.58 3.95 8.24 10.04 11.81 0.515 0.811 1.980 0.051 0 0 0
2012-08-27 18:00:00 10.53 236.7 26.1 6.89 353 9.59 7.72 4.09 8.36 9.82 11.91 0.502 0.805 1.848 0.048 0 0 0

If there are multiple files in one 30 minute period, as above for 17:44, W@ves21 only provides one value which I believe only consists of one of those files the last is my guess

august_2012_diwasp_output_resampled = august_2012_diwasp_output.resample('30Min')
siadar_2013_august_waves_21_his = read_hist_csv_and_time_index('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\Bragar_HebMarine2$}2012-08.his')
siadar_2013_august_waves_21_his.ix[datetime(2012,8,9):datetime(2012,8,9,1)]
date_time tp dirp sprp tz hm0 ti t1 tc tdw2 tdw1 tpc nu eps qp ss tref tsea bat
2012-08-09 00:00:00 2012-08-09T00:00:00.000 10.00 296.4 31.6 6.42 111 8.78 7.31 3.40 7.85 10.35 10.56 0.541 0.849 2.464 0.017 0 0 0
2012-08-09 00:30:00 2012-08-09T00:30:00.000 10.53 276.6 21.6 5.59 92 8.58 6.61 2.86 7.32 10.24 11.13 0.630 0.859 2.414 0.019 0 0 0
2012-08-09 01:00:00 2012-08-09T01:00:00.000 10.00 286.2 27.0 5.43 95 8.40 6.43 2.84 7.14 9.99 10.97 0.633 0.852 2.311 0.021 0 0 0
bragar_his_august_2012_rfbuoy = read_hist_csv_and_time_index('D:\\Datawell\\Bragar_HebMarine2\\2012\\August\\Bragar_HebMarine2$}2012-08.his')
siadar_august_2012_diwasp_output = read_diwasp_csv('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\siadar_august_2012.csv')
siadar_his_august_2012_rfbuoy = read_hist_csv_and_time_index('D:\\Datawell\\Siadar_HebMarine1\\2012\\August\\Siadar_HebMarine1$}2012-08.his')
siadar_august_2012_waves21 = read_hist_csv_and_time_index('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\Siadar_HebMarine1$}2012-08.his')
siadar_2012 = pd.read_pickle('C:\Users\le12jm\David_2012_Siadar_WAFO')
siadar_2012_august_wafo = siadar_2012.ix[datetime(2012,8,1,0):datetime(2012,9,1,0)]
plt.figure()
(siadar_his_august_2012_rfbuoy/100.0).hm0.plot()
siadar_august_2012_diwasp_output.Hm0.plot()
siadar_2012_august_wafo.Hs.plot()
(siadar_august_2012_waves21.hm0/100.0).plot()
plt.legend(['RFBuoy','DIWASP','WAFO','W@ves21'])
plt.title('August 2012 Siadar Hs')
<matplotlib.text.Text at 0x19665860>

output_38_1.png

diff_siadar_august_his_diwasp = (siadar_his_august_2012_rfbuoy/100.0).hm0 - siadar_august_2012_diwasp_output.Hm0
diff_siadar_august_wafo_diwasp = siadar_2012_august_wafo.Hs - siadar_august_2012_diwasp_output.Hm0
diff_siadar_august_wafo_his = siadar_2012_august_wafo.Hs - (siadar_his_august_2012_rfbuoy/100.0).hm0
diff_siadar_august_wafo_waves21 =  siadar_2012_august_wafo.Hs - (siadar_august_2012_waves21/100.0).hm0
print "Difference between Significant Sidar August 2012"
his_wafo_diwasp_descriptions = pd.DataFrame([diff_siadar_august_his_diwasp.describe(), diff_siadar_august_wafo_diwasp.describe(),
                                             diff_siadar_august_wafo_his.describe(), diff_siadar_august_wafo_waves21.describe()])
his_wafo_diwasp_descriptions.index =  ['HIS - DIWASP', 'WAFO - DIWASP', 'WAFO - HIS', 'WAFO - W@VES21']
print his_wafo_diwasp_descriptions
Difference between Significant Sidar August 2012
                count      mean       std       min       25%       50%  HIS - DIWASP     1455  0.001650  0.069195 -0.503595 -0.028592  0.002158
WAFO - DIWASP     484 -0.003648  0.075439 -0.378972 -0.035252 -0.004494
WAFO - HIS        484 -0.001221  0.066235 -0.244505 -0.038938 -0.003037
WAFO - W@VES21    484  0.000281  0.002864 -0.004980 -0.002100  0.000575

                     75%       max
HIS - DIWASP    0.033824  0.335032
WAFO - DIWASP   0.030124  0.427863
WAFO - HIS      0.032448  0.399695
WAFO - W@VES21  0.002747  0.004920
def scatter_index(time_series1,time_series2):
    return np.sqrt((time_series1 - time_series2) ** 2).mean()/time_series2.mean()
rms_results = [ ('RFBuoy - DIWASP', scatter_index((siadar_his_august_2012_rfbuoy/100.0).hm0, siadar_august_2012_diwasp_output.Hm0)),
                ('WAFO - DIWASP', scatter_index(siadar_2012_august_wafo.Hs, siadar_august_2012_diwasp_output.Hm0)),
                ('WAFO - RfBuoy', scatter_index(siadar_2012_august_wafo.Hs, (siadar_his_august_2012_rfbuoy/100.0).hm0)),
                ('W@ves21 - RfBuoy', scatter_index((siadar_august_2012_waves21.hm0/100.0), (siadar_his_august_2012_rfbuoy/100.0).hm0)),
                ('W@ves21 - WAFO', scatter_index((siadar_august_2012_waves21.hm0/100.0), siadar_2012_august_wafo.Hs)),
                ('W@ves21 - DIWASP', scatter_index((siadar_august_2012_waves21.hm0/100.0), siadar_august_2012_diwasp_output.Hm0))]
rms_df = pd.DataFrame(rms_results, columns=['Title','Scatter Index'])
print "Siadar August 2012 significant wave heights scatter indexes"
rms_df
Siadar August 2012 significant wave heights scatter indexes
Title Scatter Index
0 RFBuoy - DIWASP 0.033071
1 WAFO - DIWASP 0.035339
2 WAFO - RfBuoy 0.032917
3 W@ves21 - RfBuoy 0.031497
4 W@ves21 - WAFO 0.001738
5 W@ves21 - DIWASP 0.033966
siadar_2012_january_wafo = siadar_2012.ix[datetime(2012,1,1,0):datetime(2012,2,1,0)]
# Import historical his files produced by RFBuoy and Exported from RAW files in W@ves21
siadar_january_2012_waves21 = read_his_csv_and_time_index('D:\\Profiles\\le12jm\\My Documents\\toolbox-comparison-paper\\data\\Siadar_HebMarine1$}2012-01.his')
siadar_january_2012_rfbuoy = read_his_csv_and_time_index('D:\\Datawell\\Siadar_HebMarine1\\2012\\January\\Siadar_HebMarine1$}2012-01.his')
siadar_january_2012_waves21.dirp.plot()
siadar_january_2012_rfbuoy.dirp.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1fb0fac8>

output_46_1.png

siadar_2012_january_wafo.columns
Index([u'Hs', u'Tz'], dtype='object')
siadar_2012_df = pd.read_csv('F:\\David_Dirp_Spread_peak_frequency_2012.csv', names=['date_time','direction','spread'],
                             parse_dates=True, infer_datetime_format=True, index_col=0)
siadar_2012_df.direction.plot(title="Direction over time)
<matplotlib.axes._subplots.AxesSubplot at 0x24534c88>

output_49_1.png