-
Notifications
You must be signed in to change notification settings - Fork 0
/
geocsv.py
144 lines (109 loc) · 3 KB
/
geocsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import re
class geocsv(object):
    """Reader and (partial) validator for a GeoCSV file.

    The file is opened on construction. ``validate()`` currently parses the
    ``#``-prefixed header keyword lines and the field (column title) line
    that follows them. The instance may be used as a context manager so the
    underlying file handle is closed deterministically.
    """

    name = 'I created the class'

    # Header keyword names known to this class (not yet consulted by any
    # method below).
    knownKeys = [
        'fields',
        'field_unit',
        'field_type',
        'field_name_long',
        'field_standard_name',
        'field_missing',
        'delimiter',
        'attribution',
        'standard_name_cv'
    ]

    # Type names listed as allowed (presumably for the 'field_type' header;
    # not yet enforced by any method below).
    allowedTypes = [
        'string',
        'integer',
        'float',
        'datetime'
    ]

    def __init__(self, geocsvFilename):
        """Open *geocsvFilename* for reading and reset the parse state.

        Raises:
            OSError: if the file cannot be opened.
        """
        # Set first so close()/__del__ stay safe even if open() raises.
        self.geocsvFile = None
        self.keys = dict()
        self.fieldLine = ''
        self.fieldNames = []
        self.delimiter = ','
        self.numberColumns = 0
        self.geocsvFile = open(geocsvFilename, 'r')

    def __enter__(self):
        """Return the instance itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file on leaving a ``with`` block; never swallows errors."""
        self.close()
        return False

    def __del__(self):
        self.close()

    def close(self):
        """Close the underlying file if it is open; safe to call repeatedly."""
        # getattr guards against a partially constructed instance.
        handle = getattr(self, 'geocsvFile', None)
        if handle is not None and not handle.closed:
            handle.close()

    # Public functions
    def validate(self):
        """Run the implemented checks (currently header parsing only).

        Returns:
            True once the header has been parsed.
        """
        self._parseHeader()
        # self._checkData()
        return True

    def _parseHeader(self):
        """Read header keyword lines up to and including the field line.

        Populates ``keys`` (keyword -> value), ``fieldLine``, ``delimiter``,
        ``fieldNames`` and ``numberColumns``, then prints the field line,
        the delimiter and the column count.
        """
        for line in self.geocsvFile:
            if not line.startswith('#'):
                # First non-comment line is the field (column title) line.
                self.fieldLine = line.strip()
                break

            # Keyword declaration of the form "# key: value".
            key, separator, value = line.strip("# \r\n").partition(":")
            if not separator:
                # Comment line without a colon: nothing to record.
                continue
            key = key.strip()
            if key == 'delimiter':
                # Trim spaces only, so a literal tab delimiter survives.
                value = value.strip(' ')
            else:
                value = value.strip()
            self.keys[key] = value

        # Override the default comma only when a non-empty delimiter was
        # declared; a missing key must not raise.
        declared = self.keys.get('delimiter')
        if declared:
            self.delimiter = declared

        # Column names from the field line, and how many there are.
        if self.fieldLine:
            self.fieldNames = self.fieldLine.split(self.delimiter)
        else:
            self.fieldNames = []
        self.numberColumns = len(self.fieldNames)

        # Diagnostic output kept from the original implementation.
        print(self.fieldLine)
        print(self.delimiter)
        print(self.numberColumns)

    def _checkData(self):
        """Walk the remaining lines, printing a running count of data rows.

        Lines starting with ``#`` are skipped. Reading continues from
        wherever the file position currently is.
        """
        rowNumber = 0
        for line in self.geocsvFile:
            if line.startswith('#'):
                continue
            # Data line
            rowNumber += 1
            # TODO: split on self.delimiter and compare with numberColumns.
            print(rowNumber)
# def printFieldInfo():
#
# # display info about the fields
#
# def printColumns(fieldArray):
#
# # Extract data columns specified in fieldArray (index or name)
#
# # Checks to perform
#
# def checkRequiredHeaders:
#
# # currently only the 'fields'
#
# def checkNumberColumns:
#
# # check relevant header lines, column title line, and each data line for the same number of columns according to delimiter
#
# def checkCharset:
#
# # make sure everything is UTF-8
# # (not even sure if this can be done - may just be indicated by other failures...)
#
# def checkDates:
#
# # fields of type ‘datetime’ must be in an ISO 8601 format.
#
# def checkTypes:
#
# # check known types for consistency
#
# def setDelimiter:
#
# # default delimiter is a comma
#
# def setFieldMissing:
#
# # no default values
#
# def setRowStructure:
#
# # uses field names and field types to create structure for data entry
# POSIX regular expression to look for header key-value pairs:
#re = "^#\s*(keyword)\s*:(value)[\r\n]+"