-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNASISpedons_ExtractPedons_WFS.py
1508 lines (1175 loc) · 65.8 KB
/
NASISpedons_ExtractPedons_WFS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#-------------------------------------------------------------------------------
# Name: NASISpedons_ExtractPedons_WFS.py
#
# Author: Adolfo.Diaz
# e-mail: [email protected]
# phone: 608.662.4422 ext. 216
#
# Created: 1/10/2022
# Last Modified: 1/10/2022
# https://alexwlchan.net/2019/10/adventures-with-concurrent-futures/
# This tool is a modification of the NASISpedons_Extract_Pedons_from_NASIS _MultiThreading_ArcGISPro_SQL.py
# script. It was modified/created for the purpose of mining all pedons from
# NASIS to update an existing Web Feature Service. Only
## ===================================================================================
def AddMsgAndPrint(msg, severity=0):
    """Print a message and append it to the tool's text log file.

    The message is written to stdout and then appended, followed by a space
    and a newline, to the file named by the module-level ``textFilePath``.
    Reporting is best-effort: a failure to print or to write the log is
    suppressed so that logging can never abort the calling code.

    Parameters
        msg      - the text to report; non-string values are converted with
                   str() before being written to the log
        severity - accepted so callers can tag a message (callers in this
                   file pass 0, 1 or 2); it is not used by this function
    """
    try:
        print(msg)

        # The context manager closes the handle even if the write fails; the
        # previous code referenced f.close without ever calling it.
        with open(textFilePath, 'a+') as logFile:
            logFile.write(str(msg) + " \n")

    except Exception:
        # Deliberately swallowed: an undefined or unwritable log path must
        # not stop the tool from running.
        pass
## ===================================================================================
def errorMsg():
    """Report the exception currently being handled through AddMsgAndPrint.

    The report is built from two entries of the formatted traceback: the
    entry at index 1 and the last entry. If that text contains the word
    "exit" only blank lines are reported; otherwise the text is reported
    with severity 2. Any failure while building the report (for example when
    no exception is active) is reported as a generic unhandled error.
    """
    try:
        excType, excValue, excTraceback = sys.exc_info()

        # format the traceback once and pick out the 2 entries of interest
        formattedLines = traceback.format_exception(excType, excValue, excTraceback)
        theMsg = "\t" + formattedLines[1] + "\n\t" + formattedLines[-1]

        if "exit" in theMsg:
            AddMsgAndPrint("\n\n")
        else:
            AddMsgAndPrint(theMsg, 2)

    except:
        AddMsgAndPrint("Unhandled error in unHandledException method", 2)
## ================================================================================================================
def tic():
    """ Returns the current time as reported by time.time(); pair with toc()
        to measure elapsed time. """
    try:
        currentTime = time.time()
    except:
        errorMsg()
    else:
        return currentTime
## ================================================================================================================
def toc(_start_time):
    """ Returns the time elapsed since _start_time (a value from tic()) as a
        readable string. Hours and minutes are only included when non-zero,
        i.e. '1 hour(s): 2 minute(s): 5 second(s)' or '5 second(s)'"""
    try:
        elapsedSeconds = round(time.time() - _start_time)

        # break the whole seconds down into hours, minutes and seconds
        hours, leftover = divmod(elapsedSeconds, 3600)
        minutes, seconds = divmod(leftover, 60)

        secondsText = f"{int(seconds)} second(s)"
        if hours:
            return f"{int(hours)} hour(s): {int(minutes)} minute(s): " + secondsText
        if minutes:
            return f"{int(minutes)} minute(s): " + secondsText
        return secondsText

    except:
        errorMsg()
## ================================================================================================================
def splitThousands(someNumber):
    """ Inserts a comma as a thousands separator wherever one is needed.
        Input is an integer. A string is returned with or without separators,
        i.e. 1234567 returns '1,234,567' and 12 returns '12'.
        If the formatting fails the input is returned unchanged."""
    try:
        # work on the reversed text so the groups of 3 start at the last digit
        reversedDigits = str(someNumber)[::-1]
        reversedGrouped = re.sub(r'(\d{3})(?=\d)', r'\1,', reversedDigits)
        return reversedGrouped[::-1]

    except:
        errorMsg()
        return someNumber
## ================================================================================================================
def getDictionaryOfAllPedonIDs():
    """Request the list of ALL pedon IDs in NASIS and return them as a dictionary.

    A URL request is sent to the 'WEB_PEDON_PEIID_LIST_ALL_OF_NASIS' NASIS
    report. Pedons include regular NASIS pedons and LAB pedons. The IDs are
    read from the comma-separated line(s) found between the report's START
    marker and its STOP line, i.e.
        START 1204126, 1204127, 1204128 STOP

    The request is attempted up to 3 times before giving up.

    Returns
        A dictionary keyed by pedon ID (as a string) with None for every
        value, i.e. {'1204126': None, '1204127': None}
        False if the report could not be requested, no pedon IDs were found
        or an unhandled error was encountered.
    """
    try:
        AddMsgAndPrint("Retrieving a list of ALL pedonIDs from NASIS")
        arcpy.SetProgressorLabel("Requesting a list of ALL pedonIDs from NASIS")

        URL = r'https://nasis.sc.egov.usda.gov/NasisReportsWebSite/limsreport.aspx?report_name=WEB_PEDON_PEIID_LIST_ALL_OF_NASIS'

        """ -------------------------------------- Try connecting to NASIS to read the report ------------------------"""
        maxAttempts = 3
        attemptLabels = {2: "2nd", 3: "3rd"}

        for attempt in range(1, maxAttempts + 1):
            if attempt > 1:
                AddMsgAndPrint("\t" + attemptLabels[attempt] + " attempt at requesting data")

            try:
                theReport = urllib.request.urlopen(URL).readlines()
                break

            except Exception as e:
                # any failure is retried until the last attempt
                if attempt < maxAttempts:
                    continue

                # HTTPError is a subclass of URLError so it must be tested first
                if isinstance(e, HTTPError):
                    AddMsgAndPrint('HTTP Error' + str(e),2)
                elif isinstance(e, URLError):
                    AddMsgAndPrint('URL Error' + str(e),2)
                elif isinstance(e, socket.timeout):
                    AddMsgAndPrint("\n\t" + URL)
                    AddMsgAndPrint("\tServer Timeout Error", 2)
                elif isinstance(e, socket.error):
                    AddMsgAndPrint("\n\t" + URL)
                    AddMsgAndPrint("\tNASIS Reports Website connection failure", 2)
                else:
                    # not a connection problem; handled by the outer except
                    raise
                return False

        """ -------------------------------------- Read the NASIS report ------------------------------------"""
        # Initialized up front so a report without a START marker is reported
        # as "no pedons" instead of failing on an undefined variable.
        pedonDict = dict()
        bValidRecord = False # boolean that marks the line where the pedon IDs start

        arcpy.SetProgressor("step", "Reading NASIS Report: 'WEB_PEDON_PEIID_LIST_ALL_OF_NASIS'", 0, len(theReport), 1)

        # iterate through the report until a valid record is found
        for theValue in theReport:

            # convert from bytes to string and remove white spaces
            theValue = theValue.decode('utf-8').strip()

            if bValidRecord:
                if theValue == "STOP": # written as part of the report; end of lines
                    break

                # All of the pedonIDs will be contained in 1 line
                pedonDict = {val.strip():None for val in theValue.split(",")}

            elif theValue.startswith('<div id="ReportData">START'):
                bValidRecord = True

            arcpy.SetProgressorPosition()

        # Resets the progressor back to its initial state
        arcpy.ResetProgressor()

        if not pedonDict:
            AddMsgAndPrint("\tThere were no pedons returned from this report",2)
            return False

        return pedonDict

    except Exception:
        errorMsg()
        return False
## ================================================================================================================
def getNASISbreakdownCounts():
    """Collect the first value of every record returned by the
    'WEB_NASIS_Pedons_WFS_Metrics_AD' NASIS report for the whole globe.

    The report is requested once per bounding box. The boxes are: the NE, SW
    and SE hemispheres, 4 boxes surrounding the continental US within the NW
    hemisphere, and 5x5 degree blocks covering the continental US
    (latitude 24 to 49, longitude -125 to -65). The coordinates are appended
    to the report URL as:
        &Lat1=<south>&Lat2=<north>&Long1=<west>&Long2=<east>

    Every report record must contain exactly 3 pipe-delimited values. Only
    the first value of each record is collected; no lab / undisclosed tallies
    are computed.
    NOTE(review): the first value is presumed to be the pedon peiid (the list
    variables are named that way) -- confirm against the report definition.
    NOTE(review): adjacent boxes share edges; whether the report can return
    the same record for 2 boxes is not visible here.

    Returns
        A list containing the first value of every record from every box.
        False if the report failed for any box or an unhandled error was
        encountered.
    """

    def runWebMetricReport(coordinates):
        """Request the report for 1 bounding box.

        coordinates - query string with the bounding coordinates that is
                      appended to the report URL

        Returns the list of first values of the box's records, or None if the
        request failed (after 3 attempts), a record did not have 3 values or
        an unhandled error was encountered."""
        try:
            URL = r'https://nasis.sc.egov.usda.gov/NasisReportsWebSite/limsreport.aspx?report_name=WEB_NASIS_Pedons_WFS_Metrics_AD' + coordinates

            """ -------------------------------------- Try connecting to NASIS to read the report ------------------------"""
            maxAttempts = 3
            attemptLabels = {2: "2nd", 3: "3rd"}

            for attempt in range(1, maxAttempts + 1):
                if attempt > 1:
                    AddMsgAndPrint(".\t" + attemptLabels[attempt] + " attempt at requesting data")

                try:
                    theReport = urllib.request.urlopen(URL).readlines()
                    break

                except Exception as e:
                    # any failure is retried until the last attempt
                    if attempt < maxAttempts:
                        continue

                    # HTTPError is a subclass of URLError so it must be tested first
                    if isinstance(e, HTTPError):
                        AddMsgAndPrint('HTTP Error' + str(e),2)
                    elif isinstance(e, URLError):
                        AddMsgAndPrint('URL Error' + str(e),2)
                    elif isinstance(e, socket.timeout):
                        AddMsgAndPrint(".\n.\t" + URL)
                        AddMsgAndPrint(".\tServer Timeout Error", 2)
                    elif isinstance(e, socket.error):
                        AddMsgAndPrint(".\n.\t" + URL)
                        AddMsgAndPrint(".\tNASIS Reports Website connection failure", 2)
                    else:
                        # not a connection problem; handled by the outer except
                        raise
                    return None

            """ -------------------------------------- Read the NASIS report ------------------------------------"""
            bValidRecord = False # boolean that marks the line where the records start
            peiidList = list()

            for theValue in theReport:

                # convert from bytes to string and remove white spaces
                theValue = theValue.decode('utf-8').strip()

                # skip everything that precedes the START marker
                if not bValidRecord:
                    if theValue.startswith('<div id="ReportData">START'):
                        bValidRecord = True
                    continue

                if theValue == "STOP": # written as part of the report; end of lines
                    break

                theRec = theValue.split("|")

                if len(theRec) != 3:
                    AddMsgAndPrint(".\tNASIS Report: WEB_NASIS_Pedons_WFS_Metrics_AD is not returning the correct amount of values per record",2)
                    return None

                peiidList.append(theRec[0])

            return peiidList

        except Exception:
            errorMsg()
            return None

    try:
        Starttest = tic()

        latNorth = 49
        latSouth = 24
        longWest = -125
        longEast = -65
        degreeSize = 5

        # list of coordinates for the 5x5 degree blocks covering the US
        # Lat1, Lat2, Long1, Long2 -- S,N,W,E
        USdegreeblocks = [[lat, lat + degreeSize, lng, lng + degreeSize]
                          for lat in range(latSouth, latNorth, degreeSize)
                          for lng in range(longWest, longEast, degreeSize)]

        # List of coordinates for 4 boxes around the US for the NW hemisphere
        NW_hemisphere = [[0,90,-180,longWest],[0,latSouth,longWest,0],[latSouth,90,longEast,0],[latNorth,90,longWest,longEast]]

        # Master list of coordinates to send to the NASIS report:
        # NE, SW and SE hemispheres, then the NW hemisphere boxes, then the US blocks
        worldQuadrant = [[0,90,0,180],[-90,0,-180,0],[-90,0,0,180]] + NW_hemisphere + USdegreeblocks

        peIIDlist = list()

        for coordLst in worldQuadrant:
            coordStr = f"&Lat1={coordLst[0]}&Lat2={coordLst[1]}&Long1={coordLst[2]}&Long2={coordLst[3]}"
            print(f"Running report on {coordStr}")

            boxIDs = runWebMetricReport(coordStr)

            # A failed box would leave the list incomplete, so stop here
            # instead of returning partial or placeholder values.
            if boxIDs is None:
                AddMsgAndPrint(".\tFailed to retrieve records for " + coordStr,2)
                return False

            peIIDlist.extend(boxIDs)

        print(toc(Starttest))
        return peIIDlist

    except Exception:
        errorMsg()
        return False
## ================================================================================================================
def createPedonDB():
    """This function will create a new pedon database by copying a template
    database that sits in the same folder as this script. The template is a
    SQLite database when the module-level 'sqliteFormat' flag is set,
    otherwise a File Geodatabase. The copy is written to the module-level
    'outputFolder' and named with the module-level 'DBname' plus the
    template's extension. An existing database with that name is deleted first.

    Return False if the template is missing OR an existing database with the
    user-defined name already exists and cannot be deleted OR an unhandled
    error is encountered.

    Return the path to the new pedon database if everything executes correctly."""
    try:
        # the template and the extension depend on the requested output format
        if sqliteFormat:
            dbTypeLabel = "SQLite Database"
            templateName = "NASISPedonsSQLiteTemplate.sqlite"
            ext = ".sqlite"
        else:
            dbTypeLabel = "File Geodatabase"
            templateName = "NASISPedonsFGDBTemplate_WFS.gdb"
            ext = ".gdb"

        AddMsgAndPrint("\nCreating New Pedon " + dbTypeLabel,0)
        arcpy.SetProgressorLabel("Creating New Pedon " + dbTypeLabel)

        # pedon template that contains empty pedon Tables and relationships;
        # it is expected next to this script
        localPedonDB = os.path.dirname(sys.argv[0]) + os.sep + templateName

        # Return false if the pedon template is not found. The template name
        # already carries its extension so it is not appended again.
        if not arcpy.Exists(localPedonDB):
            AddMsgAndPrint("\t" + os.path.basename(localPedonDB) + " template was not found!",2)
            return False

        newPedonDB = os.path.join(outputFolder,DBname + ext)

        if arcpy.Exists(newPedonDB):
            try:
                arcpy.Delete_management(newPedonDB)
                AddMsgAndPrint("\t" + os.path.basename(newPedonDB) + " already exists. Deleting and re-creating FGDB",1)
            except Exception:
                AddMsgAndPrint("\t" + os.path.basename(newPedonDB) + " already exists. Failed to delete",2)
                return False

        # copy template over to new location
        arcpy.Copy_management(localPedonDB,newPedonDB)

        AddMsgAndPrint("\tSuccessfully created: " + os.path.basename(newPedonDB))
        return newPedonDB

    except Exception:
        errorMsg()
        return False
## ===============================================================================================================
def createReferenceObjects(pedonDBloc):
    """Create the 2 dictionaries that describe the tables of the pedon database.

    Description
        The Metadata table of the pedon database is read and, for every table
        it lists that also exists in the database, the following 2
        dictionaries are populated:
        - emptyPedonGDBtablesDict: contains the physical table name as a key
                    and an empty list as the value. Individual records of
                    tables will later be added to the lists.
                    i.e. {'area': [],'areatype': [],'basalareatreescounted': []}
        - tableInfoDict: contains the physical table name as a key. Each key
                    has an associated list consisting of the alias name and
                    the number of fields expected in a report record.
                    i.e. {croptreedetails:['Crop Tree Details',48]}
                    The number of fields is used to double check that the
                    values from the web report are correct. This was added
                    b/c there were text fields that were getting disconnected
                    in the report and being read as 2 lines.

        Table names are prefixed with the module-level 'prefix'.

    Parameters
        pedonDBloc - Catalog path of the pedon database that was created to
                     store pedon data. It must contain the Metadata Table,
                     which is used to retrieve alias names and physical
                     table names.

    Returns
        emptyPedonGDBtablesDict,tableInfoDict (in that order). If anything
        goes wrong the function will return False,False.
    """
    try:
        arcpy.SetProgressorLabel("Gathering Metadata Table and Field Information")
        AddMsgAndPrint(".\nGathering Metadata Table and Field Information")

        # Metadata table containing information for other pedon tables
        theMDTable = pedonDBloc + os.sep + prefix + "MetadataTable"

        if not arcpy.Exists(theMDTable):
            AddMsgAndPrint(".\n\t" + theMDTable + " doesn't Exist",2)
            return False,False

        # Establish a list of tables to get field information for. The pedon
        # table is added explicitly b/c it is not returned by ListTables.
        arcpy.env.workspace = pedonDBloc
        tableList = arcpy.ListTables("*")
        tableList.append(prefix + "pedon")

        # physical table name, table label (alias)
        nameOfFields = ["tabphynm","tablab"]

        # fields maintained by the database itself; not part of a report record
        systemFields = ('objectid','oid','geometry','fid','shape')

        # Initiate 2 Dictionaries
        tableInfoDict = dict()
        emptyPedonGDBtablesDict = dict()

        with arcpy.da.SearchCursor(theMDTable,nameOfFields) as cursor:
            for row in cursor:

                physicalName = prefix + row[0]  # Physical name of table
                aliasName = row[1]              # Alias name of table

                # Skip the Metadata tables and any table already captured
                if (prefix + 'Metadata') in physicalName: continue
                if physicalName in tableInfoDict: continue

                # The metadata table has more tables than what is in the DB template.
                # Only gather field information for those tables in the DB.
                if physicalName not in tableList: continue

                tblPath = f"{pedonDBloc}\\{physicalName}"
                uniqueFields = [f.name for f in arcpy.ListFields(tblPath) if f.name.lower() not in systemFields]
                numOfValidFlds = len(uniqueFields)

                # Add 2 more fields to the pedon table for X,Y
                if physicalName == prefix + 'pedon':
                    numOfValidFlds += 2

                # Siteaoverlap table has 9 fields instead of 6 b/c areaiidref is actually
                # a NASIS client placeholder for areatypename, areasymbol, areaname, areaiidref
                if physicalName == prefix + 'siteaoverlap':
                    numOfValidFlds = 9

                tableInfoDict[physicalName] = [aliasName,numOfValidFlds]
                emptyPedonGDBtablesDict[physicalName] = []

        arcpy.SetProgressorLabel('')
        return emptyPedonGDBtablesDict,tableInfoDict

    except Exception:
        AddMsgAndPrint("Unhandled exception (createReferenceObject)", 2)
        errorMsg()
        return False, False
## ===============================================================================================================
def parsePedonsIntoLists():
    """ This function will parse the pedon IDs found in the module-level 'pedonDict' into manageable
        chunks that will be sent to the 2nd URL report. Each chunk is a string of comma-separated pedon IDs.

        There is an inherent URL character limit of 2,083. The report URL is 123 characters long which
        leaves 1,960 characters available. A chunk is closed with the first pedon ID that is added after
        the string has grown beyond 1,866 characters. Most pedonIDs are about 6 characters. This would
        mean an average max request of about 265 pedons at a time.

        Returns a tuple: (list of pedon ID strings, number of strings in the list)
        The script is exited if there are no pedon IDs to parse or an unhandled error is encountered."""

    # string length beyond which the current chunk is closed
    maxStringLength = 1866

    try:
        arcpy.SetProgressorLabel("Determining the number of requests to send the server")

        totalPedons = len(pedonDict)
        listOfPedonStrings = list()  # List of comma-separated pedonID strings; 1 string per request
        pedonIDstr = ""              # concatenated string of pedonIDs for the current request

        for position, pedonID in enumerate(pedonDict, start=1):

            # The last pedon always closes the current string, as does the
            # first pedon added after the max length has been exceeded.
            if position == totalPedons:
                listOfPedonStrings.append(pedonIDstr + str(pedonID))

            elif len(pedonIDstr) > maxStringLength:
                listOfPedonStrings.append(pedonIDstr + str(pedonID))
                pedonIDstr = ""

            # concatenate pedonID to string and continue
            else:
                pedonIDstr += str(pedonID) + ","

        numOfPedonStrings = len(listOfPedonStrings)  # Number of unique requests that will be sent

        if not numOfPedonStrings:
            AddMsgAndPrint("\n\tThere were no pedon IDs available to send to the server",2)
            exit()

        return listOfPedonStrings,numOfPedonStrings

    except Exception:
        # SystemExit raised by exit() above is not an Exception, so it is
        # not reported here as an unhandled error.
        AddMsgAndPrint("Unhandled exception (parsePedonsIntoLists)", 2)
        errorMsg()
        exit()
## ================================================================================================================
def organizeFutureInstanceIntoPedonDict(futureObject):
    """Organize the lines of a NASIS pedon report into the pedon tables dictionary.

    Description
        The report is read line by line. Tables are delimited by '@begin <table>'
        and '@end' lines and the line following '@begin' is a header that is
        skipped. Every other line within a table is a pipe-delimited record
        that is appended, unparsed, to the list of its table in the
        module-level 'pedonDBtablesDict'. The number of values in a record
        must match the number of fields stored for the table in the
        module-level 'tableFldDict'. A record that falls short is assumed to
        have been split across lines and the following line(s) are appended
        to it until the number of values is reached or exceeded.

    Parameters
        futureObject - the lines (bytes) of the report, as returned by the
                       callable submitted to the ThreadPoolExecutor; None if
                       the URL could not be opened

    Returns
        True if at least 1 valid record was captured.
        False if no valid records were captured, the report contains an
        ERROR line or an unhandled error was encountered.
        None if futureObject is None.

    To view a sample output report go to:
    https://nasis.sc.egov.usda.gov/NasisReportsWebSite/limsreport.aspx?report_name=WEB_AnalysisPC_MAIN_URL_EXPORT&pedonid_list=14542
    """
    try:
        theReport = futureObject

        # There was an obvious error in opening the URL in the openURL function
        if theReport is None:
            return None

        invalidTable = 0       # represents tables that don't correspond with the GDB
        invalidRecord = 0      # represents records that were not added
        validRecord = 0

        bHeader = False        # flag indicating if the next line is a table header
        currentTable = ""      # The table found in the report
        numOfFields = ""       # The number of fields a specific table should contain
        partialValue = ""      # variable containing part of a value that is not complete
        originalValue = ""     # variable containing the original incomplete value
        bPartialValue = False  # flag indicating if value is incomplete; append next record

        """ ------------------- Begin Adding data from URL into a dictionary of lists ---------------"""
        for theValue in theReport:

            # convert from bytes to string and remove white spaces
            theValue = theValue.decode('utf-8').strip()

            # represents the start of valid table; Typically Line #19
            if theValue.find('@begin') > -1:
                theTable = prefix + theValue[theValue.find('@') + 7:] ## Isolate the table

                # Check if the table name exists in the dictionary of tables;
                # if so, set the currentTable variable and bHeader. The number
                # of fields is only looked up for a known table so that an
                # unknown table is reported instead of raising a KeyError.
                if theTable in pedonDBtablesDict:
                    currentTable = theTable
                    numOfFields = tableFldDict[theTable][1]
                    bHeader = True  ## Next line will be the header

                else:
                    AddMsgAndPrint("\t" + theTable + " Does not exist in the FGDB schema!  Figure this out Jason Nemecek!",2)
                    invalidTable += 1

                    # make sure records of an unknown table are never added to the previous table
                    currentTable = ""
                    bHeader = False

            # end of the previous table has been reached; reset currentTable
            elif theValue.find('@end') > -1:

                # a record that was never completed cannot be carried into the next table
                if bPartialValue:
                    invalidRecord += 1
                    bPartialValue = False
                    partialValue,originalValue = "",""

                currentTable = ""
                bHeader = False

            # represents header line; skip this line
            elif bHeader:
                bHeader = False

            # this is a valid record that should be collected
            elif currentTable:
                numOfValues = len(theValue.split('|'))

                # Add the record to its designated list within the dictionary
                # Do not remove the double quotes b/c doing so converts the object
                # to a list which increases its object size.  Remove quotes before
                # inserting into table

                # this should represent the 2nd half of a valid value
                if bPartialValue:
                    partialValue += theValue  # append this record to the previous record
                    numOfPartialValues = len(partialValue.split('|'))

                    # This value completed the previous value
                    if numOfPartialValues == numOfFields:
                        pedonDBtablesDict[currentTable].append(partialValue)
                        validRecord += 1
                        bPartialValue = False
                        partialValue,originalValue = "",""

                    # appending this value still falls short of number of possible fields
                    # add another record; this would be the 3rd record appended and may
                    # exceed number of values.
                    elif numOfPartialValues < numOfFields:
                        arcpy.SetProgressorPosition()
                        continue

                    # appending this value exceeded the number of possible fields
                    else:
                        AddMsgAndPrint("\t\tIncorrectly formatted Record Found in " + currentTable + " table:",2)
                        AddMsgAndPrint("\t\t\tRecord should have " + str(numOfFields) + " values but has " + str(numOfPartialValues),2)
                        AddMsgAndPrint("\t\t\tOriginal Record: " + originalValue,2)
                        AddMsgAndPrint("\t\t\tAppended Record: " + partialValue,2)
                        invalidRecord += 1
                        bPartialValue = False
                        # both variables must be reset with a 2-item tuple;
                        # unpacking a single empty string raises a ValueError
                        partialValue,originalValue = "",""

                # number of values exceed the number of fields; Big Error
                elif numOfValues > numOfFields:
                    AddMsgAndPrint("\n\t\tIncorrectly formatted Record Found in " + currentTable + " table:",2)
                    AddMsgAndPrint("\t\t\tRecord should have " + str(numOfFields) + " values but has " + str(numOfValues),2)
                    AddMsgAndPrint("\t\t\tRecord: " + theValue,2)
                    invalidRecord += 1

                # number of values falls short of the number of correct fields
                elif numOfValues < numOfFields:
                    partialValue,originalValue = theValue,theValue
                    bPartialValue = True

                else:
                    pedonDBtablesDict[currentTable].append(theValue)
                    validRecord += 1

            elif theValue.find("ERROR") > -1:
                AddMsgAndPrint("\n\t\t" + theValue[theValue.find("ERROR"):],2)
                return False

            else:
                invalidRecord += 1

        if not validRecord:
            AddMsgAndPrint("\t\tThere were no valid records captured from NASIS request",2)
            return False

        # Report any invalid tables found in report; This should take care of itself as Jason perfects the report.
        if invalidTable and invalidRecord:
            AddMsgAndPrint("\t\tThere were " + splitThousands(invalidTable) + " invalid table(s) included in the report with " + splitThousands(invalidRecord) + " invalid record(s)",1)

        # Report any invalid records found in report; There are 27 html lines reserved for headers and footers
        if invalidRecord > 28:
            AddMsgAndPrint("\t\tThere were " + splitThousands(invalidRecord) + " invalid record(s) not captured",1)

        return True

    except Exception:
        errorMsg()
        return False
## ================================================================================================================
def importPedonData(tableInfoDict,verbose=False):
    """ Insert the records currently held in the pedonDBtablesDict dictionary into
        the matching tables of the pedon FGDB.

        Depending on the number of pedons in the user's AOI this function may be
        called several times.  Per the original author's notes, pedonDBtablesDict
        could possibly consume all of the computer's memory, so the caller invokes
        this function once approximately 40,000 pedons have been retrieved from
        the server.  This function only reads pedonDBtablesDict; it does not empty
        the dictionary itself.

        Parameters
            tableInfoDict - dictionary keyed by table physical name.  The first item
                            of each value is used as the table alias name.  Tables
                            are processed in alphabetical order of these keys.
            verbose       - when True, print a per-table summary of records added.

        Module-level names read: prefix, pedonDB, pedonDBtablesDict.

        Each record is a '|' delimited string.  Double quotes are removed, empty or
        'NULL' values become None, and text values are truncated to the length of
        the target field.  For the pedon table the last two values of each record
        are consumed as the point coordinates (written through the SHAPE@XY token).

        A row that fails to insert is reported and the remaining records of that
        table are skipped; processing then continues with the next table.

        Returns True when every table was processed, False if an unexpected
        exception stopped the import."""

    try:
        if verbose: AddMsgAndPrint("\nImporting Pedon Data into FGDB")
        arcpy.SetProgressorLabel("Importing Pedon Data into FGDB")

        # Column widths; strictly for standardizing the verbose report
        maxCharTable = max([len(table) for table in tableInfoDict]) + 1
        maxCharAlias = max([len(info[0]) for info in tableInfoDict.values()])

        firstTab = (maxCharTable - len("Table Physical Name")) * " "
        headerName = "\n\tTable Physical Name" + firstTab + "Table Alias Name"

        if verbose:
            AddMsgAndPrint(headerName,0)
            AddMsgAndPrint("\t" + len(headerName) * "=",0)

        # use the tableInfoDict so that tables are imported in alphabetical order
        tblKeys = sorted(tableInfoDict)

        arcpy.SetProgressor("step","Importing Pedon Data into FGDB table: ",0,len(tblKeys),1)

        for table in tblKeys:

            arcpy.SetProgressorLabel("Importing Pedon Data into FGDB: " + table)
            arcpy.SetProgressorPosition()

            # Skip any Metadata files
            if table.find(prefix + 'Metadata') > -1: continue

            # Capture the alias name of the table
            aliasName = tableInfoDict[table][0]

            # Strictly for standardizing reporting
            firstTab = (maxCharTable - len(table)) * " "
            secondTab = (maxCharAlias - len(aliasName)) * " "

            # A table with no captured records (or no entry at all) is still
            # reported, with a count of 0.
            records = pedonDBtablesDict.get(table)
            if not records:
                if verbose: AddMsgAndPrint("\t" + table + firstTab + aliasName + secondTab + " Records Added: 0")
                continue

            numOfRowsAdded = 0
            GDBtable = pedonDB + os.sep + table      # FGDB physical table path
            isPedonTable = (table == prefix + 'pedon')

            # -------------------------------- Collect field information
            # For the current table, get the field length if the field is a string.
            # The actual value may exceed the field length and error out, so any
            # value longer than the field is truncated to the max field length.
            # A length of 0 marks a field whose values are never truncated.
            nameOfFields = []
            fldLengths = []

            for field in arcpy.Describe(GDBtable).fields:
                fieldType = field.type.lower()

                # Skip Object ID field and Shape field
                if fieldType in ("oid","geometry"):
                    continue

                nameOfFields.append(field.name)
                fldLengths.append(field.length if fieldType == "string" else 0)

            if isPedonTable:
                # Pedon feature class will have X,Y geometry added; Add XY token to list.
                # The record carries two trailing coordinate values, so two
                # placeholder lengths are needed for the single SHAPE@XY token.
                nameOfFields.append('SHAPE@XY')
                fldLengths.append(0)  # X coord
                fldLengths.append(0)  # Y coord

            # -------------------------------- Insert Rows
            # Iterate through every record of this table and add it to the FGDB table.
            # The 'with' block releases the cursor even if an exception propagates.
            with arcpy.da.InsertCursor(GDBtable,nameOfFields) as cursor:

                # '"S1962WI025001","43","15","9","North","89","7","56","West",,"Dane County, Wisconsin. 100 yards south of road."'
                for rec in records:

                    rawValues = rec.replace('"','').split('|')
                    numValues = len(rawValues)
                    newRow = []      # list containing the values that will populate a new row

                    for position, rawValue in enumerate(rawValues):
                        cleaned = rawValue.strip()
                        fldLen = fldLengths[position]

                        if cleaned == '' or cleaned == 'NULL':   ## Empty String
                            cleaned = None
                        elif fldLen > 0:                         ## record is a string, truncate it
                            cleaned = cleaned[0:fldLen]

                        newRow.append(cleaned)

                    # Add XY coordinates to the pedon point feature class.
                    if isPedonTable:
                        try:
                            xValue = float(newRow[-1])  # Long
                            yValue = float(newRow[-2])  # Lat
                        except (TypeError, ValueError, IndexError):
                            # Missing or non-numeric coordinates: fall back to 0, 90
                            xValue = 0.00
                            yValue = 90.0

                        # remove the X,Y coords from the newRow list b/c X,Y
                        # fields don't exist in the pedon Table
                        newRow = newRow[:-2]
                        newRow.append((xValue,yValue))

                    try:
                        cursor.insertRow(newRow)
                        numOfRowsAdded += 1

                    except arcpy.ExecuteError:
                        AddMsgAndPrint("\n\tError in :" + table + " table: Field No: " + str(numValues) + " : " + str(rec),2)
                        AddMsgAndPrint("\n\t" + arcpy.GetMessages(2),2)
                        break

                    except Exception:
                        AddMsgAndPrint("\n\tError in: " + table + " table")
                        AddMsgAndPrint("\tNumber of Fields in GDB: " + str(len(nameOfFields)))
                        AddMsgAndPrint("\tNumber of fields in report: " + str(len(rec.split('|'))))
                        errorMsg()
                        break

            # Report the # of records added to the table
            if verbose: AddMsgAndPrint("\t" + table + firstTab + aliasName + secondTab + " Records Added: " + splitThousands(numOfRowsAdded))

        # Resets the progressor back to its initial state
        arcpy.ResetProgressor()
        return True

    except arcpy.ExecuteError:
        AddMsgAndPrint(arcpy.GetMessages(2),2)
        return False

    except Exception:
        errorMsg()
        return False
## ================================================================================================================
def getObjectSize(obj, handlers={}, verbose=False):
""" Returns the approximate memory footprint an object and all of its contents.
Automatically finds the contents of the following builtin containers and
their subclasses: tuple, list, deque, dict, set and frozenset.
To search other containers, add handlers to iterate over their contents:
handlers = {SomeContainerClass: iter,
OtherContainerClass: OtherContainerClass.get_elements}
"""
try:
# lamda function to iterate through a dictionary
dict_handler = lambda d: chain.from_iterable(d.items())
# Use the following lines if you want to determine the size for ANY object
## all_handlers = {tuple: iter,
## list: iter,
## deque: iter,
## dict: dict_handler,
## set: iter,