-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathical2org.awk
executable file
·677 lines (582 loc) · 21.8 KB
/
ical2org.awk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
#!/usr/bin/env -S gawk -f
# awk script for converting an iCal formatted file to a sequence of org-mode headings.
# this may not work in general but seems to work for day and timed events from Google's
# calendar, which is really all I need right now...
#
# usage:
# awk -f THISFILE < icalinputfile.ics > orgmodeentries.org
#
# Note: change org meta information generated below for author and
# email entries!
#
# Caveats:
#
# - date entries with no time specified are assumed to be local time zone;
# same remark for date entries that do have a time but do not end with Z
# e.g.: 20130101T123456 is local and will be kept as 2013-01-01 12:34
# where 20130223T123422Z is UTC and will be corrected appropriately
#
# - UTC times are changed into local times, using the time zone of the
# computer that runs the script; it would be very hard in an awk script
# to respect the time zone of a file belonging to another time zone:
# the offsets will be different as well as the switchover time(s);
# (consider a remote shell to a computer with the file's time zone)
#
# - the UTC conversion entirely relies on the built-in strftime method;
# the author is not responsible for any erroneous conversions nor the
# consequence of such conversions
#
# - does process RRULE recurring events, but ignores COUNT specifiers
#
# - does not process EXDATE to exclude date(s) from recurring events
#
# Eric S Fraga
# 20100629 - initial version
# 20100708 - added end times to timed events
# - adjust times according to time zone information
# - fixed incorrect transfer for entries with ":" embedded within the text
# - added support for multi-line summary entries (which become headlines)
# 20100709 - incorporated time zone identification
# - fixed processing of continuation lines as Google seems to
# have changed, in the last day, the number of spaces at
# the start of the line for each continuation...
# - remove backslashes used to protect commas in iCal text entries
# no further revision log after this as the file was moved into a git
# repository...
#
# Updated by: Guido Van Hoecke <guivhoATgmailDOTcom>
# Last change: 2013.05.26 14:28:33
#----------------------------------------------------------------------------------
BEGIN {
    ### config section

    # Two-way table for our own attendance state: name -> number and
    # number -> name (UNSET=0, ATTENDING=1, NEEDS_ACTION=2, NOT_ATTENDING=3).
    n_states = split("UNSET ATTENDING NEEDS_ACTION NOT_ATTENDING", state_names, " ")
    for (k = 1; k <= n_states; k++) {
        attending_types[state_names[k]] = k - 1
        attending_types[k - 1] = state_names[k]
    }

    # map of UIDs for duplicate checking -- sometimes the same id comes down
    # with multiple VEVENTs
    UIDS[0];

    # map of people attending a given event
    people_attending[0];

    # maximum age in days for entries to be output: set this to -1 to
    # get all entries or to N>0 to only get entries that start or end
    # less than N days ago
    max_age = 7;

    # 1: emit a header block with TITLE, AUTHOR, EMAIL etc.; 0: do not
    header = 1;

    # 1: emit the original iCal preamble as a comment; 0: do not
    preamble = 1;

    # 1: emit time and summary on one line, starting with the time;
    # 0: emit the summary as the first line and the date/time info as a
    #    later line (after the property drawer, or org complains)
    condense = 0;

    # 1: emit the original iCal entry as a comment (mostly useful for
    # debugging); 0: do not
    original = 1;

    # google truncates long subjects with ... which is misleading in
    # an org file: it gives the unfortunate impression that an
    # expanded entry is still collapsed; 1 trims those dots and 0
    # leaves them alone
    trimdots = 1;

    # Each of the following is taken from the environment when the variable
    # is set to a non-empty value, and falls back to a default otherwise.

    # your name
    author = ENVIRON["AUTHOR"]
    if (author == "")
        author = "Marc Sherry"

    # your email address
    emailaddress = ENVIRON["EMAIL"]
    if (emailaddress == "")
        emailaddress = "unknown"

    # main title of the Org file
    title = ENVIRON["TITLE"]
    if (title == "")
        title = "Main Google calendar entries"

    # calendar/category name for display in org-mode
    calendarname = ENVIRON["CALENDAR"]
    if (calendarname == "")
        calendarname = "unknown"

    # any tags for this calendar (e.g. "WORK" or "PERSONAL")
    filetags = ENVIRON["FILETAGS"]
    if (filetags == "")
        filetags = "unknown"

    # UTC offset of the machine running the script, in minutes
    "date +%z" | getline local_tz_offset
    close("date +%z")
    local_tz_offset = parse_timezone_offset(local_tz_offset)

    ### end config section

    # a colon separates the type of a data line from its contents
    FS = ":";

    # the original entry lines only need to be kept when either the
    # preamble or the original option is enabled
    preserve = preamble || original

    # true until the first event has been found
    first = 1;

    max_age_seconds = max_age * 24 * 60 * 60

    if (header) {
        print "#+TITLE: ", title
        print "#+AUTHOR: ", author
        print "#+EMAIL: ", emailaddress
        print "#+DESCRIPTION: converted using the ical2org awk script"
        print "#+CATEGORY: ", calendarname
        print "#+STARTUP: hidestars"
        print "#+STARTUP: overview"
        print "#+FILETAGS: ", filetags
        print ""
    }
}
# Continuation lines (at least from Google) start with a space. If the
# continuation follows a field we are collecting (description, summary,
# attendee, location), the text is appended to the matching variable.
#
# The single leading space is the fold marker, not content, so it is removed
# for every field -- including LOCATION, which previously kept it and ended
# up with a spurious blank in the middle of the text.
/^[ ]/ {
    # the folded text: one leading space and any carriage returns removed
    fragment = gensub("\r", "", "g", gensub("^[ ]", "", 1, $0))

    if (indescription) {
        entry = entry fragment
    } else if (insummary) {
        summary = summary fragment
    } else if (inattendee) {
        attendee = attendee fragment
        # re-evaluate with the longer line; both helpers tolerate repeats
        are_we_going(attendee)
        add_attendee(attendee)
    } else if (inlocation) {
        location = location unescape(fragment, 0)
    }

    # keep the raw line for the "original iCal entry" comment
    if (preserve)
        icalentry = icalentry "\n" $0
}
/^BEGIN:VEVENT/ {
    # If this is the first event, everything collected in icalentry so far
    # is the calendar preamble. It has to be printed BEFORE the per-event
    # reset below, otherwise the preamble comment is always empty.
    if (first) {
        if (preamble) {
            print "* COMMENT original iCal preamble"
            print gensub("\r", "", "g", icalentry)
        }
        first = 0;
    }

    # start of an event: initialize the global values used for each event
    date = "";
    end_date = ""       # was never reset: leaked from the previous event
    date1 = ""          # start/end pair used for the fake CLOCK line;
    date2 = ""          #   must not survive into the next event
    issameday = 0
    entry = ""
    headline = ""
    icalentry = ""      # the full entry for inspection
    id = ""
    indescription = 0;
    insummary = 0
    inattendee = 0
    inlocation = 0
    in_alarm = 0
    got_end_date = 0
    attending = attending_types["UNSET"];
    # http://unix.stackexchange.com/a/147958/129055
    intfreq = ""        # the interval and frequency for repeating org timestamps
    lasttimestamp = -1;
    location = ""
    rrend = ""
    status = ""
    summary = ""
    attendee = ""
    delete people_attending;
}
# any line that starts at the left with a non-space character is a new data field
# Alarms have their own UID, DESCRIPTION, etc. and we don't want these
# polluting the real event fields: in_alarm is 1 from BEGIN:VALARM until the
# matching END:VALARM, and 0 afterwards.
/^(BEGIN|END):VALARM/ {
    in_alarm = ($0 ~ /^BEGIN:VALARM/) ? 1 : 0
}
/^[A-Z]/ {
    # We do not copy DTSTAMP lines as they change every time you download
    # the iCal file, which leads to a content-free change in the converted
    # org file (the original input is output as a comment) and confuses
    # revision control.  The property name is tested directly; the previous
    # index("DTSTAMP", $1) had its arguments the wrong way round and skipped
    # any line whose name was a substring of "DTSTAMP".
    if (preserve && $0 !~ /^DTSTAMP[:;]/)
        icalentry = icalentry "\n" $0

    # A new property ends the ATTENDEE being collected.  Attendees are
    # otherwise only evaluated from the continuation-line rule, so an
    # ATTENDEE that fits on a single line would never be looked at.
    if (inattendee && attendee ~ /CN=/) {
        are_we_going(attendee)
        add_attendee(attendee)
    }

    # this line terminates the collection of any multi-line field;
    # inlocation must be cleared too, or continuation lines of later,
    # unrelated properties are appended to the location
    indescription = 0;
    insummary = 0;
    inattendee = 0;
    inlocation = 0;
}
# All-day (untimed) event start: the value is a plain YYYYMMDD date stamp.
# The [^-] keeps VALUE=DATE-TIME lines out of this rule.
/^DTSTART;VALUE=DATE[^-]/ {
    date = datestring($2, 0)
}
# All-day event end. datestring() is asked to treat the value as an end date
# (it moves it back one day) and records in issameday whether the result is
# the same day as the previously seen date; in that case no end date is kept.
/^DTEND;VALUE=DATE[^-]/ {
    last_day = datestring($2, 1)
    end_date = issameday ? "" : last_day
    got_end_date = 1
}
# Timed entries with a date and time stamp YYYYMMDDTHHMMSS[Z], optionally
# qualified by a TZID parameter.  Seconds are ignored.

# Minutes to add to a timed value so that it reads as local time.
#   tzid  - TZID parameter of the property, "" when there is none
#   value - the property value (may still carry a trailing CR)
# A value with neither a TZID nor a trailing Z is a floating time, which the
# caveats at the top of this file define as local time: no shift is applied.
# Otherwise the shift is the local UTC offset minus the source zone's offset
# (an empty tzid is looked up as UTC by get_timezone_offset).
function timed_value_offset(tzid, value)
{
    if (tzid == "" && value !~ /Z[ \t\r]*$/)
        return 0
    return local_tz_offset - get_timezone_offset(tzid)
}

/^DTSTART[:;][^V]/ {
    tz = match($0, /TZID=([^:]*)/, a) ? a[1] : ""
    offset = timed_value_offset(tz, $2)
    date = datetimestring($2, offset);
    # DTEND may already have been seen; if so, combine the two now
    if (date != "" && got_end_date) {
        fix_date_time()
    }
}

# and the same for the end date
/^DTEND[:;][^V]/ {
    # NOTE: this doesn't necessarily appear after DTSTART
    tz = match($0, /TZID=([^:]*)/, a) ? a[1] : ""
    offset = timed_value_offset(tz, $2)
    end_date = datetimestring($2, offset);
    got_end_date = 1
    if (date != "") {
        # We got start and end date/time, let's munge as appropriate
        fix_date_time()
    }
}
# Timed entry given as VALUE=DATE-TIME, YYYYMMDDTHHMMSSZ; seconds are
# ignored.  The value is shifted by the local UTC offset.
/^DTSTART[:;]VALUE=DATE-TIME/ {
    offset = local_tz_offset
    tz = ""
    date = datetimestring($2, offset)
    # only combine with the end date once both halves are known
    if (got_end_date && date != "")
        fix_date_time()
}
# End of a VALUE=DATE-TIME entry; handled like the start above.
# NOTE: this doesn't necessarily appear after DTSTART.
/^DTEND[:;]VALUE=DATE-TIME/ {
    offset = local_tz_offset
    tz = ""
    end_date = datetimestring($2, offset)
    got_end_date = 1
    # the end is known now, so only the start can still be missing
    if (date != "")
        fix_date_time()
}
# Repetition rule: turn FREQ/INTERVAL into an org repeater (" +1w", " +2m",
# ...) stored in intfreq, and remember the UNTIL date in rrend.
/^RRULE:FREQ=(DAILY|WEEKLY|MONTHLY|YEARLY)/ {
    # TODO: handle BYDAY values for events that repeat weekly for multiple days
    # (e.g. a "Gym" event)

    # get the d, w, m or y value
    freq = tolower(gensub(/.*FREQ=(.).*/, "\\1", 1, $0))

    # get the interval, and use 1 if none specified.  INTERVAL may be the
    # last part of the rule, so no trailing ';' is required (the previous
    # pattern needed one and otherwise put the whole rule in the repeater).
    interval = match($2, /INTERVAL=([0-9]+)/, rr) ? rr[1] : 1

    # get the end date of the rule and use "" if none specified
    rrend = ""
    rrend_raw = ""
    if (match($2, /UNTIL=([0-9]{8})/, rr)) {
        rrend_raw = rr[1]
        rrend = datestring(rrend_raw)
    }

    repeat_count = match($2, /COUNT=([0-9]+)/, rr) ? rr[1] : ""

    # build the repeater value as understood by org
    intfreq = " +" interval freq

    # if the repetition is daily, and there is an end date, drop the repeater
    # as that is the default
    if (intfreq == " +1d" && end_date == "" && rrend != "")
        intfreq = ""

    # a rule whose last day has passed no longer repeats; compare dates
    # only, so the rule is still live on the UNTIL day itself
    today = strftime("%Y%m%d")
    if (rrend_raw != "" && rrend_raw < today)
        intfreq = ""

    if (repeat_count != "")     # TODO: count repeats correctly
        intfreq = ""
}
# The description will be the contents of the entry in org-mode.
# This line may be continued.
/^DESCRIPTION/ {
    # descriptions inside a VALARM belong to the alarm, not the event
    if (!in_alarm) {
        # Setting $1 to "" would clear colons from items like "1:1 with
        # Marc", so the property name is stripped off the front instead.
        # Optional parameters (DESCRIPTION;LANGUAGE=en:...) are stripped
        # with it; a quoted parameter value may itself contain a colon.
        text = gensub(/^DESCRIPTION(;([^":]|"[^"]*")*)?:/, "", 1, $0)
        entry = entry gensub("\r", "", "g", text);
        indescription = 1;
    }
}
# the summary will be the org heading
/^SUMMARY/ {
    # summaries inside a VALARM belong to the alarm, not the event
    if (!in_alarm) {
        # Setting $1 to "" would clear colons from items like "1:1 with
        # Marc", so the property name is stripped off the front instead,
        # together with any parameters (SUMMARY;LANGUAGE=en-US:...), which
        # would otherwise end up in the headline.
        summary = gensub("\r", "", "g", gensub(/^SUMMARY(;([^":]|"[^"]*")*)?:/, "", 1, $0));
        # trim trailing dots if requested by config option
        if (trimdots)
            sub(/\.\.\.$/, "", summary)
        insummary = 1;
    }
}
# the unique ID will be stored as a property of the entry
/^UID/ {
    # a VALARM carries its own UID, which must not replace the event's
    if (!in_alarm) {
        # take everything after the first colon: with FS=":" the field $2
        # would cut a UID that itself contains a colon, and the shortened
        # value could then collide in the duplicate check
        id = gensub("\r", "", "g", gensub(/^UID[^:]*:/, "", 1, $0));
    }
}
# the location is stored as a property of the entry; it may be continued
/^LOCATION/ {
    # Strip the property name (and any parameters) from the whole line
    # rather than using $2: with FS=":" the field stops at the first colon,
    # which truncated values such as "https://..." or "Room 1: Main".
    location = unescape(gensub("\r", "", "g", gensub(/^LOCATION(;([^":]|"[^"]*")*)?:/, "", 1, $0)), 0);
    inlocation = 1;
}
# the event status is stored as a property of the entry
/^STATUS/ {
    status = $2
    gsub(/\r/, "", status)
}
# start collecting an attendee; the whole line is kept (parameters included)
# because the attendee helpers parse CN= and PARTSTAT= out of it
/^ATTENDEE/ {
    attendee = $0
    gsub(/\r/, "", attendee)
    inattendee = 1
}
# When we reach the end of the event, we output everything we have
# collected so far, creating a top level org headline with the date/time
# stamp, a property drawer (unique ID, location, status, attendance) and the
# contents, if any.
/^END:VEVENT/ {
    is_duplicate = (id in UIDS);

    # An event is emitted when its UID has not been seen yet and it is
    # either repeating, or recent enough, or no age limit is configured.
    if (is_duplicate == 0 && (max_age < 0 || intfreq != "" || (lasttimestamp > 0 && systime() < lasttimestamp + max_age_seconds)))
    {
        # events we are known not to attend are dropped
        if (attending != attending_types["NOT_ATTENDING"]) {
            # build org timestamp
            if (intfreq != "")
                date = date intfreq
            # TODO: http://orgmode.org/worg/org-faq.html#org-diary-class
            else if (end_date != "")
                date = date ">--<" end_date
            else if (rrend != "")
                date = date ">--<" rrend

            # headline: unprotect commas etc. and turn \n into spaces
            if (condense)
                print "* <" date "> " gensub("^[ ]+", "", 1, unescape(summary, 0))
            else
                print "* " gensub("^[ ]+", "", "g", unescape(summary, 0))

            print " :PROPERTIES:"
            print " :ID: " id
            if (length(location))
                print " :LOCATION: " location
            if (length(status))
                print " :STATUS: " status
            attending_string = attending_types[attending]
            if (attending_string == "UNSET")
                # No attending info at all -- assume this is an event we
                # created to block off our calendar, with no attendees, and
                # mark it as attending
                attending_string = "ATTENDING"
            print " :ATTENDING: " attending_string
            print " :ATTENDEES: " join_keys(people_attending)
            print " :END:"

            if (date2 != "")
            {
                # Fake some logbook entries so we can generate a clock report
                print " :LOGBOOK:"
                print " CLOCK: [" date1 "]--[" date2 "] => " "0:00"
                # a drawer is closed by ":END:" (the trailing colon was missing)
                print " :END:"
            }

            if (!condense)
                print "<" date ">"
            print ""

            # body: here \n sequences become real newlines
            if (length(entry) > 1)
                print gensub("^[ ]+", "", "g", unescape(entry, 1));

            # output original entry if requested by 'original' config option
            if (original)
                print "** COMMENT original iCal entry\n", gensub("\r", "", "g", icalentry)
        }
        UIDS[id] = 1;
    }

    # These are only ever assigned while an event is parsed; clear them so
    # an event without an end (or without a same-day time range) does not
    # inherit the end date or the CLOCK pair of the previous event.
    end_date = ""
    date1 = ""
    date2 = ""
}
# Join the keys of an array into one string, separated by ", ".
#   input - array whose indices are to be listed (values are ignored)
# Returns "" for an empty array.  The keys are listed in sorted order so the
# generated file is identical from run to run ("for (key in array)" visits
# the keys in an unspecified order).
# The names after the gap are local variables, not arguments.
function join_keys(input,    sorted, count, idx, joined)
{
    count = asorti(input, sorted)
    joined = ""
    for (idx = 1; idx <= count; idx++)
        joined = joined (idx > 1 ? ", " : "") sorted[idx]
    return joined
}
# Undo iCal TEXT escaping.
#   input             - the escaped text
#   preserve_newlines - non-zero: \n and \N become real newlines;
#                       zero: they become a single space
# It's good to preserve newlines in descriptions for e.g. interview calendar
# events, but addresses look better with spaces as more info fits on a line.
#
# Handled escapes: \, \; \\ \n \N.  The text is decoded in a single
# left-to-right pass so that an escaped backslash followed by "n" stays a
# backslash and an "n" instead of turning into a newline.  Any other
# backslash sequence is kept as it is.
# The names after the gap are local variables, not arguments.
function unescape(input, preserve_newlines,    out, pos, ch, nl)
{
    nl = preserve_newlines ? "\n" : " "
    out = ""
    while ((pos = index(input, "\\")) > 0) {
        out = out substr(input, 1, pos - 1)
        ch = substr(input, pos + 1, 1)      # "" for a trailing backslash
        if (ch == "n" || ch == "N")
            out = out nl
        else if (ch == "," || ch == ";" || ch == "\\")
            out = out ch
        else
            out = out "\\" ch
        input = substr(input, pos + 2)
    }
    return out input
}
# Convert a UTC offset of the form "+hhmm" / "-hhmm" (as printed by
# "date +%z") into a signed number of minutes.
function parse_timezone_offset(offset_string) {
    # whole hours, already expressed in minutes
    hours = 60 * substr(offset_string, 2, 2);
    minutes = substr(offset_string, 4, 2);
    total_offset = hours + minutes;
    # anything but a leading "-" is taken as positive
    if (substr(offset_string, 1, 1) != "-")
        return total_offset;
    total_offset = -total_offset;
    return total_offset;
}
# Get the UTC offset, in minutes, of a named time zone.
#   tz - time zone name as found in a TZID parameter; "" is looked up as
#        an empty TZ, i.e. whatever "date" does with TZ=""
# The offset is obtained by running "date +%z" with TZ set, so it is the
# zone's offset at the moment the script runs.
#
# The name comes from the input file and ends up on a shell command line,
# so it is cleaned first: trailing parameters, quotes and carriage returns
# are removed, and a name containing anything but letters, digits and
# "_ + / . -" or blanks is not passed on (it is looked up as "").
# Results are cached in tz_offset_cache so each zone costs one subprocess.
# The names after the gap are local variables, not arguments.
function get_timezone_offset(tz,    cmd, raw)
{
    sub(/;.*$/, "", tz)
    gsub(/["\r]/, "", tz)
    if (tz !~ /^[A-Za-z0-9_+\/. -]*$/)
        tz = ""

    if (!(tz in tz_offset_cache)) {
        cmd = "TZ=\"" tz "\" date +%z"
        raw = ""        # stays empty (offset 0) if the command yields nothing
        cmd | getline raw
        close(cmd)
        tz_offset_cache[tz] = parse_timezone_offset(raw)
    }
    return tz_offset_cache[tz]
}
# Function to convert an iCal time string 'yyyymmddThhmmss[Z]' into a
# date time string as used by org, preferably including the short day
# of week: 'yyyy-mm-dd day hh:mm', or 'yyyy-mm-dd hh:mm' if we cannot
# determine the day of the week.
#   input  - the iCal value (a trailing Z and CR are ignored)
#   offset - minutes to add to the value to turn it into local time
# Side effect: lasttimestamp is set to the timestamp of the result, i.e.
# after the offset has been applied, so that the max_age check compares
# against the real moment of the event.
# An input without a date-time in it is returned as it is, minus CRs.
# The names after the gap are local variables, not arguments.
function datetimestring(input, offset,    parts, stamp)
{
    if (!match(input, /([0-9]{4})([0-9]{2})([0-9]{2})T([0-9]{2})([0-9]{2})([0-9]{2})/, parts)) {
        lasttimestamp = -1
        return gensub("\r", "", "g", input)
    }

    # mktime reads the fields as local time; seconds are passed through
    stamp = mktime(parts[1] " " parts[2] " " parts[3] " " parts[4] " " parts[5] " " parts[6])

    if (stamp <= 0) {
        # this is a date before the start of the epoch, so we cannot
        # use strftime and will deliver a 'yyyy-mm-dd hh:mm' string
        # without day of week; this assumes local time, and does not
        # attempt UTC offset correction
        lasttimestamp = stamp
        return parts[1] "-" parts[2] "-" parts[3] " " parts[4] ":" parts[5]
    }

    stamp = stamp + offset * 60
    lasttimestamp = stamp
    return strftime("%Y-%m-%d %a %H:%M", stamp)
}
# Function to convert an iCal date 'yyyymmdd' into an org date
# 'yyyy-mm-dd day'.
#   input     - the iCal value (anything after the date is ignored)
#   isenddate - optional; non-zero when this is an end date
# For single or multiple whole day events, the end date given by iCal is the
# first day after the event, so an end date is moved back by one day.  This
# is done on the calendar day (mktime normalises a day of 0 to the last day
# of the previous month) rather than by subtracting 86400 seconds, which
# lands on the wrong day when the day before is not 24 hours long (DST).
# Side effects: lasttimestamp is set to the resulting timestamp (for the
# max_age check), and for an end date issameday records whether the result
# equals the previous lasttimestamp.
# The names after the gap are local variables, not arguments.
function datestring(input, isenddate,    parts, stamp, day)
{
    if (match(input, /([0-9]{4})([0-9]{2})([0-9]{2})/, parts)) {
        day = parts[3] + 0
        if (isenddate)
            day = day - 1
        # seconds after the epoch; negative for dates before it
        stamp = mktime(parts[1] " " parts[2] " " day " 00 00 00")
    } else {
        stamp = -1
    }

    # register whether the end date is the same as the start date
    if (isenddate)
        issameday = lasttimestamp == stamp

    # save timestamp to allow for check of max_age
    lasttimestamp = stamp

    if (stamp < 0) {
        # this date is before the epoch (or could not be converted);
        # the returned datestring will not have the short day of week
        # string, and is built directly from the input
        if (isenddate) {
            # we really should return the day before the input date, but
            # the corrected timestamp cannot be formatted here; return the
            # date given in the iCal input with time 00:00 to clarify this
            return gensub("([0-9]{4})([0-9]{2})([0-9]{2}).*[\r]*", "\\1-\\2-\\3 00:00", "g", input)
        }
        # just the desired representation of the input date, without time
        return gensub("([0-9]{4})([0-9]{2})([0-9]{2}).*[\r]*", "\\1-\\2-\\3", "g", input)
    }

    # return the date and day of week
    return strftime("%Y-%m-%d %a", stamp)
}
# Add the name of the given attendee to the people_attending set, which is
# listed in the :ATTENDEES: property of the event.
#   attendee - a complete ATTENDEE line, parameters included
# The name is the CN parameter, lower-cased and without surrounding quotes
# (google calendar downloads omit them, apple calendar exports include
# them).  The name ends at the next ';' or at the ':' that starts the value,
# so a CN that is the last parameter does not swallow ":mailto:...".
# A line without a CN parameter adds nothing.
# The names after the gap are local variables, not arguments.
function add_attendee(attendee,    m, cn)
{
    if (!match(attendee, /CN=("[^"]*"|[^;:]+)/, m))
        return
    cn = tolower(m[1])
    gsub(/"/, "", cn)
    if (cn != "")
        people_attending[cn] = 1
}
# Collapse a start and end on the same calendar day into a single org
# timestamp with a time range.  Works on the globals date and end_date:
# when their first ten characters (yyyy-mm-dd) agree, the last five
# characters of end_date (hh:mm) are appended to date after a "-", end_date
# is emptied, and the untouched pair is kept in date1/date2 for the
# org-clocktable entry.  Otherwise nothing is changed.
function fix_date_time()
{
    if (substr(date, 1, 10) != substr(end_date, 1, 10))
        return

    date1 = date
    date2 = end_date
    date = date1 "-" substr(date2, length(date2) - 4)
    end_date = ""
}
# Parse the given ATTENDEE line and see if it belongs to us. If so, record
# our response in the global `attending`. It may be the case that there are
# no attendees (e.g. personal calendar items), and then `attending` is left
# unset.
#   attendee - a complete ATTENDEE line, parameters included
# The line is ours when its CN parameter, lower-cased and without quotes
# (google calendar downloads omit them, apple calendar exports include
# them), equals the configured author or email address.  The name ends at
# the next ';' or at the ':' that starts the value, so a CN that is the last
# parameter still matches.
# PARTSTAT=ACCEPTED means ATTENDING, PARTSTAT=NEEDS-ACTION means
# NEEDS_ACTION, anything else NOT_ATTENDING.  Once `attending` has been set
# for an event, later calls leave it alone.
# The names after the gap are local variables, not arguments.
function are_we_going(attendee,    m, cn)
{
    # already decided for this event
    if (attending != attending_types["UNSET"])
        return

    if (!match(attendee, /CN=("[^"]*"|[^;:]+)/, m))
        return
    cn = tolower(m[1])
    gsub(/"/, "", cn)

    # TODO: no hardcoding
    if (cn != tolower(author) && cn != tolower(emailaddress))
        return

    # This is us -- did we accept the meeting?
    if (attendee ~ /PARTSTAT=ACCEPTED/)
        attending = attending_types["ATTENDING"]
    else if (attendee ~ /PARTSTAT=NEEDS-ACTION/)
        attending = attending_types["NEEDS_ACTION"]
    else
        attending = attending_types["NOT_ATTENDING"]
}
# Local Variables:
# time-stamp-line-limit: 1000
# time-stamp-format: "%04y.%02m.%02d %02H:%02M:%02S"
# time-stamp-active: t
# time-stamp-start: "Last change:[ \t]+"
# time-stamp-end: "$"
# End: