-
Notifications
You must be signed in to change notification settings - Fork 0
/
mw_pather.hs
552 lines (490 loc) · 22.4 KB
/
mw_pather.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
{-
This is a combination program and extremely verbose Haste
( http://haste-lang.org/ ) tutorial.
The program is a pathfinder for locations in the game Morrowind.
Specifically:
- on the server/ghc side, it scrapes the
https://uesp.net/wiki/Main_Page wiki for all the travel
information for the towns and cities.
- on the web/haste side, it retrieves and presents that data, lets
the user pick a start and destiation, and shows the shortest
path between them
The comments here are me clarifying to myself my understanding of
the various parts of this program, in particular Haste and its
type system.
-}
{-# LANGUAGE CPP #-}
module Main where
import Haste.App
import Haste.DOM
import Haste.Events
import Data.List
import Control.Applicative
import Text.Printf
import Debug.Trace
-- import Text.Regex.PCRE
import qualified Data.Set as Set
import Data.IORef
#ifndef __HASTE__
import Data.String.Utils
import Network.HTTP.Conduit hiding (Connection)
import Text.HTML.TagSoup
import qualified Data.ByteString.Lazy.Char8 as Char8
#endif
{-
This is just a tracing function with log levels that I use; you
can ignore it unless you're especially interested in that sort of
thing. This version only works in monadic contexts
-}
data TraceLevel = Crazy | Debug | Info | Error deriving (Show, Eq, Ord)
traceLevel = Crazy
rlpTraceM :: (Monad m) => TraceLevel -> String -> m ()
rlpTraceM level msg
| level >= traceLevel = traceM msg
| otherwise = return ()
type LocationName = String
type ConnectionType = String
{-
This type describes a link between two in-game locations; we use
these to build paths between two points, and we present the
shortest to the user. The ConnectionType is the type of travel
(boat, giant bug, various kinds of teleportation, etc).
-}
data Connection = Connection {
origin :: LocationName
, destination :: LocationName
, ctype :: ConnectionType
} deriving (Eq, Ord)
-- Pretty printing.
instance Show Connection where
show (Connection _ sdest "None") = printf "You start in %s." sdest
show (Connection sorigin sdestination sctype) = printf "Go from %s to %s by %s." sorigin sdestination sctype
-- show (Connection sorigin sdestination sctype) = printf "/ %s--%s--%s /" sorigin sdestination sctype
-- This type describes a path from one game location to another;
-- it's what we return to the user.
data Path = Path {
pconns :: [Connection]
, pdest :: LocationName
} deriving (Show, Eq)
-- Old stuff, here for reference as to what the structures look
-- like. We now generate these structures by web scraping.
--
-- connections :: [Connection]
-- connections = [
-- Connection { origin="Balmora", destination="Vivec", ctype="Guild Guide" }
-- , Connection { origin="Balmora", destination="Sadrith Mora", ctype="Guild Guide" }
-- , Connection { origin="Sadrith Mora", destination="Vivec", ctype="Guild Guide" }
-- , Connection { origin="Sadrith Mora", destination="Balmora", ctype="Guild Guide" }
-- , Connection { origin="Sadrith Mora", destination="Tel Mora", ctype="Boat" }
-- , Connection { origin="Vivec", destination="Balmora", ctype="Guild Guide" }
-- ]
--
-- locations :: [LocationName]
-- locations = [
-- "Balmora"
-- , "Sadrith Mora"
-- , "Tel Mora"
-- , "Vivec"
-- ]
main :: IO ()
main = do
{-
As the Haste docs suggest, our entire program is one runApp
call.
Type details:
The first argument is an AppCfg , which is basically just the
server to connect to for websockets
The second argument is an App value, which means the do is in
the App monad. The primary interesting thing that the App
monad does is that any Exports (which are lines of
communication between the web side and the server side; they
are generated by "remote" and "onSessionEnd") (haste also calls
these method calls) that are monadically processed here will
become part of our app's structure, i.e. "remote" calls here
result in new lines of communication between the web and server
sides.
-}
runApp (mkConfig "vrici.lojban.org" 24601) $ do
-- Make an IORef on the server side to hold the results of the
-- web scraping.
--
-- Type details:
--
-- liftServerIO gives it the App monad type (specifically
-- "App (Server a)"), which is the monad we're in. We then
-- unpack it with <- , so remoteConns has type "Server a" or, to
-- be more precise, "Server (IORef [[String]])", because of the
-- type of the IORef.
remoteConns <- liftServerIO $ newIORef [["nothing"]]
-- Make an API call for the server-side function we use to get
-- the Connection list.
--
-- Type details:
--
-- getConns takes a "Server (IORef [[String]])" and returns
-- "Server [[String]]".
--
-- remote takes any "Remotable" type,
-- which is anything that can be serialized; see
-- haste-compiler/libraries/haste-lib/src/Haste/Binary.hs .
-- This includes the standard/basic types, which is why we're
-- doing it on a [[String]] instead of [Connection]. This
-- requires that we be able to convert between the two
-- representations, but that's easy and writing a Binary
-- instance for a custom type is not.
--
-- remote returns "App (Remote a)", and then we strip the App
-- off, leaving us with a "Remote a", and specifically a
-- "Remote (Server [[String]])", which is why clientMain wants
-- that. :)
remoteGetConns <- remote (getConns remoteConns)
-- Pass the API call in question to our client-side computation
runClient $ clientMain remoteGetConns
-- *********************************************************
-- CLIENT SIDE
-- *********************************************************
-- The client-side computation. Takes one API call's info. Returns
-- nothing; here only for side effects.
--
-- This is the bit that runs in the browser. With this particular
-- app, nothing runs on the server side unless the client side asks
-- for it.
clientMain :: Remote (Server [[String]]) -> Client ()
clientMain remoteGetConns = do
-- Run our one API call: make the connection list on the server
-- and return it as a [[String]] (which we use because then
-- we don't have to write an instance of Binary for Connection)
--
-- onServer simply takes a "Remote (Server a)" and runs it on
-- the server. It does this by making a websocket connection,
-- and then using the Haste.Binary serialization to pass data to
-- and from the server.
connectionStr <- onServer remoteGetConns
-- Turn the list of lists back into [Connection]
let connections = map listToConn connectionStr
-- Get all the destinations from the connections, and use it to
-- populate the location name dropdowns.
let locations = sort $ nub $ map destination connections
-- Get some off of our index.html (which is super simple)
maybeStart <- elemById "start"
maybeEnd <- elemById "end"
maybeResult <- elemById "result"
-- Generate the lists <option> tags of locations, for the
-- dropdowns we just grabbed.
startLocationElems <- makeLocationElems locations
endLocationElems <- makeLocationElems locations
case (maybeStart, maybeEnd, maybeResult) of
(Nothing, _, _) -> error "Start dropdown not found"
(_, Nothing, _) -> error "End dropdown not found"
(_, _, Nothing) -> error "Result element not found"
(Just start, Just end, Just result) -> do
addChildren start startLocationElems
addChildren end endLocationElems
handleSelection connections start end result
return ()
-- Saves us having to write a Binary instance for Connection
connToList :: Connection -> [String]
connToList conn = [
(origin conn), (destination conn), (ctype conn)
]
-- Saves us having to write a Binary instance for Connection
listToConn :: [String] -> Connection
listToConn [orig, dest, ctypeStr] = Connection { origin=orig, destination=dest, ctype=ctypeStr }
-- In many of the functions that follow, "Client" is a standin for
-- "IO". Specifically, the Haste.DOM functions all pretty much look
-- like:
--
-- (Functor m, MonadIO m) => a -> m Elem
--
-- for some "a", so these need to be IO-like. The Client monad is
-- a MonadIO, so leaving it in Client saves us some liftIO calls and
-- the like.
-- Turn a list of LocationName into a list of <option>...</option>
-- elements.
makeLocationElems :: [LocationName] -> Client [Elem]
makeLocationElems locations = mapM makeOption locations
-- Make an <option> tag.
makeOption :: String -> Client Elem
makeOption opt = do
optElem <- newElem "option"
set optElem [attr "value" =: opt]
inner <- newTextElem opt
appendChild optElem inner
return optElem
-- Add a list of elements as children to the given element.
addChildren :: Elem -> [Elem] -> Client ()
addChildren parent childs = sequence_ [appendChild parent c | c <- childs]
-- This is the part that actually drives processing: it sets up the
-- event callbacks for what happens when a user picks a dropdown
-- item.
handleSelection :: [Connection] -> Elem -> Elem -> Elem -> Client ()
handleSelection connections start end result = do
_ <- onEvent start Change $ handleFind connections start end result
_ <- onEvent end Change $ handleFind connections start end result
return ()
-- Once the user has picked start and end points, this function
-- takes them and kicks off the path computation
handleFind :: [Connection] -> Elem -> Elem -> Elem -> EventData BasicEvent -> Client ()
handleFind connections start end result _ = do
-- Blank out the result element
clearChildren result
-- Get the start and end elements
maybeStart <- getValue start
maybeEnd <- getValue end
case (maybeStart, maybeEnd) of
(Nothing, _) -> error "Couldn't get value for starting point."
(_, Nothing) -> error "Couldn't get value for ending point."
(Just startName, Just endName) -> do
-- Make the result be a series of <p> elements that are just
-- the "show" values of the list returned by findPath ;
-- findPath does all the work
pathPrints <- rowsHtmlShow $ pconns $ findPath connections startName endName []
addChildren result pathPrints
liftIO $ preventDefault
-- Make a <p> tag out of a string.
makeP :: String -> Client Elem
makeP string = do
p <- newElem "p"
text <- newTextElem string
setChildren p [text]
return p
-- Just turns a list into <p> tags.
rowsHtmlShow :: [Connection] -> Client [Elem]
rowsHtmlShow sconns = mapM (makeP . show) sconns
-- findPath is the meat of the data processing (as opposed to the
-- data scraping, which lives in makeConnections and friends).
--
-- Starting with a list of pasts that consists soley of the given
-- starting point, repeatedly adds any destinations that can be
-- reached from any path in the list of paths. Stops either when
-- our target destination is one of those that can now be reached,
-- or when no new destinations can be reached.
--
-- NB: the "trace" call here ends up in the javascript console,
-- which I think is pretty cool.
findPath :: [Connection] -> LocationName -> LocationName -> [Path] -> Path
findPath connections start end paths = trace (printf "In findPath: start: %s, end: %s, paths: %s" start end (show paths)) $
case maybePath of
Nothing ->
-- If we didn't add any new destinations, we've exhausted the
-- search tree; give up
if paths == newPaths then
error $ printf "No path to %s found!" end
else
findPath connections start end newPaths
Just path -> path
where maybePath = find (\x -> (pdest x) == end) paths
-- We inject the start path, even if we have other paths,
-- every time. This way we don't have to make any
-- decisions, and it's pretty cheap.
newPaths = reducePaths [] $ expandPaths connections $ Path { pdest=start, pconns=[Connection { origin=start, destination=start, ctype="None" }] } : paths
-- Starting with the empty list and a list of Paths, add the
-- destination of each Path to the list, and drop any element in the
-- Path list that is a path to a destination we've already seen.
--
-- In other words, de-dupe for Path lists, with a preference for the
-- shortest (== leftmost in the list; it's a side effect of the way we
-- add Paths) path.
reducePaths :: [LocationName] -> [Path] -> [Path]
reducePaths dests (path:paths) =
if (pdest path) `elem` dests then
[] ++ reducePaths dests paths
else
[path] ++ reducePaths (dests ++ [(pdest path)]) paths
reducePaths _ [] = []
-- Add all possible connections to the given paths. In other words,
-- for every path, go to everywhere we could go from the current end
-- point of that path.
expandPaths :: [Connection] -> [Path] -> [Path]
expandPaths connections paths =
paths ++ concatMap (addPaths connections) paths
-- Add all possible connections to the given path.
addPaths :: [Connection] -> Path -> [Path]
addPaths connections start =
map (extendMatchingPath start) connections
-- Add the given Connection to the given Path, i.e. make the Path
-- one step longer.
extendMatchingPath :: Path -> Connection -> Path
extendMatchingPath path conn =
if (pdest path) == (origin conn) then
Path { pconns=((pconns path) ++ [conn]), pdest=(destination conn) }
else
path
-- *********************************************************
-- SERVER SIDE
-- *********************************************************
-- This is an API call endpoint; that is, this function gets
-- "called" from the client/Haste side, but runs on the server/GHC
-- side, and the "call" is via binary serialization over websockets.
--
-- Freaking magic, in other words. :D
--
-- Server side function that checks an IORef and if it has boring
-- data (i.e. has never been populated for real), runs
-- makeConnections to get all the connection info and stuffs it into
-- said IORef. In either case, returns the connections.
--
-- The Server type here doesn't actually do anything except that
-- Haste won't run these, so they have to be run on the GHC side.
getConns :: Server (IORef [[String]]) -> Server [[String]]
getConns remoteConnsIORef = do
-- Pull our IORef out of the Server monad
remoteConnsRef <- remoteConnsIORef
-- And since all the IORef stuff is in IO, might as well just do a
-- whole pile of stuff in the IO monad. We then pull the result
-- out of the IO monad, so we should end up with a straight
-- [[String]]
ioConns <- liftIO $ do
remoteConns <- readIORef remoteConnsRef
if length remoteConns > 1
then do
-- Looks like real data
return remoteConns
else do
-- Do the connection generation. This static list of
-- locations is maybe not ideal, but if we start with this
-- list we're pretty much guaranteed to hit every reachable
-- location, even if this list isn't complete (which it
-- probably is).
conns <- makeConnections Set.empty Set.empty $ Set.fromList ["Ald'ruhn","Balmora","Ebonheart","Sadrith Mora","Vivec","Caldera","Gnisis","Maar Gan","Molag Mar","Pelagiad","Suran","Tel Mora","Ald Velothi","Dagon Fel","Gnaar Mok","Hla Oad","Khuul","Tel Aruhn","Tel Branora","Seyda Neen","Vos","Tel Fyr","Tel Vos","Buckmoth Legion Fort","Moonmoth Legion Fort","Wolverine Hall","Ahemmusa Camp","Erabenimsun Camp","Urshilaku Camp","Zainab Camp","Indarys Manor","Rethan Manor","Tel Uvirith"]
-- Stick the connections *as a [[String]]* rather than a
-- [Connection], into our IORef
_ <- writeIORef remoteConnsRef $ map connToList conns
-- Pull the data back out of our IORef
newConns <- readIORef remoteConnsRef
return newConns
-- Put the [[String]] back in the Server monad
return ioConns
-- Starts with a list of locations. Scrapes the Morrowind wiki (
-- https://www.uesp.net/wiki/ ) for all the travel/connection
-- information for each of those locations, and returns
-- IO [Connection]
--
-- This and its children are in IO because of the web scraping.
-- They could have been in Server I guess, but it was more
-- convenient this way due to the mix of this function with a bunch
-- of IORef stuff in getConns
--
-- The algorithm:
--
-- Repeatedly take the first to-do item, add all its connections,
-- stick it in done, stick anything that comes up into to-do if it
-- isn't already in done or to-do. When to-do is empty, return
-- connections.
makeConnections :: Set.Set Connection -> Set.Set LocationName -> Set.Set LocationName -> IO [Connection]
makeConnections connections doneLocs toDoLocs
| Set.null toDoLocs = do
_ <- rlpTraceM Crazy (printf "\nmakeConnections is done: %s\n" $ show $ Set.toList connections)
-- We're done, return the results
return $ Set.toList connections
| otherwise = do
_ <- rlpTraceM Crazy (printf "\nmakeConnections: %s -- %s -- %s -- %s\n" (show $ Set.toList connections) (show $ Set.toList doneLocs) (show $ Set.toList toDoLocs) (show $ Set.findMin toDoLocs))
-- Because these are sets they're unordered by default, so we
-- take the minimum item as our next to-do item.
let nextLoc = fixLocName $ Set.findMin toDoLocs
let toDoRemainder = Set.deleteMin toDoLocs
-- Turn our to-do item into a bunch of connections; this is
-- hwere the web scraping happens.
newConns <- pageToConnections nextLoc
_ <- rlpTraceM Crazy (printf "makeConnections: %s" (show newConns))
-- And we're done with that page.
let newDoneLocs = Set.insert nextLoc doneLocs
-- the todo list becomes everything left in the todo list
-- plus all the new destinations minus all the locations
-- we've done before
let newToDo = (Set.difference (Set.union toDoRemainder $ Set.fromList $ map fixLocName $ map destination newConns) newDoneLocs)
-- Recursive call to get more stuff done.
makeConnections
(Set.union connections $ Set.fromList newConns)
newDoneLocs
newToDo
-- Deal with some idiosyncracies of the web data; take location
-- names that the web might generate and turn them into things we
-- can actually lookup as URLs.
fixLocName :: LocationName -> LocationName
fixLocName "" = "Morrowind:Balmora"
fixLocName "Vivec" = "Morrowind:Vivec (city)"
fixLocName "Fort Darius" = "Morrowind:Gnisis"
fixLocName "Fort Pelagiad" = "Morrowind:Pelagiad"
fixLocName "Moonmoth Legion fort" = "Morrowind:Moonmoth Legion Fort"
fixLocName "Ald'Ruhn" = "Morrowind:Ald'ruhn"
fixLocName "Fort FrostmothBM" = "Bloodmoon:Fort Frostmoth"
fixLocName "Morrowind:Fort Frostmoth" = "Bloodmoon:Fort Frostmoth"
fixLocName "Morrowind:Raven Rock" = "Bloodmoon:Raven Rock"
fixLocName "Raven Rock" = "Bloodmoon:Raven Rock"
fixLocName lname = trace (printf "\nfixLocName: %s\n" $ show $ lname) $ if isInfixOf ":" lname then lname else "Morrowind:" ++ lname
-- Webformatting the URLs; turns out to be pretty easy this time.
fixPageForWeb :: LocationName -> String
fixPageForWeb pname =
-- This is never run on the Haste side.
--
-- The Haste side doesn't have missingh, which means no "replace",
-- hence this CPP #ifdef.
#ifdef __HASTE__
pname
#else
replace " " "_" pname
#endif
-- pageToConnections takes a location, scrapes the Morrowind wiki
-- page for that location, and returns all the connections between
-- that location and other locations.
--
-- Example of what we're parsing here:
--
-- <td style="text-align:left;"><b>Transport:</b><br />
-- <p><a href="/wiki/Morrowind:Almsivi_Intervention" title="Morrowind:Almsivi Intervention">Almsivi Intervention</a>:<br /></p>
-- <ul>
-- <li><a href="/wiki/Morrowind:Ald%27ruhn" title="Morrowind:Ald'ruhn">Ald'ruhn</a></li>
-- </ul>
-- <p><a href="/wiki/Morrowind:Divine_Intervention" title="Morrowind:Divine Intervention">Divine Intervention</a>:<br /></p>
-- <ul>
-- <li><a href="/wiki/Morrowind:Wolverine_Hall" title="Morrowind:Wolverine Hall">Wolverine Hall</a><br /></li>
-- </ul>
-- <p><a href="/wiki/Morrowind:Transport#Boat" title="Morrowind:Transport">Boat</a>:<br /></p>
-- <ul>
-- <li><a href="/wiki/Morrowind:Dagon_Fel" title="Morrowind:Dagon Fel">Dagon Fel</a></li>
-- <li><a href="/wiki/Morrowind:Sadrith_Mora" title="Morrowind:Sadrith Mora">Sadrith Mora</a></li>
-- <li><a href="/wiki/Morrowind:Tel_Aruhn" title="Morrowind:Tel Aruhn">Tel Aruhn</a></li>
-- <li><a href="/wiki/Morrowind:Vos" title="Morrowind:Vos">Vos</a></li>
-- </ul>
-- </td>
pageToConnections :: LocationName -> IO [Connection]
pageToConnections originPage = do
-- This is never run on the Haste side.
--
-- We CPP #ifdef the whole thing away on the Haste side because the
-- Haste side doesn't have any of the webscraping libraries we use,
-- so we just return something of the correct type.
#ifdef __HASTE__
return [ Connection { origin="Balmora", destination="Vivec", ctype="Guild Guide" } ]
#else
_ <- rlpTraceM Crazy (printf "\nopening url: %s\n" $ show $ fixPageForWeb originPage)
let openURL x = simpleHttp x
-- Get a tagsoup tags list from the page in question.
tags <- fmap (parseTags . Char8.unpack) $ openURL (printf "https://en.uesp.net/wiki/%s" $ fixPageForWeb originPage)
_ <- rlpTraceM Crazy (printf "In destToConn: tags: %s" (show tags))
-- Get a list of all the transports by finding all the <td>
-- elements and finding the one with the string "Transport:"
-- inside.
let transports1 = filter (\x -> Data.List.isInfixOf "Transport:" (innerText x)) $ partitions (~== "<td>") tags
-- Break the transports <td>'s contents up by <p> tags, which
-- means one transport type per partition.
let transports = if (length transports1) > 0 then
partitions (~== "<p>") $ head transports1
else []
return $ concat $ map transToConns transports
where
transToConns trans = map destToConn tDests
where
-- Get the transport's name
tName1 = partitions (~== "<a>") trans
tName = if (length tName1) > 0
then innerText $ takeWhile (not . isTagClose) $ head tName1
else "None"
-- Get the transport's destinations; basically this gets
-- all the <li> elements, takes their text values, and
-- makes sure they are formatted correctly.
tDests = filter (/= "") $ lines $ replace "'''" "" $ replace " (split)" "" $ replace "/" "\n" $ innerText $ concat $ partitions (~== "<li>") $ concat $ partitions (~== "<ul>") trans
destToConn dest = Connection { origin=(fixLocName originPage), destination=(fixLocName dest), ctype=tName }
-- trace (printf "In destToConn: %s, %s, %s" originPage dest tName) $
#endif