Skip to content

Commit

Permalink
Merge branch 'master' into html
Browse files Browse the repository at this point in the history
  • Loading branch information
nevrome authored Jan 14, 2025
2 parents 3479389 + eea7cf9 commit 7a5de9e
Show file tree
Hide file tree
Showing 14 changed files with 293 additions and 68 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
- V 1.6.1.0:
- Added a feature to list bibliography information via `trident list --bibliography`.
- Added a new Server API `/bibliography` to serve bibliography information via HTTP.
- V 1.6.0.0:
- Added support to write gzipped EIGENSTRAT and PLINK files with `genoconvert` and `forge`. Both commands get a new option `-z` which creates gzipped output.
- V 1.5.7.4:
Expand Down
5 changes: 2 additions & 3 deletions poseidon-hs.cabal
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: poseidon-hs
version: 1.6.0.0
version: 1.6.1.0
synopsis: A package with tools for working with Poseidon genotype data
description: The tools in this package read and analyse Poseidon-formatted genotype databases, a modular system for storing genotype data from thousands of individuals.
license: MIT
Expand Down Expand Up @@ -29,12 +29,11 @@ library
build-depends: base >= 4.7 && < 5, sequence-formats>=1.6.1, text, time, pipes-safe,
exceptions, pipes, bytestring, filepath, yaml, aeson, directory, parsec,
vector, pipes-ordered-zip, table-layout<1.0.0.0, mtl, split, warp, warp-tls, wai-cors,
-- scotty is set to < 0.20.0, because the current stackage resolver 21.17 does not have the latest version, but cabal uses it in the release action, if this constraint is not set
scotty < 0.20.0, cassava, pureMD5, wai, githash,
http-conduit, conduit, http-types, zip-archive,
unordered-containers, network-uri, optparse-applicative, co-log, regex-tdfa,
scientific, country, generics-sop, containers, process, deepseq, template-haskell,
blaze-html, blaze-markup, neat-interpolation, file-embed
blaze-html, blaze-markup, neat-interpolation, file-embed, MissingH
default-language: Haskell2010

executable trident
Expand Down
30 changes: 27 additions & 3 deletions src/Poseidon/BibFile.hs
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
{-# LANGUAGE OverloadedStrings #-}
module Poseidon.BibFile (dummyBibEntry, readBibTeXFile, writeBibTeXFile, BibTeX, BibEntry(..), renderBibEntry) where
module Poseidon.BibFile (dummyBibEntry, readBibTeXFile, writeBibTeXFile, BibTeX, BibEntry(..), renderBibEntry, parseAuthors, authorAbbrvString) where

import Poseidon.Utils (PoseidonException (..),
showParsecErr)

import Control.Exception (throwIO)
import Control.Monad (forM_, liftM2, liftM3)
import Control.Monad (forM, forM_, liftM2,
liftM3)
import Control.Monad.Catch (MonadThrow, throwM)
import Data.List (intercalate)
import Data.List.Split (splitOn)
import Data.String.Utils (strip)
import System.IO (IOMode (..), hPutStrLn,
withFile)
import Text.Parsec (between, char, many, many1,
Expand Down Expand Up @@ -53,7 +58,7 @@ readBibTeXFile :: FilePath -> IO BibTeX
readBibTeXFile bibPath = do
res <- parseFromFile bibFileParser bibPath
case res of
Left err -> throwIO $ PoseidonBibTeXException bibPath $ showParsecErr err
Left err -> throwIO . PoseidonBibTeXException $ "In file " ++ bibPath ++ ": " ++ showParsecErr err
Right res_ -> return res_

{-
Expand Down Expand Up @@ -143,3 +148,22 @@ texBlock closeChar =
comma :: CharParser st String
comma = T.comma lexer

parseAuthors :: (MonadThrow m) => String -> m [(String, String)] -- parses a string of authors to a list of first names and last names
parseAuthors authorString = forM (splitOn " and " (intercalate " " . map strip . lines $ authorString)) $ \singleAuthorStr -> do
case splitOn ", " singleAuthorStr of
[lastName, firstName] -> return (firstName, lastName)
[firstName] -> return (firstName, "") -- in some cultures there are single first names, e.g. "Nini"
_ -> throwM . PoseidonBibTeXException $ "cannot parse bib-author " ++ singleAuthorStr

authorAbbrvString :: [(String, String)] -> String
authorAbbrvString [] = ""
authorAbbrvString [firstAuthor] = renderAuthor True firstAuthor
authorAbbrvString [firstAuthor, secondAuthor] = renderAuthor False firstAuthor ++ " and " ++ renderAuthor False secondAuthor
authorAbbrvString (firstAuthor : _) = renderAuthor False firstAuthor ++ " et al."

-- first argument is whether to render an author in full, or abbreviated.
renderAuthor :: Bool -> (String, String) -> String
renderAuthor True (first, "") = first -- there are edge cases where only first names exist, like in Consortia
renderAuthor True (first, family) = first ++ " " ++ family
renderAuthor False (first, "") = first
renderAuthor False (first, family) = [head first] ++ " " ++ family
77 changes: 68 additions & 9 deletions src/Poseidon/CLI/List.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,27 @@

module Poseidon.CLI.List (runList, ListOptions(..), ListEntity(..), RepoLocationSpec(..)) where

import Poseidon.BibFile (authorAbbrvString, parseAuthors)
import Poseidon.EntityTypes (HasNameAndVersion (..))
import Poseidon.Package (PackageReadOptions (..),
defaultPackageReadOptions,
getAllGroupInfo,
getAllGroupInfo, getBibliographyInfo,
getExtendedIndividualInfo,
packagesToPackageInfos,
readPoseidonPackageCollection)
import Poseidon.ServerClient (AddJannoColSpec (..),
ApiReturnData (..),
import Poseidon.ServerClient (AddColSpec (..), ApiReturnData (..),
ArchiveEndpoint (..),
BibliographyInfo (..),
ExtendedIndividualInfo (..),
GroupInfo (..), PackageInfo (..),
processApiResponse, qDefault)
import Poseidon.Utils (PoseidonIO, logInfo, logWarning)

import Control.Monad (forM_, when)
import Control.Monad.IO.Class (liftIO)
import Data.List (intercalate, sortOn)
import Data.List (intercalate, nub, sortOn)
import Data.Maybe (catMaybes, fromMaybe)
import qualified Data.Text as T
import Data.Version (Version, showVersion)

import Text.Layout.Table (asciiRoundS, column, def, expandUntil,
Expand All @@ -39,7 +41,8 @@ data RepoLocationSpec = RepoLocal [FilePath] | RepoRemote ArchiveEndpoint
-- | A datatype to represent the options what to list
data ListEntity = ListPackages
| ListGroups
| ListIndividuals AddJannoColSpec
| ListIndividuals AddColSpec
| ListBibliography AddColSpec

-- | The main function running the list command
runList :: ListOptions -> PoseidonIO ()
Expand Down Expand Up @@ -95,9 +98,9 @@ runList (ListOptions repoLocation listEntity rawOutput onlyLatest) = do
RepoRemote (ArchiveEndpoint remoteURL archive) -> do
logInfo "Downloading individual data from server"
let addJannoColFlag = case addJannoColSpec of
AddJannoColAll -> "&additionalJannoColumns=ALL"
AddJannoColList [] -> ""
AddJannoColList moreJannoColumns -> "&additionalJannoColumns=" ++ intercalate "," moreJannoColumns
AddColAll -> "&additionalJannoColumns=ALL"
AddColList [] -> ""
AddColList moreJannoColumns -> "&additionalJannoColumns=" ++ intercalate "," moreJannoColumns
apiReturn <- processApiResponse (remoteURL ++ "/individuals" ++ qDefault archive ++ addJannoColFlag) False
case apiReturn of
ApiReturnExtIndividualInfo indInfo -> return indInfo
Expand All @@ -114,7 +117,7 @@ runList (ListOptions repoLocation listEntity rawOutput onlyLatest) = do
-- we only output this warning if the columns were requested explicitly. Not if
-- all columns were requested. We consider such an "all" request to mean "all columns that are present".
case addJannoColSpec of
AddJannoColList (_:_) -> do
AddColList (_:_) -> do
forM_ (zip [0..] addJannoCols) $ \(i, columnKey) -> do
-- check entries in all individuals for that key
let nonEmptyEntries = catMaybes [snd (entries !! i) | ExtendedIndividualInfo _ _ _ _ entries <- extIndInfos]
Expand All @@ -129,6 +132,57 @@ runList (ListOptions repoLocation listEntity rawOutput onlyLatest) = do
showMaybeVersion (getPacVersion i), show isLatest] ++
map (fromMaybe "n/a" . snd) addColumnEntries
return (tableH, tableB)
ListBibliography addColSpec -> do
bibInfos <- case repoLocation of
RepoRemote (ArchiveEndpoint remoteURL archive) -> do
logInfo "Downloading bibliography data from server"
let addJannoColFlag = case addColSpec of
AddColAll -> "&additionalBibColumns=ALL"
AddColList [] -> ""
AddColList moreBibFields -> "&additionalBibColumns=" ++ intercalate "," moreBibFields
apiReturn <- processApiResponse (remoteURL ++ "/bibliography" ++ qDefault archive ++ addJannoColFlag) False
case apiReturn of
ApiReturnBibInfo bibInfo -> return bibInfo
_ -> error "should not happen"
RepoLocal baseDirs -> do
pacCollection <- readPoseidonPackageCollection pacReadOpts baseDirs
getBibliographyInfo pacCollection addColSpec

let addBibFieldNames = case addColSpec of
AddColAll -> nub . concatMap (map fst . bibInfoAddCols) $ bibInfos
AddColList names -> names

-- warning in case the additional Columns do not exist in the entire janno dataset,
-- we only output this warning if the columns were requested explicitly. Not if
-- all columns were requested. We consider such an "all" request to mean "all columns that are present".
case addColSpec of
AddColList (_:_) -> do
forM_ addBibFieldNames $ \bibFieldKey -> do
-- check entries in all individuals for that key
let nonEmptyEntries = do
bibInfo <- bibInfos
Just (Just _) <- return $ bibFieldKey `lookup` bibInfoAddCols bibInfo
return ()
when (null nonEmptyEntries) . logWarning $
"Bibliography field " ++ bibFieldKey ++ "is not present in any bibliography entry"
_ -> return ()

let tableH = ["BibKey", "Title", "Author", "Year", "DOI",
"Nr of samples"] ++ addBibFieldNames
tableB = do
bibInfo <- bibInfos
let addBibFieldColumns = do
bibFieldName <- addBibFieldNames
case bibFieldName `lookup` bibInfoAddCols bibInfo of
Just (Just v) -> return v
_ -> return ""
authors <- parseAuthors . curateBibField . bibInfoAuthor $ bibInfo
return $ [bibInfoKey bibInfo, curateBibField $ bibInfoTitle bibInfo, authorAbbrvString authors,
curateBibField $ bibInfoYear bibInfo,
curateBibField $ bibInfoDoi bibInfo, show (bibInfoNrSamples bibInfo)] ++
addBibFieldColumns
return (tableH, tableB)

if rawOutput then
liftIO $ putStrLn $ intercalate "\n" [intercalate "\t" row | row <- tableH:tableB]
else do
Expand All @@ -139,3 +193,8 @@ runList (ListOptions repoLocation listEntity rawOutput onlyLatest) = do
showMaybe = fromMaybe "n/a"
showMaybeVersion :: Maybe Version -> String
showMaybeVersion = maybe "n/a" showVersion

-- this function is necessary, as BibTeX sometimes has arbitrary line breaks within fields,
-- which we need to get rid of to avoid down-stream problems
curateBibField :: Maybe String -> String
curateBibField = T.unpack . T.intercalate " " . map T.strip . T.lines . T.pack . fromMaybe ""
22 changes: 17 additions & 5 deletions src/Poseidon/CLI/OptparseApplicativeParsers.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import Poseidon.GenotypeData (GenoDataSource (..),
GenotypeDataSpec (..),
GenotypeFileSpec (..),
SNPSetSpec (..))
import Poseidon.ServerClient (AddJannoColSpec (..),
import Poseidon.ServerClient (AddColSpec (..),
ArchiveEndpoint (..))
import Poseidon.Utils (ErrorLength (..), LogMode (..),
TestMode (..),
Expand Down Expand Up @@ -636,7 +636,10 @@ parseMaybeSnpFile = OP.option (Just <$> OP.str) (
OP.value Nothing)

parseListEntity :: OP.Parser ListEntity
parseListEntity = parseListPackages <|> parseListGroups <|> (parseListIndividualsDummy *> parseListIndividualsExtraCols)
parseListEntity = parseListPackages <|>
parseListGroups <|>
(parseListIndividualsDummy *> parseListIndividualsExtraCols) <|>
(parseListBibliographyDummy *> parseListBibliographyExtraFields)
where
parseListPackages = OP.flag' ListPackages (
OP.long "packages" <>
Expand All @@ -648,14 +651,23 @@ parseListEntity = parseListPackages <|> parseListGroups <|> (parseListIndividual
parseListIndividualsDummy = OP.flag' () (
OP.long "individuals" <>
OP.help "List all individuals/samples.")
parseListIndividualsExtraCols = ListIndividuals <$> (parseAllJannoCols <|> (AddJannoColList <$> OP.many parseExtraCol))
parseAllJannoCols = OP.flag' AddJannoColAll (OP.long "fullJanno" <> OP.help "output all Janno Columns")
parseListIndividualsExtraCols = ListIndividuals <$> (parseAllJannoCols <|> (AddColList <$> OP.many parseExtraCol))
parseAllJannoCols = OP.flag' AddColAll (OP.long "fullJanno" <> OP.help "output all Janno Columns")
parseExtraCol = OP.strOption (
OP.short 'j' <>
OP.long "jannoColumn" <>
OP.metavar "COLNAME" <>
OP.help "List additional fields from the janno files, using the .janno column heading name, such as \
\\"Country\", \"Site\", \"Date_C14_Uncal_BP\", etc..")
\\"Country\", \"Site\", \"Date_C14_Uncal_BP\", etc... Can be given multiple times")
parseListBibliographyDummy = OP.flag' () (
OP.long "bibliography" <> OP.help "output bibliography information for packages")
parseListBibliographyExtraFields = ListBibliography <$>
(parseAllBibFields <|> (AddColList <$> OP.many parseExtraBibFields))
parseAllBibFields = OP.flag' AddColAll (OP.long "fullBib" <> OP.help "output all bibliography fields found in any bibliography item")
parseExtraBibFields = OP.strOption (
OP.short 'b' <> OP.long "bibField" <> OP.metavar "BIB-FIELD" <>
OP.help "List information from the given bibliography field, for example \"abstract\" or \"publisher\". Can \
\be given multiple times.")

parseRawOutput :: OP.Parser Bool
parseRawOutput = OP.switch (
Expand Down
22 changes: 18 additions & 4 deletions src/Poseidon/CLI/Serve.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@ import Poseidon.Package (PackageReadOptions (..),
PoseidonPackage (..),
defaultPackageReadOptions,
getAllGroupInfo,
getBibliographyInfo,
getExtendedIndividualInfo,
getJannoRowsFromPac,
packagesToPackageInfos,
readPoseidonPackageCollection)
import Poseidon.PoseidonVersion (minimalRequiredClientVersion)
import Poseidon.ServerClient (AddJannoColSpec (..),
import Poseidon.ServerClient (AddColSpec (..),
ApiReturnData (..),
ServerApiReturnType (..))
import Poseidon.ServerHTML
Expand Down Expand Up @@ -145,14 +146,27 @@ runServer (ServeOptions archBaseDirs maybeZipPath port ignoreChecksums certFiles
pacs <- getItemFromArchiveStore archiveStore
maybeAdditionalColumnsString <- (Just <$> param "additionalJannoColumns") `rescue` (\_ -> return Nothing)
indInfo <- case maybeAdditionalColumnsString of
Just "ALL" -> getExtendedIndividualInfo pacs AddJannoColAll -- Nothing means all Janno Columns
Just "ALL" -> getExtendedIndividualInfo pacs AddColAll -- Nothing means all Janno Columns
Just additionalColumnsString ->
let additionalColumnNames = splitOn "," additionalColumnsString
in getExtendedIndividualInfo pacs (AddJannoColList additionalColumnNames)
Nothing -> getExtendedIndividualInfo pacs (AddJannoColList [])
in getExtendedIndividualInfo pacs (AddColList additionalColumnNames)
Nothing -> getExtendedIndividualInfo pacs (AddColList [])
let retData = ApiReturnExtIndividualInfo indInfo
return $ ServerApiReturnType [] (Just retData)

get "/bibliography" . conditionOnClientVersion $ do
logRequest logA
pacs <- getItemFromArchiveStore archiveStore
maybeAdditionalBibFieldsString <- (Just <$> param "additionalBibColumns") `rescue` (\_ -> return Nothing)
bibInfo <- case maybeAdditionalBibFieldsString of
Just "ALL" -> getBibliographyInfo pacs AddColAll -- Nothing means all Janno Columns
Just additionalBibFieldsString ->
let additionalBibFields = splitOn "," additionalBibFieldsString
in getBibliographyInfo pacs (AddColList additionalBibFields)
Nothing -> getBibliographyInfo pacs (AddColList [])
let retData = ApiReturnBibInfo bibInfo
return $ ServerApiReturnType [] (Just retData)

-- API for retreiving package zip files
when (isJust maybeZipPath) . get "/zip_file/:package_name" $ do
logRequest logA
Expand Down
Loading

0 comments on commit 7a5de9e

Please sign in to comment.