From 8c6ac8ee6c2f1d7cb31dfe98896605a1deb52245 Mon Sep 17 00:00:00 2001 From: jakoble <37188634+jakoble@users.noreply.github.com> Date: Mon, 28 Oct 2024 15:35:29 +0100 Subject: [PATCH] Add files via upload --- corpora/spoken-corpora/c-oral.json | 15 +++++++++++++++ corpora/spoken-corpora/perfil.json | 15 +++++++++++++++ corpora/spoken-corpora/spoken-dutch-corpus.json | 15 +++++++++++++++ 3 files changed, 45 insertions(+) create mode 100644 corpora/spoken-corpora/c-oral.json create mode 100644 corpora/spoken-corpora/perfil.json create mode 100644 corpora/spoken-corpora/spoken-dutch-corpus.json diff --git a/corpora/spoken-corpora/c-oral.json b/corpora/spoken-corpora/c-oral.json new file mode 100644 index 0000000..ba773e2 --- /dev/null +++ b/corpora/spoken-corpora/c-oral.json @@ -0,0 +1,15 @@ +{ + "Name": "C-ORAL-ROM_EXM", + "URL": "https://hdl.handle.net/21.11129/0000-000B-D4FF-7", + "Family": "Spoken corpora", + "Description": "This is a corpus of formal and informal speech.\nThe corpus is available from PORTULAN.", + "Language": ["por"], + "Licence": "The MIT licence", + "Size": ["300,000 words"], + "Annotation": ["Orthographically aligned", "Phonemically aligned", "PoS tagged"], + "Infrastructure": "CLARIN", + "Access": { + "Download": "https://hdl.handle.net/21.11129/0000-000B-D4FF-7" + }, + "Publication":"" +} \ No newline at end of file diff --git a/corpora/spoken-corpora/perfil.json b/corpora/spoken-corpora/perfil.json new file mode 100644 index 0000000..9826c7d --- /dev/null +++ b/corpora/spoken-corpora/perfil.json @@ -0,0 +1,15 @@ +{ + "Name": "Perfil Sociolinguístico da Fala Bracarense", + "URL": "https://hdl.handle.net/21.11129/0000-000D-F928-E", + "Family": "Spoken corpora", + "Description": "The corpus is composed of one-hour interviews with speakers from the same area (around Braga, Portugal).\nThe interviews are stratified according to gender, age and level of education; the transcriptions are aligned with the audio.\nThe corpus is available from PORTULAN.", + 
"Language": ["por"], + "Licence": "CC BY-NC-ND", + "Size": ["90 hours"], + "Annotation": ["transcriptions aligned"], + "Infrastructure": "CLARIN", + "Access": { + "Download": "https://hdl.handle.net/21.11129/0000-000D-F928-E" + }, + "Publication":"" +} \ No newline at end of file diff --git a/corpora/spoken-corpora/spoken-dutch-corpus.json b/corpora/spoken-corpora/spoken-dutch-corpus.json new file mode 100644 index 0000000..128a3b3 --- /dev/null +++ b/corpora/spoken-corpora/spoken-dutch-corpus.json @@ -0,0 +1,15 @@ +{ + "Name": "Spoken Dutch Corpus", + "URL": "https://hdl.handle.net/10032/tm-a2-k6", + "Family": "Spoken corpora", + "Description": "This is a corpus of standard Dutch spoken in Flanders and the Netherlands.", + "Language": ["nld"], + "Licence": "", + "Size": ["900 hours"], + "Annotation": ["PoS-tagged", "syntactically parsed", "phonetically transcribed", "phonemically transcribed"], + "Infrastructure": "CLARIN", + "Access": { + "Download": "https://hdl.handle.net/10032/tm-a2-k6" + }, + "Publication":"" +} \ No newline at end of file