DTA::CAB::Format::CONLLU - Datum parser: CONLL-U format
use DTA::CAB::Format::CONLLU;
##========================================================================
## Constructors etc.
$fmt = DTA::CAB::Format::CONLLU->new(%args);
##========================================================================
## Methods: I/O: Input
\%head = blockScanHead(\$buf,$io,\%opts);
$fmt = $fmt->fromFh($filename_or_handle);
##========================================================================
## Methods: Output
$ext = $fmt->defaultExtension();
$fmt = $fmt->putToken($tok);
$fmt = $fmt->putSentence($sent);
$fmt = $fmt->putData($data);
##========================================================================
## Methods: Low-Level
$str = unescapeConllu($str);
$str = escapeConllu($str);
DTA::CAB::Format::CONLLU is a CAB datum parser+formatter conforming to the CONLL-U format conventions; see https://universaldependencies.org/format.html for details.
ID: Word index, integer starting at 1 for each new sentence.
FORM: Word form or punctuation symbol.
LEMMA: Lemma or stem of word form.
UPOS: Universal part-of-speech tag.
XPOS: Language-specific part-of-speech tag; underscore if not available.
FEATS: List of morphological features from the universal feature inventory or underscore if not available.
HEAD: Head of the current word, which is either a value of ID or zero (0).
DEPREL: Universal dependency relation to the HEAD (root iff HEAD = 0) or a defined language-specific subtype of one.
DEPS: Enhanced dependency graph in the form of a list of head-deprel pairs.
MISC: Any other annotation, split by '|'
MISC field
By the CONLL-U conventions, the final token field MISC holds a list of elements separated by vertical bars (MISC ::= "MISC1|...|MISCn"). This module treats MISC$i elements of the form ATTR=VALUE specially for the following ATTRs:
id=TOKID # sets $tok->{id}
loc=OFFSET LENGTH # sets $tok->{loc}
xlit=XTEXT # sets $tok->{xlit}{latin1Text}; also honors CONLL-U "Translit=XTEXT"
norm=NORM # sets $tok->{moot}{word}
details=DETAILS # sets $tok->{moot}{details}{details}
json=JSON # clobbers %$tok with JSON a la Format::TJ
VALUEs of specially handled attributes containing a literal % or | should have these 2 characters (and only these 2 characters) URI-escaped (to %25 and %7C, respectively).
DTA::CAB::Format::CONLLU inherits from DTA::CAB::Format::TJ.
Global tag translation table from language-specific PoS-tagset to UD PoS-tagset (XPOS > UPOS) used for output. Keys are language-specific tagsets, values are HASH- or CODE-refs for tagset translation.
%XPOS2UPOS => ($tagset => $CODE_OR_HASHREF, ...)
$upos = $XPOS2UPOS{$tagset}->{$xpos}; ##-- HASH-ref
$upos = $XPOS2UPOS{$tagset}->($xpos); ##-- CODE-ref
$fmt = CLASS_OR_OBJ->new(%args);
object structure: assumed HASH
{
##-- Input
doc => $doc, ##-- INHERITED: buffered input document
cuMiscIn => $bool, ##-- NEW: parse special MISC attrs (default=true)
##-- Output
outbuf => $stringBuffer, ##-- INHERITED: buffered output
level => $formatLevel, ##-- OVERRIDE: <0:omit-misc ; 0:default:include-misc,exclude-json, >=1:include-json, >=2:canonical-json
tagset => $tagset, ##-- auto-convert XPOS->UPOS for $tagset (known values: 'stts' (default))
##-- Common (INHERITED from Format::TT)
raw => $bool, ##-- INHERITED: attempt to load/save raw data
fh => $fh, ##-- INHERITED: IO::Handle for read/write
utf8 => $bool, ##-- INHERITED: read/write utf8?
tloc => $attr, ##-- INHERITED: if non-empty, parseTokenizerString() sets $w->{$attr}="$off $len"; default=0
#defaultFieldName => $name, ##-- INHERITED: default name for unnamed misc-fields; parsed into @{$tok->{other}{$name}}; default=''
}
\%head = blockScanHead(\$buf,$io,\%opts);
gets header offset, length from (mmapped) \$buf. %opts are as for blockScan(). OVERRIDE scans for CONLL-U "# newdoc" comment.
$fmt = $fmt->fromFh($filename_or_handle);
new override calls DTA::CAB::Format::fromFh().
guts for fromFh() method: parse handle $fh into local document buffer.
$ext = $fmt->defaultExtension();
returns default filename extension for this format (.conllu).
$fmt = $fmt->putToken($tok);
$fmt = $fmt->putToken($tok,$conllu_id);
honors $fmt->{level} : <0:omit-misc ; 0:default:include-misc,exclude-json, >=1:include-json, >=2:canonical-json
$fmt = $fmt->putSentence($sent);
concatenates formatted tokens, adding sentence-id comment if available
concatenates formatted sentences, adding document # $TJ:DOC comment if appropriate.
$fmt = $fmt->putData($data);
puts raw data (uses forceDocument()); OVERRIDE uses DTA::CAB::Format::TT implementation.
$str = unescapeConllu($str);
un-escapes CONLLU value strings using URI-escape sequences ('%7C'=>'|', '%25'=>'%').
$str = escapeConllu($str);
escapes CONLLU value strings using URI-escape sequences ('|'=>'%7C', '%'=>'%25').
An example file in the format accepted/generated by this module with the default options (level => 0, tagset => 'stts') is:
# sent_id = s1
1 EJn eine DET ART _ _ _ _ Translit=Ejn|norm=Ein|details=eine[_ARTINDEF][sg][acc][neut] <2.5>
2 zamer zahm ADJ ADJA _ _ _ _ Translit=zamer|norm=zahmer|details=zahm[_ADJA][none][pos][pl][gen]\*[strong] <0>
3 Elephant Elefant NOUN NN _ _ _ _ Translit=Elephant|norm=Elefant|details=Elefant[_NN][k_l_t][masc][sg][nom] <0>
4 gillt gelten VERB VVFIN _ _ _ _ Translit=gillt|norm=gilt|details=gelt~en[_VVFIN][third][sg][pres][ind] <0>
5 ohngefähr ohngefähr ADV ADV _ _ _ _ Translit=ohngefähr|norm=ohngefähr|details=ohngefähr[_ADV] <0>
6 zweyhundert zweihundert NUM CARD _ _ _ _ Translit=zweyhundert|norm=zweihundert|details=zwei/Z#hundert[_CARD][num] <0>
7 Thaler Taler NOUN NN _ _ _ _ Translit=Thaler|norm=Taler|details=Taler[_NN][k_g_artef][masc][pl][nom_acc_gen] <0>
8 . . PUNCT $. _ _ _ _ Translit=.|norm=.|details=$. <0>
# sent_id = s2
1 Ceterum ceterum X FM.la _ _ _ _ Translit=Ceterum|norm=Ceterum|details=* <0>
2 censeo censeo X FM.la _ _ _ _ Translit=censeo|norm=censeo|details=* <0>
3 Carthaginem carthaginem X FM.la _ _ _ _ Translit=Carthaginem|norm=Carthaginem|details=* <0>
4 esse esse X FM.la _ _ _ _ Translit=esse|norm=esse|details=* <0>
5 delendam delendam X FM.la _ _ _ _ Translit=delendam|norm=delendam|details=* <0>
6 . . PUNCT $. _ _ _ _ Translit=.|norm=.|details=$. <0>
An example file in the terse format generated by this module with the options (level => -1, tagset => 'none') is:
# sent_id = s1
1 EJn eine _ ART _ _ _ _ _
2 zamer zahm _ ADJA _ _ _ _ _
3 Elephant Elefant _ NN _ _ _ _ _
4 gillt gelten _ VVFIN _ _ _ _ _
5 ohngefähr ohngefähr _ ADV _ _ _ _ _
6 zweyhundert zweihundert _ CARD _ _ _ _ _
7 Thaler Taler _ NN _ _ _ _ _
8 . . _ $. _ _ _ _ _
# sent_id = s2
1 Ceterum ceterum _ FM.la _ _ _ _ _
2 censeo censeo _ FM.la _ _ _ _ _
3 Carthaginem carthaginem _ FM.la _ _ _ _ _
4 esse esse _ FM.la _ _ _ _ _
5 delendam delendam _ FM.la _ _ _ _ _
6 . . _ $. _ _ _ _ _
An example file in the verbose format generated by this module with the options (level => 2, tagset => 'stts'), including a full TJ-style dump in the json attribute of the MISC field, is:
# sent_id = s1
# $TJ:SENT={"lang":"de"}
1 EJn eine DET ART _ _ _ _ Translit=Ejn|norm=Ein|details=eine[_ARTINDEF][sg][acc][neut] <2.5>|json={"dmoot":{"analyses":[{"details":"Ein","prob":0,"tag":"Ein"}],"morph":[{"hi":"ein~en[_VVIMP][sg]","w":2},{"hi":"eine[_ARTINDEF][sg][nom][masc]","w":2.5},{"hi":"eine[_ARTINDEF][sg][nom][neut]","w":2.5},{"hi":"eine[_ARTINDEF][sg][acc][neut]","w":2.5},{"hi":"ein[_ADV]","w":2.5},{"hi":"ein[_CARD][num]","w":2.5},{"hi":"ein[_PTKVZ]","w":2.5}],"tag":"Ein"},"errid":"72751","exlex":"Ein","f":407,"lts":[{"hi":"\\?ejn","w":0}],"moot":{"analyses":[{"details":"ein[_ADV]","lemma":"ein","prob":2.5,"tag":"ADV"},{"details":"ein[_CARD][num]","lemma":"ein","prob":2.5,"tag":"CARD"},{"details":"ein[_PTKVZ]","lemma":"ein","prob":2.5,"tag":"PTKVZ"},{"details":"eine[_ARTINDEF][sg][acc][neut]","lemma":"eine","prob":2.5,"tag":"ART"},{"details":"eine[_ARTINDEF][sg][nom][masc]","lemma":"eine","prob":2.5,"tag":"ART"},{"details":"eine[_ARTINDEF][sg][nom][neut]","lemma":"eine","prob":2.5,"tag":"ART"},{"details":"ein~en[_VVIMP][sg]","lemma":"einen","prob":2,"tag":"VVIMP"}],"details":{"details":"eine[_ARTINDEF][sg][acc][neut]","lemma":"eine","prob":2.5,"tag":"ART"},"lemma":"eine","tag":"ART","word":"Ein"},"msafe":0,"rw":[],"text":"EJn","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"Ejn"}}
2 zamer zahm ADJ ADJA _ _ _ _ Translit=zamer|norm=zahmer|details=zahm[_ADJA][none][pos][pl][gen]\*[strong] <0>|json={"dmoot":{"analyses":[{"details":"zahmer","prob":0.129596281051636,"tag":"zahmer"},{"details":"zamer","prob":1.248,"tag":"zamer"}],"morph":[{"hi":"zahm[_ADJA][none][pos][sg][nom][masc][strong_mixed]","w":0},{"hi":"zahm[_ADJA][none][pos][sg][dat_gen][fem][strong]","w":0},{"hi":"zahm[_ADJA][none][pos][pl][gen]\\*[strong]","w":0},{"hi":"zahm[_ADJC][none][comp]","w":0}],"tag":"zahmer"},"eqphox":[{"hi":"zahmer","w":0.237610012292862}],"f":1,"lts":[{"hi":"tsame6","w":0}],"moot":{"analyses":[{"details":"zahm[_ADJA][none][pos][pl][gen]\\*[strong]","lemma":"zahm","prob":0,"tag":"ADJA"},{"details":"zahm[_ADJA][none][pos][sg][dat_gen][fem][strong]","lemma":"zahm","prob":0,"tag":"ADJA"},{"details":"zahm[_ADJA][none][pos][sg][nom][masc][strong_mixed]","lemma":"zahm","prob":0,"tag":"ADJA"},{"details":"zahm[_ADJC][none][comp]","lemma":"zahm","prob":0,"tag":"ADJD"}],"details":{"details":"zahm[_ADJA][none][pos][pl][gen]\\*[strong]","lemma":"zahm","prob":0,"tag":"ADJA"},"lemma":"zahm","tag":"ADJA","word":"zahmer"},"msafe":0,"rw":[{"hi":"zahmer","w":15.7981405258179}],"text":"zamer","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"zamer"}}
3 Elephant Elefant NOUN NN _ _ _ _ Translit=Elephant|norm=Elefant|details=Elefant[_NN][k_l_t][masc][sg][nom] <0>|json={"dmoot":{"analyses":[{"details":"Elefant","prob":0,"tag":"Elefant"}],"morph":[{"hi":"Elefant[_NN][k_l_t][masc][sg][nom]","w":0}],"tag":"Elefant"},"errid":"84974","exlex":"Elefant","f":303,"lang":["de"],"lts":[{"hi":"\\?elefant","w":0}],"moot":{"analyses":[{"details":"Elefant[_NN][k_l_t][masc][sg][nom]","lemma":"Elefant","prob":0,"tag":"NN"}],"details":{"details":"Elefant[_NN][k_l_t][masc][sg][nom]","lemma":"Elefant","prob":0,"tag":"NN"},"lemma":"Elefant","tag":"NN","word":"Elefant"},"morph":[{"hi":"Elephant[_NN][k_l_t][masc][sg][nom]","w":0},{"hi":"elephant[_FM][en]","w":2.5}],"msafe":1,"rw":[],"text":"Elephant","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"Elephant"}}
4 gillt gelten VERB VVFIN _ _ _ _ Translit=gillt|norm=gilt|details=gelt~en[_VVFIN][third][sg][pres][ind] <0>|json={"dmoot":{"analyses":[{"details":"gilt","prob":0.135864566802979,"tag":"gilt"},{"details":"gillt","prob":1.248,"tag":"gillt"},{"details":"Gild","prob":1.35002433472872,"tag":"Gild"}],"morph":[{"hi":"gelt~en[_VVFIN][third][sg][pres][ind]","w":0},{"hi":"gelt~en[_VVIMP][sg]","w":0}],"tag":"gilt"},"eqphox":[{"hi":"gilt","w":0.0521488003432751},{"hi":"Gild","w":0.298937886953354}],"f":5,"lts":[{"hi":"gilt","w":0}],"moot":{"analyses":[{"details":"gelt~en[_VVFIN][third][sg][pres][ind]","lemma":"gelten","prob":0,"tag":"VVFIN"},{"details":"gelt~en[_VVIMP][sg]","lemma":"gelten","prob":0,"tag":"VVIMP"}],"details":{"details":"gelt~en[_VVFIN][third][sg][pres][ind]","lemma":"gelten","prob":0,"tag":"VVFIN"},"lemma":"gelten","tag":"VVFIN","word":"gilt"},"msafe":0,"rw":[{"hi":"gilt","w":18.9322834014893}],"text":"gillt","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"gillt"}}
5 ohngefähr ohngefähr ADV ADV _ _ _ _ Translit=ohngefähr|norm=ohngefähr|details=ohngefähr[_ADV] <0>|json={"dmoot":{"analyses":[{"details":"ohngefähr","prob":0,"tag":"ohngefähr"}],"morph":[{"hi":"ohngefähr[_ADV]","w":0}],"tag":"ohngefähr"},"lang":["de"],"lts":[{"hi":"\\?oNefe6","w":0}],"moot":{"analyses":[{"details":"ohngefähr[_ADV]","lemma":"ohngefähr","prob":0,"tag":"ADV"}],"details":{"details":"ohngefähr[_ADV]","lemma":"ohngefähr","prob":0,"tag":"ADV"},"lemma":"ohngefähr","tag":"ADV","word":"ohngefähr"},"morph":[{"hi":"ohngefähr[_ADV]","w":0}],"msafe":1,"text":"ohngefähr","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"ohngefähr"}}
6 zweyhundert zweihundert NUM CARD _ _ _ _ Translit=zweyhundert|norm=zweihundert|details=zwei/Z#hundert[_CARD][num] <0>|json={"dmoot":{"analyses":[{"details":"zweihundert","prob":0,"tag":"zweihundert"}],"morph":[{"hi":"zwei/Z#hundert[_CARD][num]","w":0}],"tag":"zweihundert"},"errid":"ec","exlex":"zweihundert","f":397,"lts":[{"hi":"tsvaihunde6t","w":0}],"moot":{"analyses":[{"details":"zwei/Z#hundert[_CARD][num]","lemma":"zweihundert","prob":0,"tag":"CARD"}],"details":{"details":"zwei/Z#hundert[_CARD][num]","lemma":"zweihundert","prob":0,"tag":"CARD"},"lemma":"zweihundert","tag":"CARD","word":"zweihundert"},"msafe":0,"rw":[],"text":"zweyhundert","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"zweyhundert"}}
7 Thaler Taler NOUN NN _ _ _ _ Translit=Thaler|norm=Taler|details=Taler[_NN][k_g_artef][masc][pl][nom_acc_gen] <0>|json={"dmoot":{"analyses":[{"details":"Taler","prob":0,"tag":"Taler"}],"morph":[{"hi":"Taler[_NN][k_g_artef][masc][sg][nom_acc_dat]","w":0},{"hi":"Taler[_NN][k_g_artef][masc][pl][nom_acc_gen]","w":0}],"tag":"Taler"},"errid":"57836","exlex":"Taler","f":4078,"lts":[{"hi":"tale6","w":0}],"moot":{"analyses":[{"details":"Taler[_NN][k_g_artef][masc][pl][nom_acc_gen]","lemma":"Taler","prob":0,"tag":"NN"},{"details":"Taler[_NN][k_g_artef][masc][sg][nom_acc_dat]","lemma":"taler","prob":0,"tag":"NN"}],"details":{"details":"Taler[_NN][k_g_artef][masc][pl][nom_acc_gen]","lemma":"Taler","prob":0,"tag":"NN"},"lemma":"Taler","tag":"NN","word":"Taler"},"morph":[{"hi":"Thaler[_NE][lastname][none][k_l_h_m_namti_fam][sg][nom_acc_dat]","w":0},{"hi":"Thale/GN~er[_NN][k_l_h_m_eig_sozk_bev_geo][masc][sg][nom_acc_dat]","w":5},{"hi":"Thale/GN~er[_NN][k_l_h_m_eig_sozk_bev_geo][masc][pl][nom_acc_gen]","w":5},{"hi":"Thal/GN~er[_NN][k_l_h_m_eig_sozk_bev_geo][masc][sg][nom_acc_dat]","w":5},{"hi":"Thal/GN~er[_NN][k_l_h_m_eig_sozk_bev_geo][masc][pl][nom_acc_gen]","w":5}],"msafe":0,"rw":[],"text":"Thaler","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"Thaler"}}
8 . . PUNCT $. _ _ _ _ Translit=.|norm=.|details=$. <0>|json={"dmoot":{"analyses":[{"details":".","prob":0,"tag":"."}],"morph":[{"hi":"$.","w":0}],"tag":"."},"errid":"ec","exlex":".","f":5318438,"lts":[{"hi":"","w":0}],"moot":{"analyses":[{"details":"$.","lemma":".","prob":0,"tag":"$."}],"details":{"details":"$.","lemma":".","prob":0,"tag":"$."},"lemma":".","tag":"$.","word":"."},"msafe":1,"text":".","toka":["$."],"tokpp":["$."],"xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"."}}
# sent_id = s2
# $TJ:SENT={"lang":"la"}
1 Ceterum ceterum X FM.la _ _ _ _ Translit=Ceterum|norm=Ceterum|details=* <0>|json={"dmoot":{"analyses":[{"details":"Ceterum","prob":0,"tag":"Ceterum"}],"morph":[{"hi":"[_FM][lat]","w":0}],"tag":"Ceterum"},"f":11,"lang":["la"],"lts":[{"hi":"kete6um","w":0}],"mlatin":[{"hi":"[_FM][lat]","w":0}],"moot":{"analyses":[{"details":"[_FM][lat]","lemma":"ceterum","prob":0,"tag":"FM"}],"details":{"details":"*","lemma":"ceterum","prob":0,"tag":"FM.la"},"lemma":"ceterum","tag":"FM.la","word":"Ceterum"},"msafe":1,"text":"Ceterum","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"Ceterum"}}
2 censeo censeo X FM.la _ _ _ _ Translit=censeo|norm=censeo|details=* <0>|json={"dmoot":{"analyses":[{"details":"censeo","prob":0,"tag":"censeo"}],"morph":[{"hi":"[_FM][lat]","w":0}],"tag":"censeo"},"f":9,"lang":["la"],"lts":[{"hi":"kenzeo","w":0}],"mlatin":[{"hi":"[_FM][lat]","w":0}],"moot":{"analyses":[{"details":"[_FM][lat]","lemma":"censeo","prob":0,"tag":"FM"}],"details":{"details":"*","lemma":"censeo","prob":0,"tag":"FM.la"},"lemma":"censeo","tag":"FM.la","word":"censeo"},"msafe":1,"text":"censeo","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"censeo"}}
3 Carthaginem carthaginem X FM.la _ _ _ _ Translit=Carthaginem|norm=Carthaginem|details=* <0>|json={"dmoot":{"analyses":[{"details":"Carthaginem","prob":0,"tag":"Carthaginem"}],"morph":[{"hi":"[_FM][lat]","w":0}],"tag":"Carthaginem"},"f":6,"lang":["la"],"lts":[{"hi":"ka6taginem","w":0}],"mlatin":[{"hi":"[_FM][lat]","w":0}],"moot":{"analyses":[{"details":"[_FM][lat]","lemma":"carthaginem","prob":0,"tag":"FM"}],"details":{"details":"*","lemma":"carthaginem","prob":0,"tag":"FM.la"},"lemma":"carthaginem","tag":"FM.la","word":"Carthaginem"},"msafe":1,"text":"Carthaginem","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"Carthaginem"}}
4 esse esse X FM.la _ _ _ _ Translit=esse|norm=esse|details=* <0>|json={"dmoot":{"analyses":[{"details":"esse","prob":0,"tag":"esse"}],"morph":[{"hi":"ess~en[_VVFIN][first][sg][pres][ind]","w":0},{"hi":"ess~en[_VVFIN][first][sg][pres][subjI]","w":0},{"hi":"ess~en[_VVFIN][third][sg][pres][subjI]","w":0},{"hi":"[_FM][lat]","w":0}],"tag":"esse"},"errid":"71075","exlex":"esse","f":1046,"lang":["de","la"],"lts":[{"hi":"\\?ese","w":0}],"mlatin":[{"hi":"[_FM][lat]","w":0}],"moot":{"analyses":[{"details":"[_FM][lat]","lemma":"esse","prob":0,"tag":"FM"},{"details":"ess~en[_VVFIN][first][sg][pres][ind]","lemma":"essen","prob":0,"tag":"VVFIN"},{"details":"ess~en[_VVFIN][first][sg][pres][subjI]","lemma":"essen","prob":0,"tag":"VVFIN"},{"details":"ess~en[_VVFIN][third][sg][pres][subjI]","lemma":"essen","prob":0,"tag":"VVFIN"}],"details":{"details":"*","lemma":"esse","prob":0,"tag":"FM.la"},"lemma":"esse","tag":"FM.la","word":"esse"},"morph":[{"hi":"ess~en[_VVFIN][first][sg][pres][ind]","w":0},{"hi":"ess~en[_VVFIN][first][sg][pres][subjI]","w":0},{"hi":"ess~en[_VVFIN][third][sg][pres][subjI]","w":0}],"msafe":1,"text":"esse","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"esse"}}
5 delendam delendam X FM.la _ _ _ _ Translit=delendam|norm=delendam|details=* <0>|json={"dmoot":{"analyses":[{"details":"delendam","prob":0,"tag":"delendam"}],"morph":[{"hi":"[_FM][lat]","w":0}],"tag":"delendam"},"f":2,"lang":["la"],"lts":[{"hi":"delendam","w":0}],"mlatin":[{"hi":"[_FM][lat]","w":0}],"moot":{"analyses":[{"details":"[_FM][lat]","lemma":"delendam","prob":0,"tag":"FM"}],"details":{"details":"*","lemma":"delendam","prob":0,"tag":"FM.la"},"lemma":"delendam","tag":"FM.la","word":"delendam"},"msafe":1,"text":"delendam","xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"delendam"}}
6 . . PUNCT $. _ _ _ _ Translit=.|norm=.|details=$. <0>|json={"dmoot":{"analyses":[{"details":".","prob":0,"tag":"."}],"morph":[{"hi":"$.","w":0}],"tag":"."},"errid":"ec","exlex":".","f":5318438,"lts":[{"hi":"","w":0}],"moot":{"analyses":[{"details":"$.","lemma":".","prob":0,"tag":"$."}],"details":{"details":"$.","lemma":".","prob":0,"tag":"$."},"lemma":".","tag":"$.","word":"."},"msafe":1,"text":".","toka":["$."],"tokpp":["$."],"xlit":{"isLatin1":1,"isLatinExt":1,"latin1Text":"."}}
Bryan Jurish <jurish@bbaw.de>
Copyright (C) 2020 by Bryan Jurish
This package is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.20.2 or, at your option, any later version of Perl 5 you may have available.
Hey! The above document had some coding errors, which are explained below:
Non-ASCII character seen before =encoding in 'ohngefähr'. Assuming UTF-8