Classes
class	AssocVector
	LISP-style assoc list using vector<>: map-like class with small memory footprint. Useful for small associative arrays. Lookup and insert are linear time. More...

struct	AssocVectorNode
	template class for individual AssocVector nodes More...

class	cmdutil_file_churner
	Class for churning through many input files, given either directly or as a list-file. More...

class	GenericLexer
	Abstract base class for Flex++ lexers. More...

class	mootClassfreqs
	Class for storage and retrieval of raw lexical-class frequencies. More...

class	mootClassfreqsCompiler
	Lexical-class frequency parameter-file compiler. More...

class	mootDynHMM
	abstract HMM subclass for use with dynamic lexical probabilities. More...

class	mootDynHMMOptions
	Generic user-level options structure for built-in mootDynHMM subclasses. More...

class	mootDynLexHMM
	mootDynHMM subclass for dynamic lexical probabilities More...

class	mootDynLexHMM_Boltzmann
	mootDynHMM subclass using a Maxwell-Boltzmann distribution to estimate f(w,t) More...

class	mootEval
	Tagger-evaluation utility class. More...

class	mootExpatParser
	C++ Wrapper for expat XML parsers. More...

class	mootHMM
	1st-order Hidden Markov Model Tagger/Disambiguator class. More...

class	mootHMMTrainer
	High-level class to gather training data for a mootHMM or mootCHMM. More...

class	mootLexfreqs
	Class for storage and retrieval of raw lexical frequencies. More...

class	mootLexfreqsCompiler
	Lexical frequency parameter-file compiler. More...

class	mootMIParser
	HMM subclass for MI parsing. More...

class	mootModelSpec

class	mootNgrams
	Class for storage & retrieval of raw N-Gram frequencies. More...

class	mootNgramsCompiler
	N-gram parameter-file compiler. More...

class	mootRecoder
	Interface to librecode character-conversion routines. More...

class	mootTaster
	High-level heuristic token classifier. More...

class	mootToken
	High-level token information object. More...

class	mootXMLRecoder
	Special 2-phase recoder object for XML text. More...

class	SuffixTrie
	Top-level class for suffix tries. More...

class	TokenBuffer
	Class for in-memory token buffers using mootSentence. More...

class	TokenIO
	Abstract class for token I/O. More...

class	TokenReader
	Abstract class for token input. More...

class	TokenReaderExpat
	Experimental XML reader class using expat. More...

class	TokenReaderNative
	Class for native "cooked" text-format token input. More...

class	TokenWriter
	Abstract class for token output. More...

class	TokenWriterExpat
	Experimental XML writer class for use with expat-parsed XML or vanilla input. More...

class	TokenWriterNative
	Class for native "cooked" text-format token output. More...

class	TrieVector
	Top-level trie class-template using an adjacency table. More...

class	TrieVectorBase
	Base class for TrieVector. More...

struct	TrieVectorNode

struct	TrieVectorNodeBase

class	wasteAnnotator
	Sentence-functional annotations for tokens usually not covered by standard morphological analysis. More...

class	wasteAnnotatorWriter
	TokenWriter wrapper for wasteAnnotator. More...

class	wasteDecoder
	waste decoder component converts hidden tag attributes 's','S','w' to sentence- and token-boundaries More...

class	wasteLexer
	Mid-level scanner stage performs (optional) hyphenation normalization and text classification. More...

class	wasteLexerReader
	Mid-level scanner stage, wraps moot::wasteLexer in moot::TokenReader API. More...

class	wasteLexerToken

class	wasteLexicon
	simple hash_set<>-based lexicon class More...

class	wasteScanner
	Low-level class for raw text scanning. More...

class	wasteTokenScanner
	Raw text scanner class returning mootToken; wraps wasteScanner. More...

class	wasteTrainWriter
	TokenWriter wrapper class for writing WASTE tokenizer 'well-done' training data from pre-tokenized input with leading whitespace. More...

Typedefs
typedef string	mootFlavorStr

typedef UInt	mootFlavorID

typedef AssocVector< mootEnumID, ProbT >	SuffixTrieDataT

typedef string	mootTagString

typedef string	mootTokString

typedef set< mootTagString >	mootTagSet

typedef mootTokenTypeE	mootTokenType

typedef list< mootToken >	mootSentence

typedef TokenIOFormatE	TokenIOFormat

typedef float	ProbT

typedef ProbT	CountT

typedef long unsigned int	OffsetT

typedef int32_t	BinInt

typedef int32_t	BinLong

typedef uint32_t	BinUInt

typedef uint32_t	BinULong

typedef BinInt	Int

typedef BinUInt	UInt

typedef BinUInt	Size

typedef wasteScannerTypeE	wasteScannerType

typedef wasteLexerTypeE	wasteLexerType

Enumerations
enum	DynHMMClassId { dheUnknown, dheFreq, dheBoltzmann, dheMIParser, dheN }
	Enum for built-in mootDynHMM estimator modes (subclasses) More...

enum	mootTokenTypeE { TokTypeUnknown, TokTypeVanilla, TokTypeLibXML, TokTypeXMLRaw, TokTypeComment, TokTypeEOS, TokTypeEOF, TokTypeWB, TokTypeSB, TokTypeUser, NTokTypes }

enum	TokenIOFormatE { tiofNone = 0x00000000, tiofUnknown = 0x00000001, tiofNull = 0x00000002, tiofUser = 0x00000004, tiofNative = 0x00000008, tiofXML = 0x00000010, tiofConserve = 0x00000020, tiofPretty = 0x00000040, tiofText = 0x00000080, tiofAnalyzed = 0x00000100, tiofTagged = 0x00000200, tiofPruned = 0x00000400, tiofLocation = 0x00000800, tiofCost = 0x00001000, tiofTrace = 0x00002000, tiofPredict = 0x00004000, tiofFlush = 0x00008000 }

enum	wasteLexer_state { ls_flush = 0x0001, ls_hyph = 0x0002, ls_head = 0x0004, ls_tail = 0x0008, ls_nl = 0x0010, ls_sb_fw = 0x0020, ls_wb_fw = 0x0040, ls_blanked = 0x0080 }

enum	wasteScannerTypeE { wScanTypeEOF, wScanTypeWB, wScanTypeSB, wScanTypeLatin, wScanTypeGreek, wScanTypeAlpha, wScanTypeNewline, wScanTypeSpace, wScanTypeNumber, wScanTypeRoman, wScanTypeHyphen, wScanTypePunct, wScanTypeLink, wScanTypeXML, wScanTypeComment, wScanTypePercent, wScanTypeOther, NwScanTypes }

enum	wasteLexerTypeE { wLexerTypeDot, wLexerTypeComma, wLexerTypeHyph, wLexerTypeApostrophe, wLexerTypeQuote, wLexerTypeMonetary, wLexerTypePercent, wLexerTypePlus, wLexerTypeLBR, wLexerTypeRBR, wLexerTypeSlash, wLexerTypeColon, wLexerTypeSemicolon, wLexerTypeEOS, wLexerTypePunct, wLexerTypeSpace, wLexerTypeNewline, wLexerTypeNumber, wLexerTypeRomanCaps, wLexerTypeRomanLower, wLexerTypeAlphaLower, wLexerTypeAlphaUpper, wLexerTypeAlphaCaps, wLexerTypeAlphaTrunc, wLexerTypeOther, NwLexerTypes }

Functions
class mootDynHMM *	newDynHMM (DynHMMClassId which=dheFreq, const mootDynHMMOptions &opts=mootDynHMMOptions())

class mootDynHMM *	newDynHMM (const std::string &which="Freq", const mootDynHMMOptions &opts=mootDynHMMOptions())

mootToken &	sentence_printf_append (mootSentence &s, mootTokenType typ, const char *fmt,...)

std::string	utf8ToLower (const std::string &s)

Locale Utilities
void	moot_setlocale (void)

const char *	moot_lc_ctype (void)

const char *	moot_lc_numeric (void)

String Utilities
bool	moot_parse_doubles (const char *str, double *dbls, size_t ndbls)

void	moot_normalize_ws (const char *buf, size_t len, std::string &out, bool trim_left=true, bool trim_right=true)

void	moot_normalize_ws (const std::string &in, std::string &out, bool trim_left=true, bool trim_right=true)

void	moot_normalize_ws (const char *s, std::string &out, bool trim_left=true, bool trim_right=true)

std::string	moot_normalize_ws (const char *buf, size_t len, bool trim_left=true, bool trim_right=true)

std::string	moot_normalize_ws (const char *s, bool trim_left=true, bool trim_right=true)

std::string	moot_normalize_ws (const std::string &s, bool trim_left=true, bool trim_right=true)

void	moot_remove_newlines (char *buf, size_t len)

void	moot_remove_newlines (char *s)

void	moot_remove_newlines (std::string &s)

void	moot_strtok (const std::string &s, const std::string &delim, std::list< std::string > &out)

std::list< std::string >	moot_strtok (const std::string &s, const std::string &delim)

void	moot_strsplit (const std::string &s, const std::string &delim, std::vector< std::string > &out)

std::vector< std::string >	moot_strsplit (const std::string &s, const std::string &delim)

int	std_vsprintf (std::string &s, const char *fmt, va_list &ap)

int	std_sprintf (std::string &s, const char *fmt,...)

std::string	std_vssprintf (const char *fmt, va_list &ap)

std::string	std_ssprintf (const char *fmt,...)

Named File Utilities
bool	moot_file_exists (const char *filename)

bool	moot_file_exists (const std::string &filename)

std::string	moot_unextend (const char *filename)

const char *	moot_extension (const char *filename, size_t pos)

const char *	moot_extension (const char *filename)

lexer re2c wrappers, from wasteLexerTypes.cc
wasteLexerTypeE	waste_casetype (const std::string &tok_text)

wasteLexerTypeE	waste_lexertype (const std::string &tok_text)

Variables
const mootTaster	builtinTaster

const char *	mootTokenTypeNames [NTokTypes]

static const int	tiofRare = tiofText

static const int	tiofMediumRare = tiofText|tiofAnalyzed

static const int	tiofMedium = tiofText|tiofTagged

static const int	tiofWellDone = tiofText|tiofAnalyzed|tiofTagged

static const int	ls_init = (ls_wb_fw | ls_sb_fw | ls_blanked)

static const int	ls_head_hyph = ( ls_head | ls_hyph )

static const int	ls_head_hyph_nl = ( ls_head_hyph | ls_nl )

const char *	wasteScannerTypeNames [NwScanTypes]

const char *	wasteLexerTypeNames [NwLexerTypes]

Message and Command-line utilities
enum	VerbosityLevel { vlSilent = 0, vlErrors = 1, vlWarnings = 2, vlInfo = 3, vlProgress = 4, vlDebug = 5, vlTrace = 6, vlEverything = 255 }

std::string	moot_banner (void)

std::string	moot_program_banner (const std::string &prog_name, const std::string &prog_version, const std::string &prog_author, bool is_free=true)

void	moot_vcarp (const char *fmt, va_list &ap)

void	moot_carp (const char *fmt,...)

void	moot_vcroak (const char *fmt,...)

void	moot_croak (const char *fmt,...)

void	moot_vmsg (int curLevel, int minLevel, const char *fmt, va_list &ap)

void	moot_msg (int curLevel, int minLevel, const char *fmt,...)

waste tag attribute access
enum	wasteTagAttrPosE { wtap_w = 1, wtap_S = 4, wtap_s = 7 }

typedef wasteTagAttrPosE	wasteTagAttrPos

bool	waste_tag_attr_get (const std::string &tagstr, size_t rpos, bool mydefault=false)

void	waste_tag_attr_set (std::string &tagstr, size_t rpos, bool val)

Detailed Description

Default input buffer length for XML parsers

Typedef Documentation

◆ mootFlavorStr

typedef string moot::mootFlavorStr

◆ mootFlavorID

typedef UInt moot::mootFlavorID

◆ SuffixTrieDataT

typedef AssocVector<mootEnumID,ProbT> moot::SuffixTrieDataT

Typedef for suffix trie data

◆ mootTagString

typedef string moot::mootTagString

Tag-string type

◆ mootTokString

typedef string moot::mootTokString

Token-string type

◆ mootTagSet

typedef set<mootTagString> moot::mootTagSet

Tagset (read "lexical class") type

◆ mootTokenType

typedef mootTokenTypeE moot::mootTokenType

◆ mootSentence

typedef list<mootToken> moot::mootSentence

Sentences are just lists of mootToken objects

◆ TokenIOFormat

typedef TokenIOFormatE moot::TokenIOFormat

◆ ProbT

typedef float moot::ProbT

Type for probabilities

◆ CountT

typedef ProbT moot::CountT

Count types (for raw frequencies)

◆ OffsetT

typedef long unsigned int moot::OffsetT

Offset type (for byte offsets)

◆ BinInt

typedef int32_t moot::BinInt

Fixed-width signed integer type for binary I/O (32-bit)

◆ BinLong

typedef int32_t moot::BinLong

Fixed-width signed integer type for binary I/O (32-bit)

◆ BinUInt

typedef uint32_t moot::BinUInt

Fixed-width unsigned integer type for binary I/O (32-bit)

◆ BinULong

typedef uint32_t moot::BinULong

Fixed-width unsigned integer type for binary I/O (32-bit)

◆ Int

typedef BinInt moot::Int

alias (fixed-width)

◆ UInt

typedef BinUInt moot::UInt

alias (fixed-width)

◆ Size

typedef BinUInt moot::Size

alias (fixed-width)

◆ wasteScannerType

typedef wasteScannerTypeE moot::wasteScannerType

◆ wasteLexerType

typedef wasteLexerTypeE moot::wasteLexerType

◆ wasteTagAttrPos

typedef wasteTagAttrPosE moot::wasteTagAttrPos

Enumeration Type Documentation

◆ DynHMMClassId

enum moot::DynHMMClassId

Enumerator
dheUnknown	unknown
dheFreq	~= "Freq" ~= mootDynLexHMM
dheBoltzmann	~= "Boltzmann" ~= mootDynLexHMM_Boltzmann
dheMIParser	~= "MIParser" ~= mootMIParser
dheN	placeholder

◆ mootTokenTypeE

enum moot::mootTokenTypeE

Enumerator
TokTypeUnknown	we dunno what it is – could be anything
TokTypeVanilla	plain "vanilla" token (+/-besttag,+/-analyses)
TokTypeLibXML	plain XML token; much like 'Vanilla'
TokTypeXMLRaw	Raw XML text (for lossless XML I/O)
TokTypeComment	a comment, should be ignored by processing routines
TokTypeEOS	end-of-sentence
TokTypeEOF	end-of-file
TokTypeWB	word-break hint
TokTypeSB	sentence-break hint
TokTypeUser	user-defined token type: use in conjunction with 'tok_data'
NTokTypes	number of token-types (not a type itself)

◆ TokenIOFormatE

enum moot::TokenIOFormatE

Enum for I/O format flags

Enumerator
tiofNone	no format
tiofUnknown	unknown format
tiofNull	null i/o, useful for testing
tiofUser	some user-defined format
tiofNative	native text format
tiofXML	XML format.
tiofConserve	Conserve raw XML.
tiofPretty	Pretty-print (XML only)
tiofText	literal token text included
tiofAnalyzed	input is pre-analyzed (>= "medium rare")
tiofTagged	input is tagged ("medium" or "well done")
tiofPruned	pruned output
tiofLocation	locations appear as first non-tag analysis
tiofCost	parse/output analysis 'prob' field
tiofTrace	save full Viterbi trellis trace?
tiofPredict	include Viterbi trellis predictions in trace?
tiofFlush	autoflush output stream after write (native i/o only)?

◆ VerbosityLevel

enum moot::VerbosityLevel

enum for verbosity levels

Enumerator
vlSilent
vlErrors
vlWarnings
vlInfo
vlProgress
vlDebug
vlTrace
vlEverything

◆ wasteLexer_state

enum moot::wasteLexer_state

bitmask flags for possible lexer states (mainly used for dehyphenation)

Enumerator
ls_flush
ls_hyph
ls_head
ls_tail
ls_nl
ls_sb_fw
ls_wb_fw
ls_blanked

◆ wasteScannerTypeE

enum moot::wasteScannerTypeE

return value enum for wasteScanner::yylex()

Enumerator
wScanTypeEOF	end-of-stream
wScanTypeWB	$WB$: word-break hint
wScanTypeSB	$SB$: sentence-break hint
wScanTypeLatin	latin string
wScanTypeGreek	greek string
wScanTypeAlpha	alphabetic string, any script
wScanTypeNewline	newline
wScanTypeSpace	whitespace without embedded newline
wScanTypeNumber	number string
wScanTypeRoman	roman numeral string (subset of latin)
wScanTypeHyphen	hyphen, en-, or em-dash
wScanTypePunct	punctuation and "special" characters
wScanTypeLink	URI or other link
wScanTypeXML	raw XML
wScanTypeComment	raw comment
wScanTypePercent	escaped "%" sign (for literal "%%")
wScanTypeOther	any other character
NwScanTypes	number of scanner types (not a type itself)

◆ wasteLexerTypeE

enum moot::wasteLexerTypeE

return value enum for wasteLexer::yylex()

Enumerator
wLexerTypeDot	"."
wLexerTypeComma	","
wLexerTypeHyph	hyphen, en-, or em-dash
wLexerTypeApostrophe	single quotes and apostrophe
wLexerTypeQuote	quotation characters (quotes and guillemets)
wLexerTypeMonetary	currency symbols
wLexerTypePercent	paragraph, percent and permille character
wLexerTypePlus	"+"
wLexerTypeLBR	left brackets and left brace
wLexerTypeRBR	right brackets and right brace
wLexerTypeSlash	slash and backslash
wLexerTypeColon	":"
wLexerTypeSemicolon	";"
wLexerTypeEOS	sentence terminating punctuation characters
wLexerTypePunct	punctuation and "special" characters
wLexerTypeSpace	whitespace without embedded newline
wLexerTypeNewline	newline
wLexerTypeNumber	number string
wLexerTypeRomanCaps	roman numeral string in caps
wLexerTypeRomanLower	roman numeral string in lower case
wLexerTypeAlphaLower	alphabetic string, any script, lower case
wLexerTypeAlphaUpper	alphabetic string, any script, first character in upper case
wLexerTypeAlphaCaps	alphabetic string, any script, all characters in upper case
wLexerTypeAlphaTrunc	alphabetic string, any script, terminated by hyphen
wLexerTypeOther	any other character
NwLexerTypes	number of lexer types

◆ wasteTagAttrPosE

enum moot::wasteTagAttrPosE

waste tag hidden attribute positions, relative to end-of-tag

Enumerator
wtap_w
wtap_S
wtap_s

Function Documentation

◆ newDynHMM() [1/2]

class mootDynHMM* moot::newDynHMM	(	DynHMMClassId	which = `dheFreq`,
		const mootDynHMMOptions &	opts = `mootDynHMMOptions()`
	)

Generic constructor for built-in mootDynHMM subclasses

Referenced by moot::mootDynHMMOptions::~mootDynHMMOptions().

◆ newDynHMM() [2/2]

class mootDynHMM* moot::newDynHMM	(	const std::string &	which = `"Freq"`,
		const mootDynHMMOptions &	opts = `mootDynHMMOptions()`
	)

Generic constructor for built-in mootDynHMM subclasses, given subclass name

◆ sentence_printf_append()

mootToken& moot::sentence_printf_append	(	mootSentence &	s,
		mootTokenType	typ,
		const char *	fmt,
			...
	)

Sentences are just lists of mootToken objects. Utility method to add a printf()-formatted token at the end of s

◆ moot_setlocale()

void moot::moot_setlocale ( void )

initialize the current locale from the environment, forcing LC_NUMERIC="C"

◆ moot_lc_ctype()

const char* moot::moot_lc_ctype ( void )

get current value of LC_CTYPE, or the string "(unavailable)"

◆ moot_lc_numeric()

const char* moot::moot_lc_numeric ( void )

get current value of LC_NUMERIC, or the string "(unavailable)"

◆ moot_parse_doubles()

bool moot::moot_parse_doubles	(	const char *	str,
		double *	dbls,
		size_t	ndbls
	)

Parse a comma-separated list of doubles (at most 'ndbls') from str into dbls. You should already have allocated space for ndbls doubles in dbls.

◆ moot_normalize_ws() [1/6]

void moot::moot_normalize_ws	(	const char *	buf,
		size_t	len,
		std::string &	out,
		bool	trim_left = `true`,
		bool	trim_right = `true`
	)

Append a whitespace-normalized C buffer to an STL string. All whitespace substrings in buf are replaced with a single space in out. out is not cleared.

Parameters

buf	source buffer
len	length of source buffer in bytes
out	destination STL string
trim_left	whether to trim all leading whitespace
trim_right	whether to trim all trailing whitespace

Referenced by moot_normalize_ws(), and mootio::micbuffer::to_string().

◆ moot_normalize_ws() [2/6]

void moot::moot_normalize_ws	(	const std::string &	in,
		std::string &	out,
		bool	trim_left = `true`,
		bool	trim_right = `true`
	)

Append a whitespace-normalized C++ string to another C++ string. All whitespace substrings in in are replaced with a single space in out. out is not cleared.

Parameters

in	source string
out	destination string
trim_left	whether to trim all leading whitespace
trim_right	whether to trim all trailing whitespace

◆ moot_normalize_ws() [3/6]

void moot::moot_normalize_ws	(	const char *	s,
		std::string &	out,
		bool	trim_left = `true`,
		bool	trim_right = `true`
	)

inline

Append a whitespace-normalized NUL-terminated C string to an STL string.

Parameters

s	source string
out	destination STL string
trim_left	whether to trim all leading whitespace
trim_right	whether to trim all trailing whitespace

References moot_normalize_ws().

◆ moot_normalize_ws() [4/6]

std::string moot::moot_normalize_ws	(	const char *	buf,
		size_t	len,
		bool	trim_left = `true`,
		bool	trim_right = `true`
	)

inline

Create and return a whitespace-normalized STL string from a C memory buffer.

Parameters

buf	source buffer
len	length of source buffer, in bytes
trim_left	whether to trim all leading whitespace
trim_right	whether to trim all trailing whitespace

References moot_normalize_ws().

◆ moot_normalize_ws() [5/6]

std::string moot::moot_normalize_ws	(	const char *	s,
		bool	trim_left = `true`,
		bool	trim_right = `true`
	)

inline

Create and return a whitespace-normalized STL string from a NUL-terminated C string.

Parameters

s	source string
trim_left	whether to trim all leading whitespace
trim_right	whether to trim all trailing whitespace

References moot_normalize_ws().

◆ moot_normalize_ws() [6/6]

std::string moot::moot_normalize_ws	(	const std::string &	s,
		bool	trim_left = `true`,
		bool	trim_right = `true`
	)

inline

Create and return a whitespace-normalized STL string from a different STL string.

Parameters

s	source string
trim_left	whether to trim all leading whitespace
trim_right	whether to trim all trailing whitespace

References moot_normalize_ws(), and moot_remove_newlines().

◆ moot_remove_newlines() [1/3]

void moot::moot_remove_newlines	(	char *	buf,
		size_t	len
	)

Remove all newlines from a C buffer. Every newline is replaced with a single space.

Parameters

buf	target buffer
len	length of target buffer in bytes

Referenced by moot_normalize_ws(), and moot_remove_newlines().

◆ moot_remove_newlines() [2/3]

void moot::moot_remove_newlines ( char * s )

inline

Remove all newlines from a NUL-terminated C string.

References moot_file_exists(), moot_remove_newlines(), moot_strsplit(), moot_strtok(), std_sprintf(), std_ssprintf(), std_vsprintf(), and std_vssprintf().

◆ moot_remove_newlines() [3/3]

void moot::moot_remove_newlines ( std::string & s )

Remove all newlines from an STL string.

◆ moot_strtok() [1/2]

void moot::moot_strtok	(	const std::string &	s,
		const std::string &	delim,
		std::list< std::string > &	out
	)

Tokenize an STL string to an existing list. Multiple adjacent delimiters are treated as a single delimiter; i.e. no empty strings are returned.

Parameters

s	source string
delim	string of delimiter characters
out	destination string list

Referenced by moot_remove_newlines().

◆ moot_strtok() [2/2]

std::list<std::string> moot::moot_strtok	(	const std::string &	s,
		const std::string &	delim
	)

Tokenize an STL string to a new list.

Parameters

s	source string
delim	string of delimiter characters

◆ moot_strsplit() [1/2]

void moot::moot_strsplit	(	const std::string &	s,
		const std::string &	delim,
		std::vector< std::string > &	out
	)

Split an STL string to an existing vector. All delimiters are significant, i.e. empty output strings are allowed.

Parameters

s	source string
delim	string of delimiter characters
out	destination string list

Referenced by moot_remove_newlines().

◆ moot_strsplit() [2/2]

std::vector<std::string> moot::moot_strsplit	(	const std::string &	s,
		const std::string &	delim
	)

Split an STL string to a new vector. All delimiters are significant, i.e. empty output strings are allowed.

Parameters

s	source string
delim	string of delimiter characters

◆ std_vsprintf()

int moot::std_vsprintf	(	std::string &	s,
		const char *	fmt,
		va_list &	ap
	)

Stupid wrapper for append+printf() onto C++ strings.

Parameters

s	sink string
fmt	printf format
ap	printf args

Referenced by moot_remove_newlines().

◆ std_sprintf()

int moot::std_sprintf	(	std::string &	s,
		const char *	fmt,
			...
	)

Stupid wrapper for append+printf() onto C++ strings.

Parameters

s	sink string
fmt	printf format
...	printf args

Referenced by moot_remove_newlines().

◆ std_vssprintf()

std::string moot::std_vssprintf	(	const char *	fmt,
		va_list &	ap
	)

Stupid wrapper for printf() returning a C++ string

Parameters

fmt	printf format
ap	printf args

Referenced by moot_remove_newlines().

◆ std_ssprintf()

std::string moot::std_ssprintf	(	const char *	fmt,
			...
	)

Stupid wrapper for printf() returning a C++ string

Parameters

fmt	printf format
...	printf args

Referenced by moot_remove_newlines().

◆ moot_file_exists() [1/2]

bool moot::moot_file_exists ( const char * filename )

Check whether a file exists by trying to open it with 'fopen()'

Referenced by moot_file_exists(), and moot_remove_newlines().

◆ moot_file_exists() [2/2]

bool moot::moot_file_exists ( const std::string & filename )

inline

Check whether a file exists by trying to open it with 'fopen()', std::string version

References moot_extension(), moot_file_exists(), and moot_unextend().

◆ moot_unextend()

std::string moot::moot_unextend ( const char * filename )

Get path+basename of a file

Referenced by moot_file_exists().

◆ moot_extension() [1/2]

const char* moot::moot_extension	(	const char *	filename,
		size_t	pos
	)

Get final extension of a filename (including leading '.'), reading backwards from (filename+pos). Returns a pointer into filename. If no next extension is found, returns NULL.

Referenced by moot_extension(), and moot_file_exists().

◆ moot_extension() [2/2]

const char* moot::moot_extension ( const char * filename )

inline

Get extension of a filename (including leading '.')

References moot_extension().

◆ moot_banner()

std::string moot::moot_banner ( void )

Return a banner string for the library

◆ moot_program_banner()

std::string moot::moot_program_banner	(	const std::string &	prog_name,
		const std::string &	prog_version,
		const std::string &	prog_author,
		bool	is_free = `true`
	)

Return a full banner string for a program using the library.

◆ moot_vcarp()

void moot::moot_vcarp	(	const char *	fmt,
		va_list &	ap
	)

verbose message to stderr, va_list version

◆ moot_carp()

void moot::moot_carp	(	const char *	fmt,
			...
	)

verbose message to stderr

◆ moot_vcroak()

void moot::moot_vcroak	(	const char *	fmt,
			...
	)

verbose message to stderr followed by abort(), va_list version

◆ moot_croak()

void moot::moot_croak	(	const char *	fmt,
			...
	)

verbose message to stderr followed by abort()

◆ moot_vmsg()

void moot::moot_vmsg	(	int	curLevel,
		int	minLevel,
		const char *	fmt,
		va_list &	ap
	)

conditional message to stderr (prints only if curLevel>=minLevel), va_list version

Parameters

curLevel	current verbosity level
minLevel	minimum level for print
fmt	printf format
ap	printf arguments

◆ moot_msg()

void moot::moot_msg	(	int	curLevel,
		int	minLevel,
		const char *	fmt,
			...
	)

conditional message to stderr (prints only if curLevel>=minLevel), varargs version

Parameters

curLevel	current verbosity level
minLevel	minimum level for print
fmt	printf format
...	printf arguments

◆ utf8ToLower()

std::string moot::utf8ToLower ( const std::string & s )

Return a lower-cased version of s; in- and outputs are UTF-8 encoded byte strings

◆ waste_tag_attr_get()

bool moot::waste_tag_attr_get	(	const std::string &	tagstr,
		size_t	rpos,
		bool	mydefault = `false`
	)

inline

Get a boolean WASTE tag attribute by position relative to end-of-string

Referenced by moot::wasteDecoder::tag_attr_s(), moot::wasteDecoder::tag_attr_S(), and moot::wasteDecoder::tag_attr_w().

◆ waste_tag_attr_set()

void moot::waste_tag_attr_set	(	std::string &	tagstr,
		size_t	rpos,
		bool	val
	)

inline

Set a boolean WASTE tag attribute by position relative to end-of-string

References waste_casetype(), and waste_lexertype().

◆ waste_casetype()

wasteLexerTypeE moot::waste_casetype ( const std::string & tok_text )

Get waste case-type for tok_text

Referenced by waste_tag_attr_set().

◆ waste_lexertype()

wasteLexerTypeE moot::waste_lexertype ( const std::string & tok_text )

Get waste lexer-type for tok_text

Referenced by waste_tag_attr_set().

Variable Documentation

◆ builtinTaster

const mootTaster moot::builtinTaster

Default built-in taster

Referenced by moot::mootHMM::unknown_class_name().

◆ mootTokenTypeNames

const char* moot::mootTokenTypeNames[NTokTypes]

Useful for debugging token types

◆ tiofRare

const int moot::tiofRare = tiofText

static

Format alias for 'Cooked Rare' files.

◆ tiofMediumRare

const int moot::tiofMediumRare = tiofText|tiofAnalyzed

static

Format alias for 'Cooked Medium Rare' files.

◆ tiofMedium

const int moot::tiofMedium = tiofText|tiofTagged

static

Format alias for 'Cooked Medium' files.

◆ tiofWellDone

const int moot::tiofWellDone = tiofText|tiofAnalyzed|tiofTagged

static

Format alias for 'Cooked Well Done' files.

◆ ls_init

const int moot::ls_init = (ls_wb_fw | ls_sb_fw | ls_blanked)

static

initial state of the lexer

◆ ls_head_hyph

const int moot::ls_head_hyph = ( ls_head | ls_hyph )

static

lexer has seen some word followed by a hyphen

◆ ls_head_hyph_nl

const int moot::ls_head_hyph_nl = ( ls_head_hyph | ls_nl )

static

lexer has seen some word followed by a hyphen and a newline

◆ wasteScannerTypeNames

const char* moot::wasteScannerTypeNames[NwScanTypes]

Symbolic names for wasteScannerTypeE, useful for debugging

◆ wasteLexerTypeNames

const char* moot::wasteLexerTypeNames[NwLexerTypes]

Useful for debugging old dwdsScanner types

Classes

Typedefs

Enumerations

Functions

Variables

Message and Command-line utilities

waste tag attribute access

Detailed Description

Typedef Documentation

◆ mootFlavorStr

◆ mootFlavorID

◆ SuffixTrieDataT

◆ mootTagString

◆ mootTokString

◆ mootTagSet

◆ mootTokenType

◆ mootSentence

◆ TokenIOFormat

◆ ProbT

◆ CountT

◆ OffsetT

◆ BinInt

◆ BinLong

◆ BinUInt

◆ BinULong

◆ Int

◆ UInt

◆ Size

◆ wasteScannerType

◆ wasteLexerType

◆ wasteTagAttrPos

Enumeration Type Documentation

◆ DynHMMClassId

◆ mootTokenTypeE

◆ TokenIOFormatE

◆ VerbosityLevel

◆ wasteLexer_state

◆ wasteScannerTypeE

◆ wasteLexerTypeE

◆ wasteTagAttrPosE

Function Documentation

◆ newDynHMM() [1/2]

◆ newDynHMM() [2/2]

◆ sentence_printf_append()

◆ moot_setlocale()

◆ moot_lc_ctype()

◆ moot_lc_numeric()

◆ moot_parse_doubles()

◆ moot_normalize_ws() [1/6]

◆ moot_normalize_ws() [2/6]

◆ moot_normalize_ws() [3/6]

◆ moot_normalize_ws() [4/6]

◆ moot_normalize_ws() [5/6]

◆ moot_normalize_ws() [6/6]

◆ moot_remove_newlines() [1/3]

◆ moot_remove_newlines() [2/3]

◆ moot_remove_newlines() [3/3]

◆ moot_strtok() [1/2]

◆ moot_strtok() [2/2]

◆ moot_strsplit() [1/2]

◆ moot_strsplit() [2/2]

◆ std_vsprintf()

◆ std_sprintf()

◆ std_vssprintf()

◆ std_ssprintf()

◆ moot_file_exists() [1/2]

◆ moot_file_exists() [2/2]

◆ moot_unextend()

◆ moot_extension() [1/2]

◆ moot_extension() [2/2]

◆ moot_banner()

◆ moot_program_banner()

◆ moot_vcarp()

◆ moot_carp()

◆ moot_vcroak()

◆ moot_croak()

◆ moot_vmsg()

◆ moot_msg()

◆ utf8ToLower()

◆ waste_tag_attr_get()