Classes | Typedefs | Enumerations | Functions | Variables
moot Namespace Reference

Classes

class  AssocVector
 LISP-style assoc list using vector<>: map-like class with small memory footprint. Useful for small associative arrays. Lookup and insert are linear time. More...
 
struct  AssocVectorNode
 template class for individual AssocVector nodes More...
 
class  cmdutil_file_churner
 Class for churning through many input files, given either directly or as a list-file. More...
 
class  GenericLexer
 Abstract base class for Flex++ lexers. More...
 
class  mootClassfreqs
 Class for storage and retrieval of raw lexical-class frequencies. More...
 
class  mootClassfreqsCompiler
 Lexical-class frequency parameter-file compiler. More...
 
class  mootDynHMM
 abstract HMM subclass for use with dynamic lexical probabilities. More...
 
class  mootDynHMMOptions
 Generic user-level options structure for built-in mootDynHMM subclasses. More...
 
class  mootDynLexHMM
 mootDynHMM subclass for dynamic lexical probabilities More...
 
class  mootDynLexHMM_Boltzmann
 mootDynHMM subclass using a Maxwell-Boltzmann distribution to estimate f(w,t) More...
 
class  mootEval
 Tagger-evaluation utility class. More...
 
class  mootExpatParser
 C++ Wrapper for expat XML parsers. More...
 
class  mootHMM
 1st-order Hidden Markov Model Tagger/Disambiguator class. More...
 
class  mootHMMTrainer
 High-level class to gather training data for a mootHMM or mootCHMM. More...
 
class  mootLexfreqs
 Class for storage and retrieval of raw lexical frequencies. More...
 
class  mootLexfreqsCompiler
 Lexical frequency parameter-file compiler. More...
 
class  mootMIParser
 HMM subclass for MI parsing. More...
 
class  mootModelSpec
 
class  mootNgrams
 Class for storage & retrieval of raw N-Gram frequencies. More...
 
class  mootNgramsCompiler
 N-gram parameter-file compiler. More...
 
class  mootRecoder
 Interface to librecode character-conversion routines. More...
 
class  mootTaster
 High-level heuristic token classifier. More...
 
class  mootToken
 High-level token information object. More...
 
class  mootXMLRecoder
 Special 2-phase recoder object for XML text. More...
 
class  SuffixTrie
 Top-level class for suffix tries. More...
 
class  TokenBuffer
 Class for in-memory token buffers using mootSentence. More...
 
class  TokenIO
 Abstract class for token I/O. More...
 
class  TokenReader
 Abstract class for token input. More...
 
class  TokenReaderExpat
 Experimental XML reader class using expat. More...
 
class  TokenReaderNative
 Class for native "cooked" text-format token input. More...
 
class  TokenWriter
 Abstract class for token output. More...
 
class  TokenWriterExpat
 Experimental XML writer class for use with expat-parsed XML or vanilla input. More...
 
class  TokenWriterNative
 Class for native "cooked" text-format token output. More...
 
class  TrieVector
 Top-level trie class-template using an adjacency table. More...
 
class  TrieVectorBase
 Base class for TrieVector. More...
 
struct  TrieVectorNode
 
struct  TrieVectorNodeBase
 
class  wasteAnnotator
 Sentence-functional annotations for tokens usually not covered by standard morphological analysis. More...
 
class  wasteAnnotatorWriter
 TokenWriter wrapper for wasteAnnotator. More...
 
class  wasteDecoder
 waste decoder component converts hidden tag attributes 's','S','w' to sentence- and token-boundaries More...
 
class  wasteLexer
 Mid-level scanner stage performs (optional) hyphenation normalization and text classification. More...
 
class  wasteLexerReader
 Mid-level scanner stage, wraps moot::wasteLexer in moot::TokenReader API. More...
 
class  wasteLexerToken
 
class  wasteLexicon
 simple hash_set<>-based lexicon class More...
 
class  wasteScanner
 Low-level class for raw text scanning. More...
 
class  wasteTokenScanner
 Raw text scanner class returning mootToken; wraps wasteScanner. More...
 
class  wasteTrainWriter
 TokenWriter wrapper class for writing WASTE tokenizer 'well-done' training data from pre-tokenized input with leading whitespace. More...
 

Typedefs

typedef string mootFlavorStr
 
typedef UInt mootFlavorID
 
typedef AssocVector< mootEnumID, ProbT > SuffixTrieDataT
 
typedef string mootTagString
 
typedef string mootTokString
 
typedef set< mootTagString > mootTagSet
 
typedef mootTokenTypeE mootTokenType
 
typedef list< mootToken > mootSentence
 
typedef TokenIOFormatE TokenIOFormat
 
typedef float ProbT
 
typedef ProbT CountT
 
typedef long unsigned int OffsetT
 
typedef int32_t BinInt
 
typedef int32_t BinLong
 
typedef uint32_t BinUInt
 
typedef uint32_t BinULong
 
typedef BinInt Int
 
typedef BinUInt UInt
 
typedef BinUInt Size
 
typedef wasteScannerTypeE wasteScannerType
 
typedef wasteLexerTypeE wasteLexerType
 

Enumerations

enum  DynHMMClassId {
  dheUnknown, dheFreq, dheBoltzmann, dheMIParser,
  dheN
}
 Enum for built-in mootDynHMM estimator modes (subclasses) More...
 
enum  mootTokenTypeE {
  TokTypeUnknown, TokTypeVanilla, TokTypeLibXML, TokTypeXMLRaw,
  TokTypeComment, TokTypeEOS, TokTypeEOF, TokTypeWB,
  TokTypeSB, TokTypeUser, NTokTypes
}
 
enum  TokenIOFormatE {
  tiofNone = 0x00000000, tiofUnknown = 0x00000001, tiofNull = 0x00000002, tiofUser = 0x00000004,
  tiofNative = 0x00000008, tiofXML = 0x00000010, tiofConserve = 0x00000020, tiofPretty = 0x00000040,
  tiofText = 0x00000080, tiofAnalyzed = 0x00000100, tiofTagged = 0x00000200, tiofPruned = 0x00000400,
  tiofLocation = 0x00000800, tiofCost = 0x00001000, tiofTrace = 0x00002000, tiofPredict = 0x00004000,
  tiofFlush = 0x00008000
}
 
enum  wasteLexer_state {
  ls_flush = 0x0001, ls_hyph = 0x0002, ls_head = 0x0004, ls_tail = 0x0008,
  ls_nl = 0x0010, ls_sb_fw = 0x0020, ls_wb_fw = 0x0040, ls_blanked = 0x0080
}
 
enum  wasteScannerTypeE {
  wScanTypeEOF, wScanTypeWB, wScanTypeSB, wScanTypeLatin,
  wScanTypeGreek, wScanTypeAlpha, wScanTypeNewline, wScanTypeSpace,
  wScanTypeNumber, wScanTypeRoman, wScanTypeHyphen, wScanTypePunct,
  wScanTypeLink, wScanTypeXML, wScanTypeComment, wScanTypePercent,
  wScanTypeOther, NwScanTypes
}
 
enum  wasteLexerTypeE {
  wLexerTypeDot, wLexerTypeComma, wLexerTypeHyph, wLexerTypeApostrophe,
  wLexerTypeQuote, wLexerTypeMonetary, wLexerTypePercent, wLexerTypePlus,
  wLexerTypeLBR, wLexerTypeRBR, wLexerTypeSlash, wLexerTypeColon,
  wLexerTypeSemicolon, wLexerTypeEOS, wLexerTypePunct, wLexerTypeSpace,
  wLexerTypeNewline, wLexerTypeNumber, wLexerTypeRomanCaps, wLexerTypeRomanLower,
  wLexerTypeAlphaLower, wLexerTypeAlphaUpper, wLexerTypeAlphaCaps, wLexerTypeAlphaTrunc,
  wLexerTypeOther, NwLexerTypes
}
 

Functions

class mootDynHMM * newDynHMM (DynHMMClassId which=dheFreq, const mootDynHMMOptions &opts=mootDynHMMOptions())
 
class mootDynHMM * newDynHMM (const std::string &which="Freq", const mootDynHMMOptions &opts=mootDynHMMOptions())
 
mootToken & sentence_printf_append (mootSentence &s, mootTokenType typ, const char *fmt,...)
 
std::string utf8ToLower (const std::string &s)
 
Locale Utilities
void moot_setlocale (void)
 
const char * moot_lc_ctype (void)
 
const char * moot_lc_numeric (void)
 
String Utilities
bool moot_parse_doubles (const char *str, double *dbls, size_t ndbls)
 
void moot_normalize_ws (const char *buf, size_t len, std::string &out, bool trim_left=true, bool trim_right=true)
 
void moot_normalize_ws (const std::string &in, std::string &out, bool trim_left=true, bool trim_right=true)
 
void moot_normalize_ws (const char *s, std::string &out, bool trim_left=true, bool trim_right=true)
 
std::string moot_normalize_ws (const char *buf, size_t len, bool trim_left=true, bool trim_right=true)
 
std::string moot_normalize_ws (const char *s, bool trim_left=true, bool trim_right=true)
 
std::string moot_normalize_ws (const std::string &s, bool trim_left=true, bool trim_right=true)
 
void moot_remove_newlines (char *buf, size_t len)
 
void moot_remove_newlines (char *s)
 
void moot_remove_newlines (std::string &s)
 
void moot_strtok (const std::string &s, const std::string &delim, std::list< std::string > &out)
 
std::list< std::string > moot_strtok (const std::string &s, const std::string &delim)
 
void moot_strsplit (const std::string &s, const std::string &delim, std::vector< std::string > &out)
 
std::vector< std::string > moot_strsplit (const std::string &s, const std::string &delim)
 
int std_vsprintf (std::string &s, const char *fmt, va_list &ap)
 
int std_sprintf (std::string &s, const char *fmt,...)
 
std::string std_vssprintf (const char *fmt, va_list &ap)
 
std::string std_ssprintf (const char *fmt,...)
 
Named File Utilities
bool moot_file_exists (const char *filename)
 
bool moot_file_exists (const std::string &filename)
 
std::string moot_unextend (const char *filename)
 
const char * moot_extension (const char *filename, size_t pos)
 
const char * moot_extension (const char *filename)
 
lexer re2c wrappers, from wasteLexerTypes.cc
wasteLexerTypeE waste_casetype (const std::string &tok_text)
 
wasteLexerTypeE waste_lexertype (const std::string &tok_text)
 

Variables

const mootTaster builtinTaster
 
const char * mootTokenTypeNames [NTokTypes]
 
static const int tiofRare = tiofText
 
static const int tiofMediumRare = tiofText|tiofAnalyzed
 
static const int tiofMedium = tiofText|tiofTagged
 
static const int tiofWellDone = tiofText|tiofAnalyzed|tiofTagged
 
static const int ls_init = (ls_wb_fw | ls_sb_fw | ls_blanked)
 
static const int ls_head_hyph = ( ls_head | ls_hyph )
 
static const int ls_head_hyph_nl = ( ls_head_hyph | ls_nl )
 
const char * wasteScannerTypeNames [NwScanTypes]
 
const char * wasteLexerTypeNames [NwLexerTypes]
 

Message and Command-line utilities

enum  VerbosityLevel {
  vlSilent = 0, vlErrors = 1, vlWarnings = 2, vlInfo = 3,
  vlProgress = 4, vlDebug = 5, vlTrace = 6, vlEverything = 255
}
 
std::string moot_banner (void)
 
std::string moot_program_banner (const std::string &prog_name, const std::string &prog_version, const std::string &prog_author, bool is_free=true)
 
void moot_vcarp (const char *fmt, va_list &ap)
 
void moot_carp (const char *fmt,...)
 
void moot_vcroak (const char *fmt,...)
 
void moot_croak (const char *fmt,...)
 
void moot_vmsg (int curLevel, int minLevel, const char *fmt, va_list &ap)
 
void moot_msg (int curLevel, int minLevel, const char *fmt,...)
 

waste tag attribute access

enum  wasteTagAttrPosE { wtap_w = 1, wtap_S = 4, wtap_s = 7 }
 
typedef wasteTagAttrPosE wasteTagAttrPos
 
bool waste_tag_attr_get (const std::string &tagstr, size_t rpos, bool mydefault=false)
 
void waste_tag_attr_set (std::string &tagstr, size_t rpos, bool val)
 

Detailed Description

Default input buffer length for XML parsers

Typedef Documentation

◆ mootFlavorStr

typedef string moot::mootFlavorStr

◆ mootFlavorID

◆ SuffixTrieDataT

Typedef for suffix trie data

◆ mootTagString

typedef string moot::mootTagString

Tag-string type

◆ mootTokString

typedef string moot::mootTokString

Token-string type

◆ mootTagSet

Tagset (read "lexical class") type

◆ mootTokenType

◆ mootSentence

Sentences are just lists of mootToken objects

◆ TokenIOFormat

◆ ProbT

typedef float moot::ProbT

Type for probabilities

◆ CountT

Count types (for raw frequencies)

◆ OffsetT

typedef long unsigned int moot::OffsetT

Offset type (for byte offsets)

◆ BinInt

typedef int32_t moot::BinInt

Fixed-width signed integer type for binary I/O (32-bit)

◆ BinLong

typedef int32_t moot::BinLong

Fixed-width signed integer type for binary I/O (32-bit)

◆ BinUInt

typedef uint32_t moot::BinUInt

Fixed-width unsigned integer type for binary I/O (32-bit)

◆ BinULong

typedef uint32_t moot::BinULong

Fixed-width unsigned integer type for binary I/O (32-bit)

◆ Int

typedef BinInt moot::Int

alias (fixed-width)

◆ UInt

alias (fixed-width)

◆ Size

alias (fixed-width)

◆ wasteScannerType

◆ wasteLexerType

◆ wasteTagAttrPos

Enumeration Type Documentation

◆ DynHMMClassId

Enumerator
dheUnknown 

unknown

dheFreq 

~= "Freq" ~= mootDynLexHMM

dheBoltzmann 

~= "Boltzmann" ~= mootDynLexHMM_Boltzmann

dheMIParser 

~= "MIParser" ~= mootMIParser

dheN 

placeholder

◆ mootTokenTypeE

Enumerator
TokTypeUnknown 

we dunno what it is – could be anything

TokTypeVanilla 

plain "vanilla" token (+/-besttag,+/-analyses)

TokTypeLibXML 

plain XML token; much like 'Vanilla'

TokTypeXMLRaw 

Raw XML text (for lossless XML I/O)

TokTypeComment 

a comment, should be ignored by processing routines

TokTypeEOS 

end-of-sentence

TokTypeEOF 

end-of-file

TokTypeWB 

word-break hint

TokTypeSB 

sentence-break hint

TokTypeUser 

user-defined token type: use in conjunction with 'tok_data'

NTokTypes 

number of token-types (not a type itself)

◆ TokenIOFormatE

Enum for I/O format flags

Enumerator
tiofNone 

no format

tiofUnknown 

unknown format

tiofNull 

null i/o, useful for testing

tiofUser 

some user-defined format

tiofNative 

native text format

tiofXML 

XML format.

tiofConserve 

Conserve raw XML.

tiofPretty 

Pretty-print (XML only)

tiofText 

literal token text included

tiofAnalyzed 

input is pre-analyzed (>= "medium rare")

tiofTagged 

input is tagged ("medium" or "well done")

tiofPruned 

pruned output

tiofLocation 

locations appear as first non-tag analysis

tiofCost 

parse/output analysis 'prob' field

tiofTrace 

save full Viterbi trellis trace?

tiofPredict 

include Viterbi trellis predictions in trace?

tiofFlush 

autoflush output stream after write (native i/o only)?

◆ VerbosityLevel

enum for verbosity levels

Enumerator
vlSilent 
vlErrors 
vlWarnings 
vlInfo 
vlProgress 
vlDebug 
vlTrace 
vlEverything 

◆ wasteLexer_state

bitmask flags for possible lexer states (mainly used for dehyphenation)

Enumerator
ls_flush 
ls_hyph 
ls_head 
ls_tail 
ls_nl 
ls_sb_fw 
ls_wb_fw 
ls_blanked 

◆ wasteScannerTypeE

return value enum for wasteScanner::yylex()

Enumerator
wScanTypeEOF 

end-of-stream

wScanTypeWB 

$WB$: word-break hint

wScanTypeSB 

$SB$: sentence-break hint

wScanTypeLatin 

latin string

wScanTypeGreek 

greek string

wScanTypeAlpha 

alphabetic string, any script

wScanTypeNewline 

newline

wScanTypeSpace 

whitespace without embedded newline

wScanTypeNumber 

number string

wScanTypeRoman 

roman numeral string (subset of latin)

wScanTypeHyphen 

hyphen, en-, or em-dash

wScanTypePunct 

punctuation and "special" characters

wScanTypeLink 

URI or other link

wScanTypeXML 

raw XML

wScanTypeComment 

raw comment

wScanTypePercent 

escaped "%" sign (for literal "%%")

wScanTypeOther 

any other character

NwScanTypes 

eof

◆ wasteLexerTypeE

return value enum for wasteLexer::yylex()

Enumerator
wLexerTypeDot 

"."

wLexerTypeComma 

","

wLexerTypeHyph 

hyphen, en-, or em-dash

wLexerTypeApostrophe 

single quotes and apostrophe

wLexerTypeQuote 

quotation characters (quotes and guillemets)

wLexerTypeMonetary 

currency symbols

wLexerTypePercent 

paragraph, percent and permille character

wLexerTypePlus 

"+"

wLexerTypeLBR 

left brackets and left brace

wLexerTypeRBR 

right brackets and right brace

wLexerTypeSlash 

slash and backslash

wLexerTypeColon 

":"

wLexerTypeSemicolon 

";"

wLexerTypeEOS 

sentence terminating punctuation characters

wLexerTypePunct 

punctuation and "special" characters

wLexerTypeSpace 

whitespace without embedded newline

wLexerTypeNewline 

newline

wLexerTypeNumber 

number string

wLexerTypeRomanCaps 

roman numeral string in caps

wLexerTypeRomanLower 

roman numeral string in lower case

wLexerTypeAlphaLower 

alphabetic string, any script, lower case

wLexerTypeAlphaUpper 

alphabetic string, any script, first character in upper case

wLexerTypeAlphaCaps 

alphabetic string, any script, all characters in upper case

wLexerTypeAlphaTrunc 

alphabetic string, any script, terminated by hyphen

wLexerTypeOther 

any other character

NwLexerTypes 

number of lexer types

◆ wasteTagAttrPosE

waste tag hidden attribute positions, relative to end-of-tag

Enumerator
wtap_w 
wtap_S 
wtap_s 

Function Documentation

◆ newDynHMM() [1/2]

class mootDynHMM* moot::newDynHMM ( DynHMMClassId  which = dheFreq,
const mootDynHMMOptions &  opts = mootDynHMMOptions() 
)

Generic constructor for built-in mootDynHMM subclasses

Referenced by moot::mootDynHMMOptions::~mootDynHMMOptions().

◆ newDynHMM() [2/2]

class mootDynHMM* moot::newDynHMM ( const std::string &  which = "Freq",
const mootDynHMMOptions &  opts = mootDynHMMOptions() 
)

Generic constructor for built-in mootDynHMM subclasses, given subclass name

◆ sentence_printf_append()

mootToken& moot::sentence_printf_append ( mootSentence &  s,
mootTokenType  typ,
const char *  fmt,
  ... 
)

Sentences are just lists of mootToken objects. Utility method to add a printf()-formatted token at the end of s.

◆ moot_setlocale()

void moot::moot_setlocale ( void  )

initialize the current locale from the environment, forcing LC_NUMERIC="C"

◆ moot_lc_ctype()

const char* moot::moot_lc_ctype ( void  )

get current value of LC_CTYPE, or the string "(unavailable)"

◆ moot_lc_numeric()

const char* moot::moot_lc_numeric ( void  )

get current value of LC_NUMERIC, or the string "(unavailable)"

◆ moot_parse_doubles()

bool moot::moot_parse_doubles ( const char *  str,
double *  dbls,
size_t  ndbls 
)

Parse a comma-separated list of doubles (at most 'ndbls') from str into dbls. You should already have allocated space for ndbls doubles in dbls.

◆ moot_normalize_ws() [1/6]

void moot::moot_normalize_ws ( const char *  buf,
size_t  len,
std::string &  out,
bool  trim_left = true,
bool  trim_right = true 
)

Append a whitespace-normalized C buffer to an STL string. All whitespace substrings in buf are replaced with a single space in out. out is not cleared.

Parameters
buf: source buffer
len: length of source buffer in bytes
out: destination STL string
trim_left: whether to trim all leading whitespace
trim_right: whether to trim all trailing whitespace

Referenced by moot_normalize_ws(), and mootio::micbuffer::to_string().

◆ moot_normalize_ws() [2/6]

void moot::moot_normalize_ws ( const std::string &  in,
std::string &  out,
bool  trim_left = true,
bool  trim_right = true 
)

Append a whitespace-normalized C++ string to another C++ string. All whitespace substrings in in are replaced with a single space in out. out is not cleared.

Parameters
in: source string
out: destination string
trim_left: whether to trim all leading whitespace
trim_right: whether to trim all trailing whitespace

◆ moot_normalize_ws() [3/6]

void moot::moot_normalize_ws ( const char *  s,
std::string &  out,
bool  trim_left = true,
bool  trim_right = true 
)
inline

Append a whitespace-normalized NUL-terminated C string to an STL string.

Parameters
s: source string
out: destination STL string
trim_left: whether to trim all leading whitespace
trim_right: whether to trim all trailing whitespace

References moot_normalize_ws().

◆ moot_normalize_ws() [4/6]

std::string moot::moot_normalize_ws ( const char *  buf,
size_t  len,
bool  trim_left = true,
bool  trim_right = true 
)
inline

Create and return a whitespace-normalized STL string from a C memory buffer.

Parameters
buf: source buffer
len: length of source buffer, in bytes
trim_left: whether to trim all leading whitespace
trim_right: whether to trim all trailing whitespace

References moot_normalize_ws().

◆ moot_normalize_ws() [5/6]

std::string moot::moot_normalize_ws ( const char *  s,
bool  trim_left = true,
bool  trim_right = true 
)
inline

Create and return a whitespace-normalized STL string from a NUL-terminated C string.

Parameters
s: source string
trim_left: whether to trim all leading whitespace
trim_right: whether to trim all trailing whitespace

References moot_normalize_ws().

◆ moot_normalize_ws() [6/6]

std::string moot::moot_normalize_ws ( const std::string &  s,
bool  trim_left = true,
bool  trim_right = true 
)
inline

Create and return a whitespace-normalized STL string from a different STL string.

Parameters
s: source string
trim_left: whether to trim all leading whitespace
trim_right: whether to trim all trailing whitespace

References moot_normalize_ws(), and moot_remove_newlines().

◆ moot_remove_newlines() [1/3]

void moot::moot_remove_newlines ( char *  buf,
size_t  len 
)

Remove all newlines from a C buffer. Every newline is replaced with a single space.

Parameters
buf: target buffer
len: length of target buffer in bytes

Referenced by moot_normalize_ws(), and moot_remove_newlines().

◆ moot_remove_newlines() [2/3]

void moot::moot_remove_newlines ( char *  s)
inline

Remove all newlines from a NUL-terminated C string.

References moot_file_exists(), moot_remove_newlines(), moot_strsplit(), moot_strtok(), std_sprintf(), std_ssprintf(), std_vsprintf(), and std_vssprintf().

◆ moot_remove_newlines() [3/3]

void moot::moot_remove_newlines ( std::string &  s)

Remove all newlines from an STL string.

◆ moot_strtok() [1/2]

void moot::moot_strtok ( const std::string &  s,
const std::string &  delim,
std::list< std::string > &  out 
)

Tokenize an STL string to an existing list. Multiple adjacent delimiters are treated as a single delimiter; i.e. no empty strings are returned.

Parameters
s: source string
delim: string of delimiter characters
out: destination string list

Referenced by moot_remove_newlines().

◆ moot_strtok() [2/2]

std::list<std::string> moot::moot_strtok ( const std::string &  s,
const std::string &  delim 
)

Tokenize an STL string to a new list.

Parameters
s: source string
delim: string of delimiter characters

◆ moot_strsplit() [1/2]

void moot::moot_strsplit ( const std::string &  s,
const std::string &  delim,
std::vector< std::string > &  out 
)

Split an STL string to an existing list. All delimiters are significant, i.e. empty output strings are allowed.

Parameters
s: source string
delim: string of delimiter characters
out: destination string list

Referenced by moot_remove_newlines().

◆ moot_strsplit() [2/2]

std::vector<std::string> moot::moot_strsplit ( const std::string &  s,
const std::string &  delim 
)

Split an STL string to a new vector.

Parameters
s: source string
delim: string of delimiter characters

◆ std_vsprintf()

int moot::std_vsprintf ( std::string &  s,
const char *  fmt,
va_list &  ap 
)

Stupid wrapper for append+printf() onto C++ strings.

Parameters
s: sink string
fmt: printf format
ap: printf args

Referenced by moot_remove_newlines().

◆ std_sprintf()

int moot::std_sprintf ( std::string &  s,
const char *  fmt,
  ... 
)

Stupid wrapper for append+printf() onto C++ strings.

Parameters
s: sink string
fmt: printf format
ap: printf args

Referenced by moot_remove_newlines().

◆ std_vssprintf()

std::string moot::std_vssprintf ( const char *  fmt,
va_list &  ap 
)

Stupid wrapper for printf() returning a C++ string

Parameters
fmt: printf format
ap: printf args

Referenced by moot_remove_newlines().

◆ std_ssprintf()

std::string moot::std_ssprintf ( const char *  fmt,
  ... 
)

Stupid wrapper for printf() returning a C++ string

Parameters
fmt: printf format
ap: printf args

Referenced by moot_remove_newlines().

◆ moot_file_exists() [1/2]

bool moot::moot_file_exists ( const char *  filename)

Check whether a file exists by trying to open it with 'fopen()'

Referenced by moot_file_exists(), and moot_remove_newlines().

◆ moot_file_exists() [2/2]

bool moot::moot_file_exists ( const std::string &  filename)
inline

Check whether a file exists by trying to open it with 'fopen()', std::string version

References moot_extension(), moot_file_exists(), and moot_unextend().

◆ moot_unextend()

std::string moot::moot_unextend ( const char *  filename)

Get path+basename of a file

Referenced by moot_file_exists().

◆ moot_extension() [1/2]

const char* moot::moot_extension ( const char *  filename,
size_t  pos 
)

Get final extension of a filename (including leading '.'), reading backwards from (filename+pos). Returns a pointer into filename. If no next extension is found, returns NULL.

Referenced by moot_extension(), and moot_file_exists().

◆ moot_extension() [2/2]

const char* moot::moot_extension ( const char *  filename)
inline

Get extension of a filename (including leading '.')

References moot_extension().

◆ moot_banner()

std::string moot::moot_banner ( void  )

Return a banner string for the library

◆ moot_program_banner()

std::string moot::moot_program_banner ( const std::string &  prog_name,
const std::string &  prog_version,
const std::string &  prog_author,
bool  is_free = true 
)

Return a full banner string for a program using the library.

◆ moot_vcarp()

void moot::moot_vcarp ( const char *  fmt,
va_list &  ap 
)

verbose message to stderr, va_list version

◆ moot_carp()

void moot::moot_carp ( const char *  fmt,
  ... 
)

verbose message to stderr

◆ moot_vcroak()

void moot::moot_vcroak ( const char *  fmt,
  ... 
)

verbose message to stderr followed by abort(), va_list version

◆ moot_croak()

void moot::moot_croak ( const char *  fmt,
  ... 
)

verbose message to stderr followed by abort()

◆ moot_vmsg()

void moot::moot_vmsg ( int  curLevel,
int  minLevel,
const char *  fmt,
va_list &  ap 
)

conditional message to stderr (prints only if curLevel>=minLevel), va_list version

Parameters
curLevel: current verbosity level
minLevel: minimum level for print
fmt: printf format
...: printf arguments

◆ moot_msg()

void moot::moot_msg ( int  curLevel,
int  minLevel,
const char *  fmt,
  ... 
)

conditional message to stderr (prints only if curLevel>=minLevel), varargs version

Parameters
curLevel: current verbosity level
minLevel: minimum level for print
fmt: printf format
...: printf arguments

◆ utf8ToLower()

std::string moot::utf8ToLower ( const std::string &  s)

Return a lower-cased version of s; in- and outputs are UTF-8 encoded byte strings

◆ waste_tag_attr_get()

bool moot::waste_tag_attr_get ( const std::string &  tagstr,
size_t  rpos,
bool  mydefault = false 
)
inline

Get a boolean WASTE tag attribute by position relative to end-of-string

Referenced by moot::wasteDecoder::tag_attr_s(), moot::wasteDecoder::tag_attr_S(), and moot::wasteDecoder::tag_attr_w().

◆ waste_tag_attr_set()

void moot::waste_tag_attr_set ( std::string &  tagstr,
size_t  rpos,
bool  val 
)
inline

Set a boolean WASTE tag attribute position relative to end-of-string

References waste_casetype(), and waste_lexertype().

◆ waste_casetype()

wasteLexerTypeE moot::waste_casetype ( const std::string &  tok_text)

Get waste case-type for tok_text

Referenced by waste_tag_attr_set().

◆ waste_lexertype()

wasteLexerTypeE moot::waste_lexertype ( const std::string &  tok_text)

Get waste lexer-type for tok_text

Referenced by waste_tag_attr_set().

Variable Documentation

◆ builtinTaster

const mootTaster moot::builtinTaster

Default built-in taster

Referenced by moot::mootHMM::unknown_class_name().

◆ mootTokenTypeNames

const char* moot::mootTokenTypeNames[NTokTypes]

Useful for debugging token types

◆ tiofRare

const int moot::tiofRare = tiofText
static

Format alias for 'Cooked Rare' files.

◆ tiofMediumRare

const int moot::tiofMediumRare = tiofText|tiofAnalyzed
static

Format alias for 'Cooked Medium Rare' files.

◆ tiofMedium

const int moot::tiofMedium = tiofText|tiofTagged
static

Format alias for 'Cooked Medium' files.

◆ tiofWellDone

const int moot::tiofWellDone = tiofText|tiofAnalyzed|tiofTagged
static

Format alias for 'Cooked Well Done' files.

◆ ls_init

const int moot::ls_init = (ls_wb_fw | ls_sb_fw | ls_blanked)
static

initial state of the lexer

◆ ls_head_hyph

const int moot::ls_head_hyph = ( ls_head | ls_hyph )
static

lexer has seen some word followed by a hyphen

◆ ls_head_hyph_nl

const int moot::ls_head_hyph_nl = ( ls_head_hyph | ls_nl )
static

lexer has seen some word followed by a hyphen and a newline

◆ wasteScannerTypeNames

const char* moot::wasteScannerTypeNames[NwScanTypes]

Symbolic names for wasteScannerTypeE, useful for debugging

◆ wasteLexerTypeNames

const char* moot::wasteLexerTypeNames[NwLexerTypes]

Useful for debugging old dwdsScanner types