; Stemming rules for the Paice/Husk stemmer
; Developed by A. Zamora - 7/30/2003
;
; Rule format:  keystr,repstr,flag\t
;   keystr - the ending to be matched.
;   repstr - the replacement string; a question mark (?) stands for a
;            null (empty) repstr.
;   flag   - one of: protect, intact, continue, protint, contint, stop
;            (written without quotes) and followed by a tab (\t).
;            A comment may follow the tab.
; Rules are grouped alphabetically by the last letter of the ending and are
; listed in the sequence in which they should be applied.
; This rule format is intended for the upgraded version of the stemmer, which
; allows comments, blank lines, and longer suffixes.
; === A ===
ea,?,stop	;
sia,?,stop	;
ia,?,stop	;
ata,?,stop	;
a,?,stop	; Latin plural ending
; === C ===
atic,a10,continue	; hand over to the a10 marker rules
osic,?,stop	;
antic,an,stop	;
istic,?,stop	;
ytic,y,stop	;
metric,met,stop	;
ic,?,stop	;
; === D ===
oid,?,stop	;
aged,ag12,continue	; hand over to the ag12 marker rules
ated,a10,continue	; hand over to the a10 marker rules
ied,?,stop	;
ised,?,stop	;
ized,?,stop	;
eed,e,stop	;
ed,14,continue	; hand over to the 14 (undoubling) marker rules
ihood,?,stop	;
hood,?,stop	;
; === E ===
ae,?,stop	;
ance,14,continue	;
ence,14,continue	;
icide,?,stop	;
ide,?,stop	;
ee,ee,stop	; ending is replaced by itself
age,ag12,continue	;
iable,?,stop	;
isable,?,stop	;
izable,?,stop	;
able,?,stop	;
ible,?,stop	;
tile,t,stop	;
erine,er,stop	;
tre,t,stop	; e.g. metre
ure,?,stop	;
ise,?,stop	;
euse,?,stop	;
yse,y,stop	;
yte,?,stop	;
icate,?,stop	;
ate,a10,continue	;
ette,?,stop	;
esque,?,stop	;
ative,a10,continue	;
tive,?,stop	;
ive,?,stop	;
ize,?,stop	;
yze,y,stop	;
e,?,stop	;
; === G ===
aging,ag12,continue	;
ising,?,stop	;
ating,a10,continue	;
ying,?,stop	;
izing,?,stop	;
ing,14,continue	;
; === H ===
ish,?,stop	;
; === I ===
i,?,stop	; Latin plural ending
; === L ===
atical,a10,continue	;
istical,?,stop	;
metrical,met,stop	;
ical,?,stop	;
orial,?,stop	;
ial,?,stop	;
ssional,ss,stop	;
sional,?,stop	;
ational,a10,continue	;
tional,?,stop	;
tral,t,stop	;
ttal,t,stop	;
al,?,stop	; adjectival ending
iful,?,stop	;
ssful,ss,stop	;
sful,?,stop	;
ful,?,stop	;
; === M ===
icism,?,stop	;
oidism,?,stop	;
tionism,?,stop	;
ionism,?,stop	;
ism,?,stop	;
ium,?,stop	;
um,?,stop	; Latin singular ending
; === N ===
ean,?,stop	; adjectival ending
ian,?,stop	; adjectival ending
sman,?,stop	;
smen,?,stop	;
man,m,stop	;
men,m,stop	;
dden,d,stop	;
tten,t,stop	;
een,een,stop	; protect: ending is replaced by itself
en,?,stop	; verbalization of a noun or adjective
ssion,ss,stop	;
sion,?,stop	;
atisation,a,stop	;
atization,a,stop	;
ication,?,stop	;
isation,?,stop	;
ization,?,stop	;
xion,?,stop	;
ation,a10,continue	; hand over to the a10 marker rules
tion,?,stop	;
ion,?,stop	;
ern,?,stop	; adjectival form
; === P ===
ship,?,continue	; ...(ship)->() ; e.g. relationship
; === R ===
ar,?,stop	; adjectival ending
eer,eer,stop	; ending is replaced by itself
lier,?,stop	;
ier,14,continue	; comparative ending for a noun ending in "y"
ener,?,stop	; en+er ending
iser,?,stop	;
izer,?,stop	;
yzer,y,stop	;
er,14,continue	; "one who ...", or comparative ending
ator,a10,continue	;
or,14,continue	; "one who ..."
eur,?,stop	;
our,o,stop	;
; === S ===
sias,?,stop	;
ias,?,stop	;
as,?,stop	;
atics,a10,continue	;
ics,?,stop	;
hoods,?,stop	;
ices,i,stop	;
ences,14,continue	;
ances,a10,continue	; NOTE(review): singular "ance" (section E) maps to marker 14, not a10 - confirm which is intended
icides,?,stop	;
itides,?,stop	; plural for some "itis" forms
ides,?,stop	;
ees,ee,stop	;
ages,ag12,continue	; hand over to the ag12 marker rules
ancies,a10,continue	;
ories,?,stop	;
metries,met,stop	;
sies,?,stop	;
iabilities,?,stop	;
abilities,?,stop	;
icities,?,stop	;
arities,?,stop	;
ities,?,stop	;
ies,?,stop	; plural of "y"
ables,?,stop	;
oses,?,stop	;
ises,?,stop	;
sses,ss,stop	;
euses,?,stop	;
uses,?,stop	;
ses,?,stop	;
ates,a10,continue	;
ytes,?,stop	;
izes,?,stop	;
xes,?,stop	;
es,?,stop	;
ings,14,continue	; hand over to the 14 (undoubling) marker rules
osis,?,stop	;
iasis,i,stop	;
sis,?,stop	;
itis,?,stop	;
is,?,stop	;
ttals,t,stop	;
als,?,stop	;
oidisms,?,stop	;
isms,?,stop	; noun ending
iums,?,stop	;
ums,?,stop	;
eans,?,stop	;
ians,?,stop	; adjectival ending
ssions,ss,stop	;
sions,?,stop	;
atisations,a,stop	;
atizations,a,stop	;
ications,?,stop	;
isations,?,stop	;
izations,?,stop	;
ations,a10,continue	;
tions,?,stop	;
xions,?,stop	;
ions,i,continue	;
ships,?,continue	; e.g. relationship
eers,eer,stop	;
iers,?,stop	;
isers,?,stop	;
izers,?,stop	;
ers,14,continue	; "one who ...", or comparative ending
ators,a10,continue	;
ors,?,stop	; "one who ..."
eurs,?,stop	;
tionists,?,stop	; must come before "ists": rules are tried in sequence, so the longer ending has to be listed first
ists,?,stop	; one who studies ...
iless,?,stop	;
less,?,continue	;
liness,?,stop	;
iness,?,stop	;
ness,?,continue	;
ress,r,stop	;
ss,ss,stop	; protect: ending is replaced by itself
ments,?,continue	;
tants,?,stop	;
ants,a10,continue	; hand over to the a10 marker rules
ients,?,stop	;
ents,14,continue	; hand over to the 14 (undoubling) marker rules
ious,?,stop	; adjectival ending
ous,?,stop	;
us,?,stop	; Latin singular ending
s,?,stop	; plural ending, 3rd person verb ending
; === T ===
tant,?,stop	;
iant,?,stop	;
ant,a10,continue	;
ment,?,continue	;
ient,?,stop	;
ent,14,continue	;
liest,?,stop	;
iest,?,stop	; superlative for a noun ending in "y"
est,14,continue	; superlative
tionist,?,stop	;
ionist,?,stop	;
yist,?,stop	;
ist,?,stop	; one who studies ...
yst,y,stop	;
; === X ===
atrix,a10,continue	;
x,?,stop	;
; === Y ===
acy,a10,continue	;
iancy,?,stop	;
ancy,a10,continue	;
ency,14,continue	;
ncy,?,stop	;
cy,?,stop	;
idy,?,stop	;
ify,?,stop	;
logy,log,stop	;
; start *ly rules
egaly,egal,stop	; megaly
phaly,phal,stop	; cephaly
omaly,omal,stop	; anomaly
iably,?,stop	;
ably,?,stop	;
ibly,?,stop	;
embly,embl,stop	; assembly
ately,a10,continue	;
atively,a10,continue	;
ively,?,stop	;
ely,?,stop	;
phily,phil,stop	;
family,famil,stop	;
ily,?,stop	;
reply,repl,stop	;
tiply,tipl,stop	;
limply,limp,stop	;
imply,impl,stop	;
comply,compl,stop	;
pply,ppl,stop	;
tyly,tyl,stop	; dactyly
ly,?,continue	;
; end of *ly rules
tionary,?,stop	;
iary,?,stop	;
ary,?,stop	;
atory,a10,continue	;
ory,?,stop	;
istry,ist,stop	;
try,t,stop	;
sy,?,stop	;
ocity,?,stop	;
icity,?,stop	;
iability,?,stop	;
ability,?,stop	;
ibility,?,stop	;
ality,?,stop	;
arity,?,stop	;
ity,?,stop	;
exy,e,stop	;
y,14,continue	;
; === # ===
; By convention, 2-digit special markers may terminate a string.
; a10 is a special marker for "ate", "ation", "ant", etc.
; Determine whether the "a" of the suffix is part of the stem or not.
nda10,nd,stop	; the "a" is dropped, leaving "nd"
yla10,yl,stop	;
gma10,gm,stop	;
rba10,rb,stop	;
rda10,rd,stop	;
rma10,rm,stop	;
bra10,br,stop	;
era10,er,stop	;
ora10,or,stop	;
stra10,str,stop	;
ura10,ur,stop	;
ua10,u,stop	;
na10,n,stop	;
isa10,is,stop	;
rsa10,rs,stop	;
ssa10,ss,stop	;
ta10,t,stop	; forestation -> forest
ava10,av,stop	;
iva10,iv,stop	;
xa10,x,stop	;
a10,a,stop	; default: the "a" is part of the stem, only the marker is removed
; The ag12 marker resolves the "age" and "ages" endings.
dag12,d,stop	;
eag12,e,stop	;
kag12,k,stop	;
ggag12,g,stop	; bag(gage) -> (bag)
blag12,bl,stop	;
onag12,on,stop	;
ppag12,p,stop	;
tag12,t,stop	;
wag12,w,stop	;
ag12,ag,stop	; default: (age) -> (ag)
; The 14 marker is used to undouble some doubled letters.
bb14,b,stop	; robber -> rob
dd14,d,stop	;
ff14,f,stop	;
gg14,g,stop	;
mm14,m,stop	;
nn14,n,stop	;
pp14,p,stop	;
rr14,r,stop	;
tt14,t,stop	;
14,?,stop	; default -- just strip the marker
; === END ===