pyparsing.py 220 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696569756985699570057015702570357045705570657075708570957105711571257135714571557165717571857195720
  1. # module pyparsing.py
  2. #
  3. # Copyright (c) 2003-2016 Paul T. McGuire
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining
  6. # a copy of this software and associated documentation files (the
  7. # "Software"), to deal in the Software without restriction, including
  8. # without limitation the rights to use, copy, modify, merge, publish,
  9. # distribute, sublicense, and/or sell copies of the Software, and to
  10. # permit persons to whom the Software is furnished to do so, subject to
  11. # the following conditions:
  12. #
  13. # The above copyright notice and this permission notice shall be
  14. # included in all copies or substantial portions of the Software.
  15. #
  16. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  17. # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18. # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  19. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  20. # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21. # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22. # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23. #
  24. __doc__ = \
  25. """
  26. pyparsing module - Classes and methods to define and execute parsing grammars
  27. The pyparsing module is an alternative approach to creating and executing simple grammars,
  28. vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
  29. don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
  30. provides a library of classes that you use to construct the grammar directly in Python.
  31. Here is a program to parse "Hello, World!" (or any greeting of the form
  32. C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements
  33. (L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
  34. L{Literal} expressions)::
  35. from pip._vendor.pyparsing import Word, alphas
  36. # define grammar of a greeting
  37. greet = Word(alphas) + "," + Word(alphas) + "!"
  38. hello = "Hello, World!"
  39. print (hello, "->", greet.parseString(hello))
  40. The program outputs the following::
  41. Hello, World! -> ['Hello', ',', 'World', '!']
  42. The Python representation of the grammar is quite readable, owing to the self-explanatory
  43. class names, and the use of '+', '|' and '^' operators.
  44. The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
  45. object with named attributes.
  46. The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
  47. - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
  48. - quoted strings
  49. - embedded comments
  50. """
# Release identifier of this copy of the module.
__version__ = "2.2.0"
# Timestamp string recorded for this release.
__versionTime__ = "06 Mar 2017 02:06 UTC"
# Author name and contact address.
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
  54. import string
  55. from weakref import ref as wkref
  56. import copy
  57. import sys
  58. import warnings
  59. import re
  60. import sre_constants
  61. import collections
  62. import pprint
  63. import traceback
  64. import types
  65. from datetime import datetime
  66. try:
  67. from _thread import RLock
  68. except ImportError:
  69. from threading import RLock
  70. try:
  71. from collections import OrderedDict as _OrderedDict
  72. except ImportError:
  73. try:
  74. from ordereddict import OrderedDict as _OrderedDict
  75. except ImportError:
  76. _OrderedDict = None
  77. #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
# Public API: the names exported by a star-import of this module.
# Names that are absent from this list are not exported by a star-import.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
'CloseMatch', 'tokenMap', 'pyparsing_common',
]
# First three components of sys.version_info for the running interpreter.
system_version = tuple(sys.version_info)[:3]
# True only when the interpreter's major version is exactly 3.
PY_3 = system_version[0] == 3
if PY_3:
    # Python 3: bind the Python 2 spellings to their Python 3 builtins so the
    # same names exist in this module on either interpreter.
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

else:
    _MAX_INT = sys.maxint
    # Rebind the module-level name ``range`` to ``xrange`` on this branch.
    range = xrange

    def _ustr(obj):
        """Unicode-tolerant stand-in for str(obj) (this branch only).

        Returns ``obj`` unchanged when it is already a ``unicode`` instance.
        Otherwise tries ``str(obj)``; if that raises UnicodeEncodeError, the
        value of ``unicode(obj)`` is encoded with the interpreter's default
        encoding using the 'xmlcharrefreplace' error handler, and every
        ``&#NNN;`` reference in the result is rewritten as a backslash-u
        escape followed by the code point in hexadecimal.
        """
        if isinstance(obj,unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
            # it won't break any existing code.
            return str(obj)

        except UnicodeEncodeError:
            # Else encode it
            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            # Regex is not defined in this part of the file; it is only looked
            # up when this fallback path actually runs.
            xmlcharref = Regex(r'&#\d+;')
            # t[0][2:-1] strips the leading "&#" and trailing ";" to leave the
            # decimal digits; hex(...)[2:] drops the "0x" prefix.
            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return xmlcharref.transformString(ret)

    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
    singleArgBuiltins = []
    import __builtin__
    for fname in "sum len sorted reversed list tuple set any all min max".split():
        try:
            singleArgBuiltins.append(getattr(__builtin__,fname))
        except AttributeError:
            # This interpreter has no builtin of that name; leave it out.
            continue

# The type object of a generator expression, captured for later use.
_generatorType = type((y for y in range(1)))
  136. def _xml_escape(data):
  137. """Escape &, <, >, ", ', etc. in a string of data."""
  138. # ampersand must be replaced first
  139. from_symbols = '&><"\''
  140. to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
  141. for from_,to_ in zip(from_symbols, to_symbols):
  142. data = data.replace(from_, to_)
  143. return data
  144. class _Constants(object):
  145. pass
  146. alphas = string.ascii_uppercase + string.ascii_lowercase
  147. nums = "0123456789"
  148. hexnums = nums + "ABCDEFabcdef"
  149. alphanums = alphas + nums
  150. _bslash = chr(92)
  151. printables = "".join(c for c in string.printable if c not in string.whitespace)
  152. class ParseBaseException(Exception):
  153. """base exception class for all parsing runtime exceptions"""
  154. # Performance tuning: we construct a *lot* of these, so keep this
  155. # constructor as small and fast as possible
  156. def __init__( self, pstr, loc=0, msg=None, elem=None ):
  157. self.loc = loc
  158. if msg is None:
  159. self.msg = pstr
  160. self.pstr = ""
  161. else:
  162. self.msg = msg
  163. self.pstr = pstr
  164. self.parserElement = elem
  165. self.args = (pstr, loc, msg)
  166. @classmethod
  167. def _from_exception(cls, pe):
  168. """
  169. internal factory method to simplify creating one type of ParseException
  170. from another - avoids having __init__ signature conflicts among subclasses
  171. """
  172. return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
  173. def __getattr__( self, aname ):
  174. """supported attributes by name are:
  175. - lineno - returns the line number of the exception text
  176. - col - returns the column number of the exception text
  177. - line - returns the line containing the exception text
  178. """
  179. if( aname == "lineno" ):
  180. return lineno( self.loc, self.pstr )
  181. elif( aname in ("col", "column") ):
  182. return col( self.loc, self.pstr )
  183. elif( aname == "line" ):
  184. return line( self.loc, self.pstr )
  185. else:
  186. raise AttributeError(aname)
  187. def __str__( self ):
  188. return "%s (at char %d), (line:%d, col:%d)" % \
  189. ( self.msg, self.loc, self.lineno, self.column )
  190. def __repr__( self ):
  191. return _ustr(self)
  192. def markInputline( self, markerString = ">!<" ):
  193. """Extracts the exception line from the input string, and marks
  194. the location of the exception with a special symbol.
  195. """
  196. line_str = self.line
  197. line_column = self.column - 1
  198. if markerString:
  199. line_str = "".join((line_str[:line_column],
  200. markerString, line_str[line_column:]))
  201. return line_str.strip()
  202. def __dir__(self):
  203. return "lineno col line".split() + dir(type(self))
  class ParseException(ParseBaseException):
      """
      Exception thrown when parse expressions don't match class;
      supported attributes by name are:
       - lineno - returns the line number of the exception text
       - col - returns the column number of the exception text
       - line - returns the line containing the exception text

      Example::
          try:
              Word(nums).setName("integer").parseString("ABC")
          except ParseException as pe:
              print(pe)
              print("column: {}".format(pe.col))

      prints::
         Expected integer (at char 0), (line:1, col:1)
          column: 1
      """
  class ParseFatalException(ParseBaseException):
      """User-throwable exception thrown when inconsistent parse content
      is found; stops all parsing immediately."""
  class ParseSyntaxException(ParseFatalException):
      """Just like L{ParseFatalException}, but thrown internally when an
      L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is
      to stop immediately because an unbacktrackable syntax error has been
      found."""
  231. #~ class ReparseException(ParseBaseException):
  232. #~ """Experimental class - parse actions can raise this exception to cause
  233. #~ pyparsing to reparse the input string:
  234. #~ - with a modified input string, and/or
  235. #~ - with a modified start location
  236. #~ Set the values of the ReparseException in the constructor, and raise the
  237. #~ exception in a parse action to cause pyparsing to use the new string/location.
  238. #~ Setting the values as None causes no change to be made.
  239. #~ """
  240. #~ def __init_( self, newstring, restartLoc ):
  241. #~ self.newParseText = newstring
  242. #~ self.reparseLoc = restartLoc
  class RecursiveGrammarException(Exception):
      """Exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive.

      The offending chain of parser elements is kept on the
      C{parseElementTrace} attribute.
      """
      def __init__(self, parseElementList):
          # Stored as given; Exception.__init__ is deliberately not called so
          # that ``args`` stays as it always was for existing callers.
          self.parseElementTrace = parseElementList

      def __str__(self):
          # Wrap the trace in a 1-tuple: a bare tuple on the right-hand side of
          # ``%`` would be unpacked as the format arguments, raising TypeError
          # (or silently unwrapping a 1-tuple) instead of printing the trace.
          return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
  class _ParseResultsWithOffset(object):
      """Two-item record pairing a value with an offset.

      Indexing gives the value at ``[0]`` and the offset at ``[1]``; the repr
      is the repr of the value alone.
      """
      def __init__(self, p1, p2):
          self.tup = (p1, p2)

      def __getitem__(self, i):
          return self.tup[i]

      def __repr__(self):
          value = self.tup[0]
          return repr(value)

      def setOffset(self, i):
          """Replace the stored offset with C{i}, keeping the value."""
          value = self.tup[0]
          self.tup = (value, i)
  class ParseResults(object):
      """
      Structured parse results, to provide multiple means of access to the parsed data:
         - as a list (C{len(results)})
         - by list index (C{results[0], results[1]}, etc.)
         - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})

      Example::
          integer = Word(nums)
          date_str = (integer.setResultsName("year") + '/'
                          + integer.setResultsName("month") + '/'
                          + integer.setResultsName("day"))
          # equivalent form:
          # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

          # parseString returns a ParseResults object
          result = date_str.parseString("1999/12/31")

          def test(s, fn=repr):
              print("%s -> %s" % (s, fn(eval(s))))
          test("list(result)")
          test("result[0]")
          test("result['month']")
          test("result.day")
          test("'month' in result")
          test("'minutes' in result")
          test("result.dump()", str)
      prints::
          list(result) -> ['1999', '/', '12', '/', '31']
          result[0] -> '1999'
          result['month'] -> '12'
          result.day -> '31'
          'month' in result -> True
          'minutes' in result -> False
          result.dump() -> ['1999', '/', '12', '/', '31']
          - day: 31
          - month: 12
          - year: 1999
      """
      def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
          # Passing an existing ParseResults returns that same object; the
          # __doinit flag tells __init__ whether the instance is brand new.
          if isinstance(toklist, cls):
              return toklist
          retobj = object.__new__(cls)
          retobj.__doinit = True
          return retobj

      # Performance tuning: we construct a *lot* of these, so keep this
      # constructor as small and fast as possible
      def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
          # isinstance is bound as a default argument so it is a local lookup
          if self.__doinit:
              self.__doinit = False
              self.__name = None
              self.__parent = None
              self.__accumNames = {}
              self.__asList = asList
              self.__modal = modal
              if toklist is None:
                  toklist = []
              if isinstance(toklist, list):
                  self.__toklist = toklist[:]
              elif isinstance(toklist, _generatorType):
                  self.__toklist = list(toklist)
              else:
                  self.__toklist = [toklist]
              self.__tokdict = dict()

          if name is not None and name:
              if not modal:
                  self.__accumNames[name] = 0
              if isinstance(name,int):
                  name = _ustr(name) # will always return a str, but use _ustr for consistency
              self.__name = name
              # skip the name entry entirely when there is nothing to attach it to
              if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                  if isinstance(toklist,basestring):
                      toklist = [ toklist ]
                  if asList:
                      if isinstance(toklist,ParseResults):
                          self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                      else:
                          self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                      self[name].__name = name
                  else:
                      try:
                          self[name] = toklist[0]
                      except (KeyError,TypeError,IndexError):
                          self[name] = toklist

      def __getitem__( self, i ):
          # ints and slices index the token list; anything else is a results name
          if isinstance( i, (int,slice) ):
              return self.__toklist[i]
          else:
              if i not in self.__accumNames:
                  # modal name: report only the last value stored
                  return self.__tokdict[i][-1][0]
              else:
                  # accumulating name: report every value stored
                  return ParseResults([ v[0] for v in self.__tokdict[i] ])

      def __setitem__( self, k, v, isinstance=isinstance ):
          if isinstance(v,_ParseResultsWithOffset):
              self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
              sub = v[0]
          elif isinstance(k,(int,slice)):
              self.__toklist[k] = v
              sub = v
          else:
              self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
              sub = v
          if isinstance(sub,ParseResults):
              # weak reference, so the child does not keep its parent alive
              sub.__parent = wkref(self)

      def __delitem__( self, i ):
          if isinstance(i,(int,slice)):
              mylen = len( self.__toklist )
              del self.__toklist[i]

              # convert int to slice
              if isinstance(i, int):
                  if i < 0:
                      i += mylen
                  i = slice(i, i+1)
              # get removed indices
              removed = list(range(*i.indices(mylen)))
              removed.reverse()
              # fixup indices in token dictionary
              for name,occurrences in self.__tokdict.items():
                  for j in removed:
                      for k, (value, position) in enumerate(occurrences):
                          occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
          else:
              del self.__tokdict[i]

      def __contains__( self, k ):
          # membership tests look at results names only, not at the tokens
          return k in self.__tokdict

      def __len__( self ): return len( self.__toklist )
      def __bool__(self): return ( not not self.__toklist )
      __nonzero__ = __bool__
      def __iter__( self ): return iter( self.__toklist )
      def __reversed__( self ): return iter( self.__toklist[::-1] )

      def _iterkeys( self ):
          if hasattr(self.__tokdict, "iterkeys"):
              return self.__tokdict.iterkeys()
          else:
              return iter(self.__tokdict)

      def _itervalues( self ):
          return (self[k] for k in self._iterkeys())

      def _iteritems( self ):
          return ((k, self[k]) for k in self._iterkeys())

      if PY_3:
          # Returns an iterator of all named result keys (Python 3.x only).
          keys = _iterkeys

          # Returns an iterator of all named result values (Python 3.x only).
          values = _itervalues

          # Returns an iterator of all named result key-value tuples (Python 3.x only).
          items = _iteritems

      else:
          # Returns an iterator of all named result keys (Python 2.x only).
          iterkeys = _iterkeys

          # Returns an iterator of all named result values (Python 2.x only).
          itervalues = _itervalues

          # Returns an iterator of all named result key-value tuples (Python 2.x only).
          iteritems = _iteritems

          def keys( self ):
              """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
              return list(self.iterkeys())

          def values( self ):
              """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
              return list(self.itervalues())

          def items( self ):
              """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
              return list(self.iteritems())

      def haskeys( self ):
          """Since keys() returns an iterator, this method is helpful in bypassing
             code that looks for the existence of any defined results names."""
          return bool(self.__tokdict)

      def pop( self, *args, **kwargs):
          """
          Removes and returns item at specified index (default=C{last}).
          Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
          argument or an integer argument, it will use C{list} semantics
          and pop tokens from the list of parsed tokens. If passed a
          non-integer argument (most likely a string), it will use C{dict}
          semantics and pop the corresponding value from any defined
          results names. A second default return value argument is
          supported, just as in C{dict.pop()}.

          Example::
              def remove_first(tokens):
                  tokens.pop(0)
              print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
              print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

              label = Word(alphas)
              patt = label("LABEL") + OneOrMore(Word(nums))
              print(patt.parseString("AAB 123 321").dump())

              # Use pop() in a parse action to remove named result (note that corresponding value is not
              # removed from list form of results)
              def remove_LABEL(tokens):
                  tokens.pop("LABEL")
                  return tokens
              patt.addParseAction(remove_LABEL)
              print(patt.parseString("AAB 123 321").dump())
          prints::
              ['AAB', '123', '321']
              - LABEL: AAB

              ['AAB', '123', '321']
          """
          if not args:
              args = [-1]
          for k,v in kwargs.items():
              if k == 'default':
                  args = (args[0], v)
              else:
                  raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
          if (isinstance(args[0], int) or
                          len(args) == 1 or
                          args[0] in self):
              index = args[0]
              ret = self[index]
              del self[index]
              return ret
          else:
              # unknown name and a default was supplied
              defaultvalue = args[1]
              return defaultvalue

      def get(self, key, defaultValue=None):
          """
          Returns named result matching the given key, or if there is no
          such name, then returns the given C{defaultValue} or C{None} if no
          C{defaultValue} is specified.

          Similar to C{dict.get()}.

          Example::
              integer = Word(nums)
              date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

              result = date_str.parseString("1999/12/31")
              print(result.get("year")) # -> '1999'
              print(result.get("hour", "not specified")) # -> 'not specified'
              print(result.get("hour")) # -> None
          """
          if key in self:
              return self[key]
          else:
              return defaultValue

      def insert( self, index, insStr ):
          """
          Inserts new element at location index in the list of parsed tokens.

          Similar to C{list.insert()}.

          Example::
              print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

              # use a parse action to insert the parse location in the front of the parsed results
              def insert_locn(locn, tokens):
                  tokens.insert(0, locn)
              print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
          """
          self.__toklist.insert(index, insStr)
          # fixup indices in token dictionary
          for name,occurrences in self.__tokdict.items():
              for k, (value, position) in enumerate(occurrences):
                  occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

      def append( self, item ):
          """
          Add single element to end of ParseResults list of elements.

          Example::
              print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

              # use a parse action to compute the sum of the parsed integers, and add it to the end
              def append_sum(tokens):
                  tokens.append(sum(map(int, tokens)))
              print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
          """
          self.__toklist.append(item)

      def extend( self, itemseq ):
          """
          Add sequence of elements to end of ParseResults list of elements.

          Example::
              patt = OneOrMore(Word(alphas))

              # use a parse action to append the reverse of the matched strings, to make a palindrome
              def make_palindrome(tokens):
                  tokens.extend(reversed([t[::-1] for t in tokens]))
                  return ''.join(tokens)
              print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
          """
          if isinstance(itemseq, ParseResults):
              # goes through __iadd__ so results names are merged as well
              self += itemseq
          else:
              self.__toklist.extend(itemseq)

      def clear( self ):
          """
          Clear all elements and results names.
          """
          del self.__toklist[:]
          self.__tokdict.clear()

      def __getattr__( self, name ):
          # Attribute access is a shortcut for results-name lookup; a name
          # that was never defined yields "" instead of raising.
          try:
              return self[name]
          except KeyError:
              return ""

      def __add__( self, other ):
          ret = self.copy()
          ret += other
          return ret

      def __iadd__( self, other ):
          if other.__tokdict:
              # shift the other side's offsets past the tokens already held here
              offset = len(self.__toklist)
              addoffset = lambda a: offset if a<0 else a+offset
              otheritems = other.__tokdict.items()
              otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                  for (k,vlist) in otheritems for v in vlist]
              for k,v in otherdictitems:
                  self[k] = v
                  if isinstance(v[0],ParseResults):
                      v[0].__parent = wkref(self)

          self.__toklist += other.__toklist
          self.__accumNames.update( other.__accumNames )
          return self

      def __radd__(self, other):
          if isinstance(other,int) and other == 0:
              # useful for merging many ParseResults using sum() builtin
              return self.copy()
          else:
              # Python only calls __radd__ after other.__add__ has declined, so
              # evaluating ``other + self`` here would re-enter this method
              # without end.  Returning NotImplemented lets the interpreter
              # raise the TypeError.
              return NotImplemented

      def __repr__( self ):
          return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

      def __str__( self ):
          return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'

      def _asStringList( self, sep='' ):
          out = []
          for item in self.__toklist:
              if out and sep:
                  out.append(sep)
              if isinstance( item, ParseResults ):
                  out += item._asStringList()
              else:
                  out.append( _ustr(item) )
          return out

      def asList( self ):
          """
          Returns the parse results as a nested list of matching tokens, all converted to strings.

          Example::
              patt = OneOrMore(Word(alphas))
              result = patt.parseString("sldkj lsdkj sldkj")
              # even though the result prints in string-like form, it is actually a pyparsing ParseResults
              print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

              # Use asList() to create an actual list
              result_list = result.asList()
              print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
          """
          return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]

      def asDict( self ):
          """
          Returns the named parse results as a nested dictionary.

          Example::
              integer = Word(nums)
              date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

              result = date_str.parseString('12/31/1999')
              print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

              result_dict = result.asDict()
              print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

              # even though a ParseResults supports dict-like access, sometime you just need to have a dict
              import json
              print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
              print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
          """
          if PY_3:
              item_fn = self.items
          else:
              item_fn = self.iteritems

          def toItem(obj):
              # nested results become dicts when they carry names, lists otherwise
              if isinstance(obj, ParseResults):
                  if obj.haskeys():
                      return obj.asDict()
                  else:
                      return [toItem(v) for v in obj]
              else:
                  return obj

          return dict((k,toItem(v)) for k,v in item_fn())

      def copy( self ):
          """
          Returns a new copy of a C{ParseResults} object.
          """
          ret = ParseResults( self.__toklist )
          ret.__tokdict = self.__tokdict.copy()
          ret.__parent = self.__parent
          ret.__accumNames.update( self.__accumNames )
          ret.__name = self.__name
          return ret

      def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
          """
          (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
          """
          nl = "\n"
          out = []
          # map token position -> results name
          namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                              for v in vlist)
          nextLevelIndent = indent + " "

          # collapse out indents if formatting is not desired
          if not formatted:
              indent = ""
              nextLevelIndent = ""
              nl = ""

          selfTag = None
          if doctag is not None:
              selfTag = doctag
          else:
              if self.__name:
                  selfTag = self.__name

          if not selfTag:
              if namedItemsOnly:
                  return ""
              else:
                  selfTag = "ITEM"

          out += [ nl, indent, "<", selfTag, ">" ]

          for i,res in enumerate(self.__toklist):
              if isinstance(res,ParseResults):
                  if i in namedItems:
                      out += [ res.asXML(namedItems[i],
                                          namedItemsOnly and doctag is None,
                                          nextLevelIndent,
                                          formatted)]
                  else:
                      out += [ res.asXML(None,
                                          namedItemsOnly and doctag is None,
                                          nextLevelIndent,
                                          formatted)]
              else:
                  # individual token, see if there is a name for it
                  resTag = None
                  if i in namedItems:
                      resTag = namedItems[i]
                  if not resTag:
                      if namedItemsOnly:
                          continue
                      else:
                          resTag = "ITEM"
                  xmlBodyText = _xml_escape(_ustr(res))
                  out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                  xmlBodyText,
                                                  "</", resTag, ">" ]

          out += [ nl, indent, "</", selfTag, ">" ]
          return "".join(out)

      def __lookup(self,sub):
          # find the results name under which this exact object is stored
          for k,vlist in self.__tokdict.items():
              for v,loc in vlist:
                  if sub is v:
                      return k
          return None

      def getName(self):
          r"""
          Returns the results name for this token expression. Useful when several
          different expressions might match at a particular location.

          Example::
              integer = Word(nums)
              ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
              house_number_expr = Suppress('#') + Word(nums, alphanums)
              user_data = (Group(house_number_expr)("house_number")
                          | Group(ssn_expr)("ssn")
                          | Group(integer)("age"))
              user_info = OneOrMore(user_data)

              result = user_info.parseString("22 111-22-3333 #221B")
              for item in result:
                  print(item.getName(), ':', item[0])
          prints::
              age : 22
              ssn : 111-22-3333
              house_number : 221B
          """
          if self.__name:
              return self.__name
          elif self.__parent:
              # __parent is a weak reference; it yields None once the parent is gone
              par = self.__parent()
              if par:
                  return par.__lookup(self)
              else:
                  return None
          elif (len(self) == 1 and
                 len(self.__tokdict) == 1 and
                 next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
              return next(iter(self.__tokdict.keys()))
          else:
              return None

      def dump(self, indent='', depth=0, full=True):
          """
          Diagnostic method for listing out the contents of a C{ParseResults}.
          Accepts an optional C{indent} argument so that this string can be embedded
          in a nested display of other data.

          Example::
              integer = Word(nums)
              date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

              result = date_str.parseString('12/31/1999')
              print(result.dump())
          prints::
              ['12', '/', '31', '/', '1999']
              - day: 1999
              - month: 31
              - year: 12
          """
          out = []
          NL = '\n'
          out.append( indent+_ustr(self.asList()) )
          if full:
              if self.haskeys():
                  items = sorted((str(k), v) for k,v in self.items())
                  for k,v in items:
                      if out:
                          out.append(NL)
                      out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
                      if isinstance(v,ParseResults):
                          if v:
                              out.append( v.dump(indent,depth+1) )
                          else:
                              out.append(_ustr(v))
                      else:
                          out.append(repr(v))
              elif any(isinstance(vv,ParseResults) for vv in self):
                  v = self
                  for i,vv in enumerate(v):
                      if isinstance(vv,ParseResults):
                          out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
                      else:
                          out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))

          return "".join(out)

      def pprint(self, *args, **kwargs):
          """
          Pretty-printer for parsed results as a list, using the C{pprint} module.
          Accepts additional positional or keyword args as defined for the
          C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

          Example::
              ident = Word(alphas, alphanums)
              num = Word(nums)
              func = Forward()
              term = ident | num | Group('(' + func + ')')
              func <<= ident + Group(Optional(delimitedList(term)))
              result = func.parseString("fna a,b,(fnb c,d,200),100")
              result.pprint(width=40)
          prints::
              ['fna',
               ['a',
                'b',
                ['(', 'fnb', ['c', 'd', '200'], ')'],
                '100']]
          """
          pprint.pprint(self.asList(), *args, **kwargs)

      # add support for pickle protocol
      def __getstate__(self):
          # the parent is dereferenced here; __setstate__ re-wraps it in a weak reference
          return ( self.__toklist,
                   ( self.__tokdict.copy(),
                     self.__parent is not None and self.__parent() or None,
                     self.__accumNames,
                     self.__name ) )

      def __setstate__(self,state):
          self.__toklist = state[0]
          (self.__tokdict,
           par,
           inAccumNames,
           self.__name) = state[1]
          self.__accumNames = {}
          self.__accumNames.update(inAccumNames)
          if par is not None:
              self.__parent = wkref(par)
          else:
              self.__parent = None

      def __getnewargs__(self):
          return self.__toklist, self.__name, self.__asList, self.__modal

      def __dir__(self):
          return (dir(type(self)) + list(self.keys()))
  # Register ParseResults as a virtual MutableMapping subclass.  The ABCs live
  # in collections.abc; the aliases in the top-level collections module were
  # removed in Python 3.10, so fall back to them only when collections.abc is
  # not importable (Python 2).
  try:
      from collections.abc import MutableMapping as _MutableMapping
  except ImportError:
      from collections import MutableMapping as _MutableMapping
  _MutableMapping.register(ParseResults)
  def col(loc, strg):
      """Returns current column within a string, counting newlines as line separators.
      The first column is number 1.

      Note: the default parsing behavior is to expand tabs in the input string
      before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
      on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
      consistent view of the parsed string, the parse location, and line and column
      positions within the parsed string.
      """
      text = strg
      # a location directly after a newline is column 1 of the following line
      if 0 < loc < len(text) and text[loc - 1] == '\n':
          return 1
      # rfind gives -1 when there is no earlier newline, which makes offset 0 column 1
      return loc - text.rfind("\n", 0, loc)
  def lineno(loc, strg):
      """Returns current line number within a string, counting newlines as line separators.
      The first line is number 1.

      Note: the default parsing behavior is to expand tabs in the input string
      before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
      on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
      consistent view of the parsed string, the parse location, and line and column
      positions within the parsed string.
      """
      newlines_before = strg.count("\n", 0, loc)
      return newlines_before + 1
  def line(loc, strg):
      """Returns the line of text containing loc within a string, counting newlines as line separators.
      """
      # one past the previous newline, or 0 when rfind reports -1
      start = strg.rfind("\n", 0, loc) + 1
      stop = strg.find("\n", loc)
      return strg[start:stop] if stop >= 0 else strg[start:]
  def _defaultStartDebugAction(instring, loc, expr):
      """Print the expression about to be matched, with its offset and (line,col)."""
      parts = ["Match ", _ustr(expr), " at loc ", _ustr(loc),
               "(%d,%d)" % (lineno(loc, instring), col(loc, instring))]
      print("".join(parts))
  def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
      """Print the expression that matched together with its tokens as a list."""
      expr_text = _ustr(expr)
      tokens_text = str(toks.asList())
      print("Matched " + expr_text + " -> " + tokens_text)
  def _defaultExceptionDebugAction(instring, loc, expr, exc):
      """Print the exception raised while trying to match an expression."""
      exc_text = _ustr(exc)
      print("Exception raised:" + exc_text)
  def nullDebugAction(*args):
      """'Do-nothing' debug action, to suppress debugging output during parsing."""
      return None
  855. # Only works on Python 3.x - nonlocal is toxic to Python 2 installs
  856. #~ 'decorator to trim function calls to match the arity of the target'
  857. #~ def _trim_arity(func, maxargs=3):
  858. #~ if func in singleArgBuiltins:
  859. #~ return lambda s,l,t: func(t)
  860. #~ limit = 0
  861. #~ foundArity = False
  862. #~ def wrapper(*args):
  863. #~ nonlocal limit,foundArity
  864. #~ while 1:
  865. #~ try:
  866. #~ ret = func(*args[limit:])
  867. #~ foundArity = True
  868. #~ return ret
  869. #~ except TypeError:
  870. #~ if limit == maxargs or foundArity:
  871. #~ raise
  872. #~ limit += 1
  873. #~ continue
  874. #~ return wrapper
  # this version is Python 2.x-3.x cross-compatible
  'decorator to trim function calls to match the arity of the target'
  def _trim_arity(func, maxargs=2):
      """Wrap C{func} so it can be called with more leading arguments than it accepts.

      The returned wrapper first calls C{func} with all arguments.  If that
      call itself raises C{TypeError}, it retries with one more leading
      argument dropped each time, until C{maxargs}+1 have been dropped.  Once a
      call has succeeded, later C{TypeError}s are re-raised unchanged.
      Members of C{singleArgBuiltins} are wrapped so that only the third
      argument is passed.
      """
      if func in singleArgBuiltins:
          return lambda s,l,t: func(t)
      # one-element lists act as mutable cells shared with wrapper()
      # (no 'nonlocal', so this also works on Python 2)
      limit = [0]
      foundArity = [False]

      # traceback return data structure changed in Py3.5 - normalize back to plain tuples
      if system_version[:2] >= (3,5):
          def extract_stack(limit=0):
              # special handling for Python 3.5.0 - extra deep call stack by 1
              offset = -3 if system_version == (3,5,0) else -2
              frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
              return [(frame_summary.filename, frame_summary.lineno)]
          def extract_tb(tb, limit=0):
              frames = traceback.extract_tb(tb, limit=limit)
              frame_summary = frames[-1]
              return [(frame_summary.filename, frame_summary.lineno)]
      else:
          extract_stack = traceback.extract_stack
          extract_tb = traceback.extract_tb

      # synthesize what would be returned by traceback.extract_stack at the call to
      # user's parse action 'func', so that we don't incur call penalty at parse time

      # LINE_DIFF is the number of source lines from the 'this_line = ...' statement
      # down to the 'ret = func(...)' call in wrapper, counting the blank line
      # before 'def wrapper'.
      LINE_DIFF = 6
      # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
      # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
      this_line = extract_stack(limit=2)[-1]
      pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

      def wrapper(*args):
          while 1:
              try:
                  ret = func(*args[limit[0]:])
                  foundArity[0] = True
                  return ret
              except TypeError:
                  # re-raise TypeErrors if they did not come from our arity testing
                  if foundArity[0]:
                      raise
                  else:
                      try:
                          tb = sys.exc_info()[-1]
                          # only a TypeError whose innermost frame is the call line
                          # above counts as an arity mismatch; others propagate
                          if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                              raise
                      finally:
                          # drop the traceback reference
                          del tb

                  if limit[0] <= maxargs:
                      limit[0] += 1
                      continue
                  raise

      # copy func name to wrapper for sensible debug output
      func_name = "<parse action>"
      try:
          func_name = getattr(func, '__name__',
                              getattr(func, '__class__').__name__)
      except Exception:
          func_name = str(func)
      wrapper.__name__ = func_name

      return wrapper
  933. class ParserElement(object):
  934. """Abstract base level parser element class."""
  935. DEFAULT_WHITE_CHARS = " \n\t\r"
  936. verbose_stacktrace = False
  @staticmethod
  def setDefaultWhitespaceChars( chars ):
      r"""
      Overrides the default whitespace chars

      Example::
          # default whitespace chars are space, <TAB> and newline
          OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

          # change to just treat newline as significant
          ParserElement.setDefaultWhitespaceChars(" \t")
          OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
      """
      # stored on the class; __init__ and copy() read it from there
      setattr(ParserElement, "DEFAULT_WHITE_CHARS", chars)
  @staticmethod
  def inlineLiteralsUsing(cls):
      """
      Set class to be used for inclusion of string literals into a parser.

      Example::
          # default literal class used is Literal
          integer = Word(nums)
          date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

          date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


          # change to Suppress
          ParserElement.inlineLiteralsUsing(Suppress)
          date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

          date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
      """
      # stored on the ParserElement class itself
      setattr(ParserElement, "_literalStringClass", cls)
  def __init__(self, savelist=False):
      """Set every per-element attribute to its default value.

      C{savelist} is stored as C{saveAsList}.
      """
      # parse-time callbacks
      self.parseAction = []
      self.failAction = None
      #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
      self.strRepr = None
      self.resultsName = None
      self.saveAsList = savelist

      # whitespace handling
      self.skipWhitespace = True
      self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
      self.copyDefaultWhiteChars = True
      self.mayReturnEmpty = False  # used when checking for left-recursion
      self.keepTabs = False
      self.ignoreExprs = []

      self.debug = False
      self.streamlined = False
      # used to optimize exception handling for subclasses that don't advance parse index
      self.mayIndexError = True
      self.errmsg = ""
      # used to mark results names as modal (report only last) or cumulative (list all)
      self.modalResults = True
      self.debugActions = (None, None, None)  # custom debug actions
      self.re = None
      self.callPreparse = True  # used to avoid redundant calls to preParse
      self.callDuringTry = False
  def copy(self):
      """
      Make a copy of this C{ParserElement}.  Useful for defining different parse actions
      for the same parsing pattern, using copies of the original parse element.

      Example::
          integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
          integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
          integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

          print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
      prints::
          [5120, 100, 655360, 268435456]
      Equivalent form of C{expr.copy()} is just C{expr()}::
          integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
      """
      duplicate = copy.copy(self)
      # give the duplicate its own lists so later additions do not leak back
      duplicate.parseAction = self.parseAction[:]
      duplicate.ignoreExprs = self.ignoreExprs[:]
      if self.copyDefaultWhiteChars:
          # pick up the class-level default as it is now
          duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
      return duplicate
  def setName( self, name ):
      """
      Give this expression a name; the name is used to build the
      "Expected ..." error message, which makes debugging output and
      exception messages easier to read. Returns C{self}.

      Example::
          Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
          Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
      """
      message = "Expected " + name
      self.name = name
      self.errmsg = message
      # keep an already-attached exception object in step with the new message
      if hasattr(self,"exception"):
          self.exception.msg = message
      return self
  def setResultsName( self, name, listAllMatches=False ):
      """
      Attach a results name, under which the matched tokens can be referenced
      in the returned parse results.

      NOTE: the name is set on a *copy* of this C{ParserElement}, and that copy
      is returned. This lets client code define one basic element (such as an
      integer) and use it in several places under different names.

      A trailing C{'*'} on C{name} is stripped and has the same effect as
      passing C{listAllMatches=True}.

      The abbreviated form C{expr("name")} may be used in place of
      C{expr.setResultsName("name")} - see L{I{__call__}<__call__>}.

      Example::
          date_str = (integer.setResultsName("year") + '/'
                      + integer.setResultsName("month") + '/'
                      + integer.setResultsName("day"))

          # equivalent form:
          date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
      """
      named = self.copy()
      collectAll = listAllMatches
      if name.endswith("*"):
          name, collectAll = name[:-1], True
      named.resultsName = name
      named.modalResults = not collectAll
      return named
  def setBreak(self,breakFlag = True):
      """
      Enable or disable a C{pdb} breakpoint that fires just before this
      element is parsed.

      With C{breakFlag} true, the current C{_parse} is wrapped in a function
      that calls C{pdb.set_trace()} first; with C{breakFlag} false, a
      previously installed wrapper (if any) is removed again.
      Returns C{self}.
      """
      if not breakFlag:
          current = self._parse
          # only unwrap if the active parse method is one of our wrappers
          if hasattr(current,"_originalParseMethod"):
              self._parse = current._originalParseMethod
          return self

      wrapped = self._parse
      def breaker(instring, loc, doActions=True, callPreParse=True):
          import pdb
          pdb.set_trace()
          return wrapped( instring, loc, doActions, callPreParse )
      # remember what was wrapped so the breakpoint can be removed later
      breaker._originalParseMethod = wrapped
      self._parse = breaker
      return self
  def setParseAction( self, *fns, **kwargs ):
      """
      Replace this expression's parse actions with the given function(s), to
      be run when the expression matches successfully. Returns C{self}.

      A parse action is a callable taking 0-3 arguments; it is invoked as
      C{fn(s,loc,toks)}, C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
       - s   = the original string being parsed (see note below)
       - loc = the location of the matching substring
       - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object

      A parse action that modifies the tokens can return them, and the
      returned value replaces the original tokens. Otherwise no return value
      is needed.

      Optional keyword arguments:
       - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

      Note: by default, tabs in the input string are expanded before parsing
      starts. See L{I{parseString}<parseString>} for more on parsing strings
      that contain C{<TAB>}s, and for ways to keep the parsed string, the
      parse location, and line/column positions consistent with each other.

      Example::
          integer = Word(nums)
          date_str = integer + '/' + integer + '/' + integer

          date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

          # use parse action to convert to ints at parse time
          integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
          date_str = integer + '/' + integer + '/' + integer

          # note that integer fields are now ints, not strings
          date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
      """
      # normalize every callable to the full (s,loc,toks) calling convention
      self.parseAction = [ _trim_arity(action) for action in fns ]
      self.callDuringTry = kwargs.get("callDuringTry", False)
      return self
  def addParseAction( self, *fns, **kwargs ):
      """
      Append one or more parse actions to the ones already defined on this
      expression. See L{I{setParseAction}<setParseAction>} for the accepted
      call signatures, and L{I{copy}<copy>} for examples. Returns C{self}.

      The C{callDuringTry} keyword can only switch the flag on; once set it
      stays set.
      """
      self.parseAction.extend([ _trim_arity(action) for action in fns ])
      if not self.callDuringTry:
          self.callDuringTry = kwargs.get("callDuringTry", False)
      return self
  def addCondition(self, *fns, **kwargs):
      """
      Add one or more boolean predicate functions to this expression's list
      of parse actions. See L{I{setParseAction}<setParseAction>} for the
      accepted call signatures. Unlike C{setParseAction}, functions passed to
      C{addCondition} must return the boolean success/fail of the condition;
      a false result raises a parse exception. Returns C{self}.

      Optional keyword arguments:
       - message = define a custom message to be used in the raised exception
       - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
       - callDuringTry = if True, turns on this expression's C{callDuringTry} flag

      Example::
          integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
          year_int = integer.copy()
          year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
          date_str = year_int + '/' + integer + '/' + integer

          result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
      """
      msg = kwargs.get("message", "failed user-defined condition")
      exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

      def makeConditionAction(fn):
          # Build the action in its own scope: a closure created directly in
          # the loop below would look up `fn` when called, so every condition
          # would end up evaluating the *last* predicate passed in.
          # The arity wrapper is also created once here instead of per call.
          predicate = _trim_arity(fn)
          def pa(s,l,t):
              if not bool(predicate(s,l,t)):
                  raise exc_type(s,l,msg)
          return pa

      for fn in fns:
          self.parseAction.append(makeConditionAction(fn))
      self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
      return self
  def setFailAction( self, fn ):
      """
      Register a function to be called when parsing fails at this expression.
      Returns C{self}.

      The fail action is called as C{fn(s,loc,expr,err)} where:
       - s = string being parsed
       - loc = location where expression match was attempted and failed
       - expr = the parse expression that failed
       - err = the exception thrown

      The function's return value is not used. It may throw
      C{L{ParseFatalException}} if it is desired to stop parsing immediately.
      """
      failHandler = fn
      self.failAction = failHandler
      return self
  def _skipIgnorables( self, instring, loc ):
      """
      Advance C{loc} past every match of the expressions in C{ignoreExprs}
      and return the new location.

      The whole list is retried until a full pass matches nothing, since
      skipping one ignorable may expose another.
      """
      while True:
          skippedAny = False
          for ignorable in self.ignoreExprs:
              try:
                  # consume back-to-back matches; the ParseException raised
                  # by the first non-match is what ends this inner loop
                  while True:
                      loc, _ = ignorable._parse( instring, loc )
                      skippedAny = True
              except ParseException:
                  pass
          if not skippedAny:
              break
      return loc
  def preParse( self, instring, loc ):
      """
      Return the location at which matching should start: C{loc} advanced
      past any ignorable expressions and then, if C{skipWhitespace} is set,
      past any leading characters found in C{whiteChars}.
      """
      if self.ignoreExprs:
          loc = self._skipIgnorables( instring, loc )

      if not self.skipWhitespace:
          return loc

      whitespace = self.whiteChars
      end = len(instring)
      while loc < end and instring[loc] in whitespace:
          loc += 1
      return loc
  def parseImpl( self, instring, loc, doActions=True ):
      """
      Base implementation of the matching step: consumes nothing and
      returns C{loc} unchanged together with an empty token list.
      """
      noTokens = []
      return loc, noTokens
  def postParse( self, instring, loc, tokenlist ):
      """
      Base implementation of the post-match hook: hands back C{tokenlist}
      untouched.
      """
      unchanged = tokenlist
      return unchanged
  def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
      """
      Core, uncached parse step for this expression.

      Sequence: optionally call C{preParse}, match with C{parseImpl}, filter
      the tokens through C{postParse}, wrap them in a C{ParseResults}, then
      run the parse actions (only if C{doActions} or C{callDuringTry} is set).

      Parameters:
       - instring - the string being parsed
       - loc - location at which to attempt the match
       - doActions - passed to C{parseImpl}, and enables the parse actions
       - callPreParse - if false, C{preParse} is skipped (as it also is when
         C{self.callPreparse} is false)

      Returns a C{(loc, ParseResults)} tuple, where C{loc} is the location
      returned by C{parseImpl}.

      An C{IndexError} raised by C{parseImpl} is reported as a
      C{ParseException} located at the end of C{instring}. When debugging is
      on or a fail action is set, C{debugActions} are invoked at the start
      ([0]), on success ([1]) and on exception ([2]), and C{failAction} is
      invoked when C{parseImpl} fails.
      """
      debugging = ( self.debug ) #and doActions )

      if debugging or self.failAction:
          if (self.debugActions[0] ):
              self.debugActions[0]( instring, loc, self )
          if callPreParse and self.callPreparse:
              preloc = self.preParse( instring, loc )
          else:
              preloc = loc
          tokensStart = preloc
          try:
              try:
                  loc,tokens = self.parseImpl( instring, preloc, doActions )
              except IndexError:
                  raise ParseException( instring, len(instring), self.errmsg, self )
          except ParseBaseException as err:
              if self.debugActions[2]:
                  self.debugActions[2]( instring, tokensStart, self, err )
              if self.failAction:
                  self.failAction( instring, tokensStart, self, err )
              raise
      else:
          if callPreParse and self.callPreparse:
              preloc = self.preParse( instring, loc )
          else:
              preloc = loc
          tokensStart = preloc
          # parseImpl is invoked at preloc, so that is the position that decides
          # whether an IndexError is possible; testing loc here would miss the
          # case where skipping whitespace/ignorables moved past the end of input
          if self.mayIndexError or preloc >= len(instring):
              try:
                  loc,tokens = self.parseImpl( instring, preloc, doActions )
              except IndexError:
                  raise ParseException( instring, len(instring), self.errmsg, self )
          else:
              loc,tokens = self.parseImpl( instring, preloc, doActions )

      tokens = self.postParse( instring, loc, tokens )

      retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
      if self.parseAction and (doActions or self.callDuringTry):
          if debugging:
              try:
                  for fn in self.parseAction:
                      tokens = fn( instring, tokensStart, retTokens )
                      # a parse action that returns a value replaces the results
                      if tokens is not None:
                          retTokens = ParseResults( tokens,
                                                    self.resultsName,
                                                    asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                    modal=self.modalResults )
              except ParseBaseException as err:
                  if (self.debugActions[2] ):
                      self.debugActions[2]( instring, tokensStart, self, err )
                  raise
          else:
              for fn in self.parseAction:
                  tokens = fn( instring, tokensStart, retTokens )
                  # a parse action that returns a value replaces the results
                  if tokens is not None:
                      retTokens = ParseResults( tokens,
                                                self.resultsName,
                                                asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                modal=self.modalResults )

      if debugging:
          if (self.debugActions[1] ):
              self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

      return loc, retTokens
  def tryParse( self, instring, loc ):
      """
      Attempt a match at C{loc} with C{doActions=False} and return only the
      resulting location.

      A C{ParseFatalException} raised during the attempt is replaced by a
      plain C{ParseException} at C{loc}.
      """
      try:
          outcome = self._parse( instring, loc, doActions=False )
      except ParseFatalException:
          raise ParseException( instring, loc, self.errmsg, self)
      return outcome[0]
  def canParseNext(self, instring, loc):
      """
      Return C{True} if C{tryParse} succeeds at C{loc}, or C{False} if it
      raises C{ParseException} or C{IndexError}.
      """
      try:
          self.tryParse(instring, loc)
      except (ParseException, IndexError):
          return False
      return True
  class _UnboundedCache(object):
      """
      Packrat cache with no size limit.

      The backing dict lives in the closure of C{__init__}; it is reachable
      only through the C{get}, C{set}, C{clear} and C{__len__} methods bound
      onto each instance. C{get} returns C{self.not_in_cache} for a missing key.
      """
      def __init__(self):
          store = {}
          # unique sentinel, so that any cached value (including None) is a hit
          self.not_in_cache = missing = object()

          def get(self, key):
              return store.get(key, missing)

          def set(self, key, value):
              store[key] = value

          def clear(self):
              store.clear()

          def cache_len(self):
              return len(store)

          for attr, impl in (("get", get), ("set", set), ("clear", clear), ("__len__", cache_len)):
              setattr(self, attr, types.MethodType(impl, self))
  if _OrderedDict is not None:
      class _FifoCache(object):
          """
          Packrat cache holding at most C{size} entries; once full, the
          oldest entries are evicted first. Backed by an ordered dict.

          The storage lives in the closure of C{__init__} and is reachable
          only through the C{get}, C{set}, C{clear} and C{__len__} methods
          bound onto each instance. C{get} returns C{self.not_in_cache} for
          a missing key.
          """
          def __init__(self, size):
              # unique sentinel, so that any cached value (including None) is a hit
              self.not_in_cache = not_in_cache = object()

              cache = _OrderedDict()

              def get(self, key):
                  return cache.get(key, not_in_cache)

              def set(self, key, value):
                  cache[key] = value
                  # popitem(False) removes entries in insertion order (oldest first)
                  while len(cache) > size:
                      try:
                          cache.popitem(False)
                      except KeyError:
                          pass

              def clear(self):
                  cache.clear()

              def cache_len(self):
                  return len(cache)

              self.get = types.MethodType(get, self)
              self.set = types.MethodType(set, self)
              self.clear = types.MethodType(clear, self)
              self.__len__ = types.MethodType(cache_len, self)

  else:
      class _FifoCache(object):
          """
          Packrat cache holding at most C{size} entries; once full, the
          oldest entries are evicted first. Fallback for when no ordered
          dict is available: a plain dict plus a deque recording key order.

          The storage lives in the closure of C{__init__} and is reachable
          only through the C{get}, C{set}, C{clear} and C{__len__} methods
          bound onto each instance. C{get} returns C{self.not_in_cache} for
          a missing key.
          """
          def __init__(self, size):
              # unique sentinel, so that any cached value (including None) is a hit
              self.not_in_cache = not_in_cache = object()

              cache = {}
              # Deliberately NOT created with maxlen=size: a bounded deque drops
              # its oldest key silently on append, which would leave that key's
              # value in `cache` forever and let the dict grow without limit.
              key_fifo = collections.deque()

              def get(self, key):
                  return cache.get(key, not_in_cache)

              def set(self, key, value):
                  # record a key only once, so key_fifo always mirrors cache's keys
                  if key not in cache:
                      key_fifo.append(key)
                  cache[key] = value
                  while len(key_fifo) > size:
                      cache.pop(key_fifo.popleft(), None)

              def clear(self):
                  cache.clear()
                  key_fifo.clear()

              def cache_len(self):
                  return len(cache)

              self.get = types.MethodType(get, self)
              self.set = types.MethodType(set, self)
              self.clear = types.MethodType(clear, self)
              self.__len__ = types.MethodType(cache_len, self)
  # argument cache for optimizing repeated calls when backtracking through recursive expressions
  packrat_cache = {} # this is replaced later by enablePackrat(); it is defined here so that resetCache() doesn't fail
  # re-entrant lock held by _parseCache for the whole lookup/parse/store sequence
  packrat_cache_lock = RLock()
  # [hits, misses] counters maintained by _parseCache and zeroed by resetCache()
  packrat_cache_stats = [0, 0]
  # During backtracking the same expression is tried again and again with the
  # same arguments; remembering the outcome per argument set saves re-parsing
  # the contained expression.
  def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
      """
      Memoizing front end for C{_parseNoCache}.

      Outcomes are stored in C{ParserElement.packrat_cache}, keyed on
      C{(self, instring, loc, callPreParse, doActions)}. Both successful
      results and C{ParseBaseException}s are cached; a cached exception is
      raised again on a hit. The hit/miss counters in
      C{ParserElement.packrat_cache_stats} are updated, and all of the work
      is done while holding C{ParserElement.packrat_cache_lock}.
      """
      HIT, MISS = 0, 1
      key = (self, instring, loc, callPreParse, doActions)
      with ParserElement.packrat_cache_lock:
          cache = ParserElement.packrat_cache
          cached = cache.get(key)

          if cached is not cache.not_in_cache:
              ParserElement.packrat_cache_stats[HIT] += 1
              if isinstance(cached, Exception):
                  raise cached
              # return a copy of the stored results, not the stored object itself
              return (cached[0], cached[1].copy())

          ParserElement.packrat_cache_stats[MISS] += 1
          try:
              result = self._parseNoCache(instring, loc, doActions, callPreParse)
          except ParseBaseException as pe:
              # cache a copy of the exception, without the traceback
              cache.set(key, pe.__class__(*pe.args))
              raise
          # store a copy; the caller receives the original results object
          cache.set(key, (result[0], result[1].copy()))
          return result
  # default parse entry point is the uncached one; enablePackrat() rebinds
  # ParserElement._parse to _parseCache
  _parse = _parseNoCache
  @staticmethod
  def resetCache():
      """
      Empty the packrat cache and set its hit/miss counters back to zero.
      """
      ParserElement.packrat_cache.clear()
      stats = ParserElement.packrat_cache_stats
      # zero in place, so that existing references to the stats list stay valid
      stats[:] = [0] * len(stats)
  # set to True by the first enablePackrat() call; later calls then do nothing
  _packratEnabled = False
  @staticmethod
  def enablePackrat(cache_size_limit=128):
      """Turns on "packrat" parsing, which adds memoizing to the parsing logic.
         Parse attempts repeated at the same string location (a frequent
         occurrence in many complex grammars) can then return a cached value
         immediately, instead of re-executing parsing/validating code. Both
         valid results and parsing exceptions are memoized.

         Parameters:
          - cache_size_limit - (default=C{128}) - if an integer value is provided
            will limit the size of the packrat cache; if None is passed, then
            the cache size will be unbounded; if 0 is passed, the cache will
            be effectively disabled.

         This speedup may break existing programs that use parse actions that
         have side-effects. For this reason, packrat parsing is disabled when
         you first import pyparsing. To activate the packrat feature, your
         program must call C{ParserElement.enablePackrat()}. If your program
         uses C{psyco} to "compile as you go", you must call C{enablePackrat}
         before calling C{psyco.full()}. If you do not do this, Python will
         crash. For best results, call C{enablePackrat()} immediately after
         importing pyparsing.

         Only the first call has any effect; later calls return without
         changing the cache.

         Example::
             from pip._vendor import pyparsing
             pyparsing.ParserElement.enablePackrat()
      """
      if ParserElement._packratEnabled:
          return

      ParserElement._packratEnabled = True
      if cache_size_limit is None:
          newCache = ParserElement._UnboundedCache()
      else:
          newCache = ParserElement._FifoCache(cache_size_limit)
      ParserElement.packrat_cache = newCache
      # from here on, every element parses through the memoizing front end
      ParserElement._parse = ParserElement._parseCache
  def parseString( self, instring, parseAll=False ):
      """
      Run this parse expression against the given string, starting at its
      beginning, and return the resulting tokens.

      This is the main entry point for client code, once the complete
      expression has been built.

      To require that the whole input string is parsed successfully, pass
      C{parseAll=True} (equivalent to ending the grammar with C{L{StringEnd()}}).

      Note: unless C{keepTabs} is set, C{parseString} implicitly calls
      C{expandtabs()} on the input string, in order to report proper column
      numbers in parse actions. If the input string contains tabs and the
      grammar uses parse actions that use the C{loc} argument to index into
      the string being parsed, you can ensure you have a consistent view of
      the input string by:
       - calling C{parseWithTabs} on your grammar before calling C{parseString}
         (see L{I{parseWithTabs}<parseWithTabs>})
       - defining your parse action using the full C{(s,loc,toks)} signature, and
         referencing the input string using the parse action's C{s} argument
       - explicitly expanding the tabs in your input string before calling
         C{parseString}

      Example::
          Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
          Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
      """
      ParserElement.resetCache()
      if not self.streamlined:
          self.streamline()
      for ignorable in self.ignoreExprs:
          ignorable.streamline()
      if not self.keepTabs:
          instring = instring.expandtabs()
      try:
          endloc, tokens = self._parse( instring, 0 )
          if parseAll:
              # whatever follows the match must be skippable up to the end of input
              endloc = self.preParse( instring, endloc )
              endOfInput = Empty() + StringEnd()
              endOfInput._parse( instring, endloc )
      except ParseBaseException as exc:
          if ParserElement.verbose_stacktrace:
              raise
          # catch and re-raise exception from here, clears out pyparsing internal stack trace
          raise exc
      return tokens
  def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
      """
      Generator that scans the input string for matches of this expression.
      For each match it yields a C{(tokens, start, end)} tuple: the matching
      tokens, the start location, and the end location. The optional
      C{maxMatches} argument stops the scan after 'n' matches have been
      found. If C{overlap} is specified, then overlapping matches will be
      reported.

      Note that the start and end locations are reported relative to the
      string being parsed. See L{I{parseString}<parseString>} for more
      information on parsing strings with embedded tabs.

      Example::
          source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
          print(source)
          for tokens,start,end in Word(alphas).scanString(source):
              print(' '*start + '^'*(end-start))
              print(' '*start + tokens[0])

      prints::

          sldjf123lsdjjkf345sldkjf879lkjsfd987
          ^^^^^
          sldjf
                  ^^^^^^^
                  lsdjjkf
                            ^^^^^^
                            sldkjf
                                     ^^^^^^
                                     lkjsfd
      """
      if not self.streamlined:
          self.streamline()
      for ignorable in self.ignoreExprs:
          ignorable.streamline()

      if not self.keepTabs:
          instring = _ustr(instring).expandtabs()
      instrlen = len(instring)
      loc = 0
      preparseFn = self.preParse
      parseFn = self._parse
      ParserElement.resetCache()
      matches = 0
      try:
          while loc <= instrlen and matches < maxMatches:
              try:
                  preloc = preparseFn( instring, loc )
                  matchEnd,tokens = parseFn( instring, preloc, callPreParse=False )
              except ParseException:
                  # no match here; retry one character further on
                  loc = preloc+1
              else:
                  if matchEnd <= loc:
                      # the match did not get past loc; step forward instead
                      loc = preloc+1
                      continue
                  matches += 1
                  yield tokens, preloc, matchEnd
                  if not overlap:
                      loc = matchEnd
                  elif preparseFn( instring, loc ) > loc:
                      loc = matchEnd
                  else:
                      loc += 1
      except ParseBaseException as exc:
          if ParserElement.verbose_stacktrace:
              raise
          # catch and re-raise exception from here, clears out pyparsing internal stack trace
          raise exc
  def transformString( self, instring ):
      """
      Extension to C{L{scanString}} that rewrites the matched text using the
      tokens returned for each match (typically modified by a parse action).
      To use C{transformString}, define a grammar and attach a parse action
      to it that modifies the returned token list. Invoking
      C{transformString()} on a target string then scans for matches and
      replaces each matched span with its tokens; text between matches is
      kept as-is. Returns the resulting transformed string.

      Note: this sets C{keepTabs} to C{True} on this expression.

      Example::
          wd = Word(alphas)
          wd.setParseAction(lambda toks: toks[0].title())

          print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
      Prints::
          Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
      """
      pieces = []
      resumeAt = 0
      # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
      # keep string locs straight between transformString and scanString
      self.keepTabs = True
      try:
          for toks, start, end in self.scanString( instring ):
              # untouched text between the previous match and this one
              pieces.append( instring[resumeAt:start] )
              if toks:
                  if isinstance(toks,ParseResults):
                      pieces.extend( toks.asList() )
                  elif isinstance(toks,list):
                      pieces.extend( toks )
                  else:
                      pieces.append( toks )
              resumeAt = end
          pieces.append( instring[resumeAt:] )
          nonEmpty = [ piece for piece in pieces if piece ]
          return "".join( _ustr(item) for item in _flatten(nonEmpty) )
      except ParseBaseException as exc:
          if ParserElement.verbose_stacktrace:
              raise
          # catch and re-raise exception from here, clears out pyparsing internal stack trace
          raise exc
  def searchString( self, instring, maxMatches=_MAX_INT ):
      """
      Another extension to C{L{scanString}}, for when only the matched tokens
      are of interest: returns a C{ParseResults} holding the tokens of every
      match, without the locations. The optional C{maxMatches} argument
      stops the search after 'n' matches have been found.

      Example::
          # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
          cap_word = Word(alphas.upper(), alphas.lower())

          print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

          # the sum() builtin can be used to merge results into a single ParseResults object
          print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
      prints::
          [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
          ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
      """
      try:
          found = [ toks for toks, _start, _end in self.scanString( instring, maxMatches ) ]
          return ParseResults( found )
      except ParseBaseException as exc:
          if ParserElement.verbose_stacktrace:
              raise
          # catch and re-raise exception from here, clears out pyparsing internal stack trace
          raise exc
  def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
      """
      Generator that splits a string, treating matches of this expression as
      the separators. The optional C{maxsplit} argument limits the number of
      splits; if the optional C{includeSeparators} argument
      (default=C{False}) is true, the first token of each separator match is
      yielded between the pieces. The text after the last separator is
      always yielded last.

      Example::
          punc = oneOf(list(".,;:/-!?"))
          print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
      prints::
          ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
      """
      resumeAt = 0
      for toks, start, end in self.scanString(instring, maxMatches=maxsplit):
          yield instring[resumeAt:start]
          if includeSeparators:
              yield toks[0]
          resumeAt = end
      yield instring[resumeAt:]
  def __add__(self, other ):
      """
      Implementation of the + operator - returns C{L{And}}. A string operand
      is first converted using the current literal string class
      (L{Literal} by default). For any other non-C{ParserElement} operand a
      C{SyntaxWarning} is issued and C{None} is returned.

      Example::
          greet = Word(alphas) + "," + Word(alphas) + "!"
          hello = "Hello, World!"
          print (hello, "->", greet.parseString(hello))
      Prints::
          Hello, World! -> ['Hello', ',', 'World', '!']
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return And( [ self, other ] )
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __radd__(self, other ):
      """
      Implementation of the + operator when the left operand is not a
      C{L{ParserElement}}. A string operand is converted using the current
      literal string class; for any other non-C{ParserElement} operand a
      C{SyntaxWarning} is issued and C{None} is returned.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return other + self
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __sub__(self, other):
      """
      Implementation of the - operator; returns C{L{And}} with an error stop
      inserted between the two operands. A string operand is converted using
      the current literal string class; for any other non-C{ParserElement}
      operand a C{SyntaxWarning} is issued and C{None} is returned.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return self + And._ErrorStop() + other
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __rsub__(self, other ):
      """
      Implementation of the - operator when the left operand is not a
      C{L{ParserElement}}. A string operand is converted using the current
      literal string class; for any other non-C{ParserElement} operand a
      C{SyntaxWarning} is issued and C{None} is returned.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return other - self
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __mul__(self,other):
      """
      Implementation of * operator, allows use of C{expr * 3} in place of
      C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
      tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
      may also include C{None} as in:
       - C{expr*(n,None)} or C{expr*(n,)} is equivalent
            to C{expr*n + L{ZeroOrMore}(expr)}
            (read as "at least n instances of C{expr}")
       - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
            (read as "0 to n instances of C{expr}")
       - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
       - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

      Note that C{expr*(None,n)} does not raise an exception if
      more than n exprs exist in the input stream; that is,
      C{expr*(None,n)} does not enforce a maximum number of expr
      occurrences.  If this behavior is desired, then write
      C{expr*(None,n) + ~expr}

      Raises C{TypeError} if C{other} is neither an int nor a tuple of
      ints/C{None}, and C{ValueError} for a negative count, a maximum smaller
      than the minimum, or a multiplier of 0 or (0,0).
      """
      if isinstance(other,int):
          minElements, optElements = other,0
      elif isinstance(other,tuple):
          # pad short tuples with None, so that (n,) is treated as (n,None)
          other = (other + (None, None))[:2]
          if other[0] is None:
              other = (0, other[1])
          if isinstance(other[0],int) and other[1] is None:
              if other[0] == 0:
                  return ZeroOrMore(self)
              if other[0] == 1:
                  return OneOrMore(self)
              else:
                  return self*other[0] + ZeroOrMore(self)
          elif isinstance(other[0],int) and isinstance(other[1],int):
              minElements, optElements = other
              # from here on optElements is the number of optional repetitions
              optElements -= minElements
          else:
              # interpolate the types into the message (they used to be passed
              # as extra exception args, leaving the %s placeholders unfilled)
              raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects" % (type(other[0]),type(other[1])))
      else:
          raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

      if minElements < 0:
          raise ValueError("cannot multiply ParserElement by negative value")
      if optElements < 0:
          raise ValueError("second tuple value must be greater or equal to first tuple value")
      if minElements == optElements == 0:
          raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

      if (optElements):
          def makeOptionalList(n):
              # nested Optionals: each further repetition is only tried
              # if the one before it matched
              if n>1:
                  return Optional(self + makeOptionalList(n-1))
              else:
                  return Optional(self)
          if minElements:
              if minElements == 1:
                  ret = self + makeOptionalList(optElements)
              else:
                  ret = And([self]*minElements) + makeOptionalList(optElements)
          else:
              ret = makeOptionalList(optElements)
      else:
          if minElements == 1:
              ret = self
          else:
              ret = And([self]*minElements)
      return ret
  def __rmul__(self, other):
      """
      Implementation of the * operator when the multiplier is the left
      operand; delegates to C{__mul__} with the same argument.
      """
      product = self.__mul__(other)
      return product
  def __or__(self, other ):
      """
      Implementation of the | operator - returns C{L{MatchFirst}}. A string
      operand is converted using the current literal string class; for any
      other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
      C{None} is returned.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return MatchFirst( [ self, other ] )
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __ror__(self, other ):
      """
      Implementation of the | operator when the left operand is not a
      C{L{ParserElement}}. A string operand is converted using the current
      literal string class; for any other non-C{ParserElement} operand a
      C{SyntaxWarning} is issued and C{None} is returned.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return other | self
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __xor__(self, other ):
      """
      Implementation of the ^ operator - returns C{L{Or}}. A string operand
      is converted using the current literal string class; for any other
      non-C{ParserElement} operand a C{SyntaxWarning} is issued and C{None}
      is returned.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return Or( [ self, other ] )
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __rxor__(self, other ):
      """
      Implementation of the ^ operator when the left operand is not a
      C{L{ParserElement}}. A string operand is converted using the current
      literal string class; for any other non-C{ParserElement} operand a
      C{SyntaxWarning} is issued and C{None} is returned.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return other ^ self
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __and__(self, other ):
      """
      Implementation of the & operator - returns C{L{Each}}. A string operand
      is converted using the current literal string class; for any other
      non-C{ParserElement} operand a C{SyntaxWarning} is issued and C{None}
      is returned.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return Each( [ self, other ] )
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __rand__(self, other):
      """
      Implementation of & operator when left operand is not a C{L{ParserElement}}

      A string left operand is first wrapped with C{ParserElement._literalStringClass}.
      If the operand is then still not a C{ParserElement}, a C{SyntaxWarning} is
      issued and C{None} is returned; otherwise the result of C{other & self}
      is returned.
      """
      left = other
      if isinstance(left, basestring):
          left = ParserElement._literalStringClass(left)
      if isinstance(left, ParserElement):
          return left & self
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(left),
                    SyntaxWarning, stacklevel=2)
      return None
  def __invert__(self):
      """
      Implementation of ~ operator - returns C{L{NotAny}}

      The new C{NotAny} is constructed around this expression.
      """
      negated = NotAny(self)
      return negated
  def __call__(self, name=None):
      """
      Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.

      The given C{name} is handed unchanged to C{setResultsName}; the handling
      of a trailing C{'*'} character (C{listAllMatches} passed as C{True}) is
      therefore left to that method.

      If C{name} is omitted, same as calling C{L{copy}}.

      Example::
          # these are equivalent
          userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
          userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
      """
      if name is None:
          return self.copy()
      return self.setResultsName(name)
  def suppress(self):
      """
      Wrap this C{ParserElement} in a C{L{Suppress}} so that its output is
      suppressed; useful to keep punctuation from cluttering up returned output.
      """
      suppressed = Suppress(self)
      return suppressed
  def leaveWhitespace(self):
      """
      Turn off whitespace skipping for this C{ParserElement} by clearing its
      C{skipWhitespace} flag, then return C{self} so calls can be chained.

      This is normally only used internally by the pyparsing module, but may be
      needed in some whitespace-sensitive grammars.
      """
      setattr(self, "skipWhitespace", False)
      return self
  def setWhitespaceChars(self, chars):
      """
      Overrides the default whitespace chars.

      Stores C{chars} as this element's C{whiteChars}, re-enables whitespace
      skipping, and clears C{copyDefaultWhiteChars}. Returns C{self}.
      """
      self.whiteChars = chars
      self.copyDefaultWhiteChars = False
      self.skipWhitespace = True
      return self
  def parseWithTabs(self):
      """
      Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
      Must be called before C{parseString} when the input grammar contains elements that
      match C{<TAB>} characters.

      Implemented by setting the C{keepTabs} flag on this element; returns C{self}.
      """
      setattr(self, "keepTabs", True)
      return self
  def ignore(self, other):
      """
      Define expression to be ignored (e.g., comments) while doing pattern
      matching; may be called repeatedly, to define multiple comment or other
      ignorable patterns.

      A string argument is wrapped in C{Suppress}. A C{Suppress} expression is
      appended to C{ignoreExprs} only if it is not already present; any other
      expression is copied, wrapped in C{Suppress} and appended. Returns C{self}.

      Example::
          patt = OneOrMore(Word(alphas))
          patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

          patt.ignore(cStyleComment)
          patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
      """
      expr = other
      if isinstance(expr, basestring):
          expr = Suppress(expr)

      if not isinstance(expr, Suppress):
          self.ignoreExprs.append(Suppress(expr.copy()))
      elif expr not in self.ignoreExprs:
          self.ignoreExprs.append(expr)
      return self
  def setDebugActions(self, startAction, successAction, exceptionAction):
      """
      Enable display of debugging messages while doing pattern matching.

      Any of the three actions given as a false value (such as C{None}) is
      replaced by the corresponding module-level default debug action.
      Sets C{self.debug} to C{True} and returns C{self}.
      """
      on_start = startAction or _defaultStartDebugAction
      on_success = successAction or _defaultSuccessDebugAction
      on_exception = exceptionAction or _defaultExceptionDebugAction
      self.debugActions = (on_start, on_success, on_exception)
      self.debug = True
      return self
  def setDebug(self, flag=True):
      """
      Enable display of debugging messages while doing pattern matching.
      Set C{flag} to True to enable, False to disable.

      Enabling installs the default debug actions through C{L{setDebugActions}};
      disabling only clears C{self.debug}. Returns C{self}.

      Example::
          wd = Word(alphas).setName("alphaword")
          integer = Word(nums).setName("numword")
          term = wd | integer

          # turn on debugging for wd
          wd.setDebug()

          OneOrMore(term).parseString("abc 123 xyz 890")

      prints::
          Match alphaword at loc 0(1,1)
          Matched alphaword -> ['abc']
          Match alphaword at loc 3(1,4)
          Exception raised:Expected alphaword (at char 4), (line:1, col:5)
          Match alphaword at loc 7(1,8)
          Matched alphaword -> ['xyz']
          Match alphaword at loc 11(1,12)
          Exception raised:Expected alphaword (at char 12), (line:1, col:13)
          Match alphaword at loc 15(1,16)
          Exception raised:Expected alphaword (at char 15), (line:1, col:16)

      The output shown is that produced by the default debug actions - custom debug actions can be
      specified using L{setDebugActions}. Prior to attempting
      to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
      is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
      message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
      which makes debugging and exception messages easier to understand - for instance, the default
      name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
      """
      if not flag:
          self.debug = False
          return self
      self.setDebugActions(_defaultStartDebugAction,
                           _defaultSuccessDebugAction,
                           _defaultExceptionDebugAction)
      return self
  def __str__(self):
      """Return this element's C{name} attribute as its string form."""
      label = self.name
      return label
  def __repr__(self):
      """Return the same text as the string form, obtained through C{_ustr}."""
      text = _ustr(self)
      return text
  def streamline(self):
      """
      Mark this element as streamlined and discard any cached C{strRepr},
      then return C{self}.
      """
      self.strRepr = None
      self.streamlined = True
      return self
  def checkRecursion(self, parseElementList):
      """
      Recursion check hook; this implementation does nothing and returns C{None}.
      """
      return None
  def validate(self, validateTrace=[]):
      """
      Check defined expressions for valid structure, check for infinite recursive definitions.

      Runs C{checkRecursion} starting from a fresh, empty element list.
      C{validateTrace} is accepted but not used by this implementation.
      """
      fresh_trace = []
      self.checkRecursion(fresh_trace)
  def parseFile(self, file_or_filename, parseAll=False):
      """
      Execute the parse expression on the given file or filename.
      If a filename is specified (instead of a file object),
      the entire file is opened, read, and closed before parsing.

      The argument is treated as a file object first (its C{read()} is called);
      if that raises C{AttributeError} it is treated as a path and opened in
      text mode. The text is then passed to C{parseString} together with
      C{parseAll}, and any C{ParseBaseException} is propagated to the caller.
      """
      try:
          text = file_or_filename.read()
      except AttributeError:
          with open(file_or_filename, "r") as handle:
              text = handle.read()

      try:
          return self.parseString(text, parseAll)
      except ParseBaseException as exc:
          if not ParserElement.verbose_stacktrace:
              # catch and re-raise exception from here, clears out pyparsing internal stack trace
              raise exc
          raise
  def __eq__(self, other):
      """
      Equality test with three cases:
       - another C{ParserElement}: equal if it is the same object or if both
         have equal instance attributes (C{vars});
       - a string: equal if this expression C{L{matches}} the string;
       - anything else: defer to the comparison of the C{super} proxy with C{other}.
      """
      if isinstance(other, ParserElement):
          if self is other:
              return True
          return vars(self) == vars(other)
      if isinstance(other, basestring):
          return self.matches(other)
      return super(ParserElement, self) == other
  def __ne__(self, other):
      """Inequality is defined as the logical negation of C{self == other}."""
      are_equal = (self == other)
      return not are_equal
  def __hash__(self):
      """Hash on object identity (the hash of C{id(self)})."""
      identity = id(self)
      return hash(identity)
  def __req__(self, other):
      """Reflected equality; gives the same result as C{self == other}."""
      outcome = (self == other)
      return outcome
  def __rne__(self, other):
      """Reflected inequality; the logical negation of C{self == other}."""
      are_equal = (self == other)
      return not are_equal
  def matches(self, testString, parseAll=True):
      """
      Method for quick testing of a parser against a test string. Good for simple
      inline microtests of sub expressions while building up larger parser.

      Parameters:
       - testString - to test against this expression for a match
       - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

      Returns C{True} if C{parseString} completes, C{False} if it raises a
      C{ParseBaseException}.

      Example::
          expr = Word(nums)
          assert expr.matches("100")
      """
      try:
          self.parseString(_ustr(testString), parseAll=parseAll)
      except ParseBaseException:
          return False
      return True
  def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
      """
      Execute the parse expression on a series of test strings, showing each
      test, the parsed results or where the parse failed. Quick and easy way to
      run a parse expression against a list of sample strings.

      Parameters:
       - tests - a list of separate test strings, or a multiline string of test strings
       - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
       - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
            string; pass None to disable comment filtering
       - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
            if False, only dump nested list
       - printResults - (default=C{True}) prints test output to stdout
       - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

      Returns: a (success, results) tuple, where success indicates that all tests succeeded
      (or failed if C{failureTests} is True), and the results contain a list of
      C{(test string, result)} pairs, where result is either the parsed results
      or the exception that was raised

      Example::
          number_expr = pyparsing_common.number.copy()

          result = number_expr.runTests('''
              # unsigned integer
              100
              # negative integer
              -100
              # float with scientific notation
              6.02e23
              # integer with scientific notation
              1e-12
              ''')
          print("Success" if result[0] else "Failed!")

          result = number_expr.runTests('''
              # stray character
              100Z
              # missing leading digit before '.'
              -.100
              # too many '.'
              3.14.159
              ''', failureTests=True)
          print("Success" if result[0] else "Failed!")
      prints::
          # unsigned integer
          100
          [100]

          # negative integer
          -100
          [-100]

          # float with scientific notation
          6.02e23
          [6.02e+23]

          # integer with scientific notation
          1e-12
          [1e-12]

          Success

          # stray character
          100Z
             ^
          FAIL: Expected end of text (at char 3), (line:1, col:4)

          # missing leading digit before '.'
          -.100
          ^
          FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

          # too many '.'
          3.14.159
              ^
          FAIL: Expected end of text (at char 4), (line:1, col:5)

          Success

      Each test string must be on a single line. If you want to test a string that spans multiple
      lines, create a test like this::

          expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

      (Note that this is a raw string literal, you must include the leading 'r'.)
      """
      if isinstance(tests, basestring):
          # call strip() on each line itself rather than mapping str.strip, so that
          # any basestring flavour (including Python 2 unicode) is accepted
          tests = [test_line.strip() for test_line in tests.rstrip().splitlines()]
      if isinstance(comment, basestring):
          comment = Literal(comment)
      allResults = []
      comments = []
      success = True
      for t in tests:
          # comment lines, and blank lines directly following comments, are
          # accumulated and shown above the next real test
          if comment is not None and comment.matches(t, False) or comments and not t:
              comments.append(t)
              continue
          if not t:
              continue
          out = ['\n'.join(comments), t]
          comments = []
          try:
              # a literal backslash-n in the test text stands for a real newline
              t = t.replace(r'\n','\n')
              result = self.parseString(t, parseAll=parseAll)
              out.append(result.dump(full=fullDump))
              success = success and not failureTests
          except ParseBaseException as pe:
              fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
              if '\n' in t:
                  # multi-line test: show only the offending line with the caret under it
                  out.append(line(pe.loc, t))
                  out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
              else:
                  out.append(' '*pe.loc + '^' + fatal)
              out.append("FAIL: " + str(pe))
              success = success and failureTests
              result = pe
          except Exception as exc:
              out.append("FAIL-EXCEPTION: " + str(exc))
              success = success and failureTests
              result = exc

          if printResults:
              if fullDump:
                  out.append('')
              print('\n'.join(out))

          allResults.append((t, result))

      return success, allResults
  class Token(ParserElement):
      """
      Abstract C{ParserElement} subclass, for defining atomic matching patterns.
      """
      def __init__(self):
          # tokens always initialize the base class with savelist turned off
          super(Token, self).__init__(savelist=False)
  class Empty(Token):
      """
      An empty token, will always match.
      """
      def __init__(self):
          super(Empty, self).__init__()
          self.mayIndexError = False
          self.mayReturnEmpty = True
          self.name = "Empty"
  class NoMatch(Token):
      """
      A token that will never match.
      """
      def __init__(self):
          super(NoMatch, self).__init__()
          self.errmsg = "Unmatchable token"
          self.mayIndexError = False
          self.mayReturnEmpty = True
          self.name = "NoMatch"

      def parseImpl(self, instring, loc, doActions=True):
          """Always fail: raise C{ParseException} at C{loc} with this token's C{errmsg}."""
          failure = ParseException(instring, loc, self.errmsg, self)
          raise failure
  class Literal(Token):
      """
      Token to exactly match a specified string.

      Example::
          Literal('blah').parseString('blah')  # -> ['blah']
          Literal('blah').parseString('blahfooblah')  # -> ['blah']
          Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

      For case-insensitive matching, use L{CaselessLiteral}.

      For keyword matching (force word break before and after the matched string),
      use L{Keyword} or L{CaselessKeyword}.
      """
      def __init__(self, matchString):
          super(Literal, self).__init__()
          self.match = matchString
          self.matchLen = len(matchString)
          try:
              self.firstMatchChar = matchString[0]
          except IndexError:
              warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
              # an empty literal is turned into an Empty token in place
              self.__class__ = Empty
          quoted_name = '"%s"' % _ustr(self.match)
          self.name = quoted_name
          self.errmsg = "Expected " + quoted_name
          self.mayReturnEmpty = False
          self.mayIndexError = False

      # Performance tuning: this routine gets called a *lot*
      # if this is a single character match string and the first character matches,
      # short-circuit as quickly as possible, and avoid calling startswith
      #~ @profile
      def parseImpl(self, instring, loc, doActions=True):
          """
          Match the literal at C{loc}; return C{(new location, matched string)}
          or raise C{ParseException}.
          """
          found = (instring[loc] == self.firstMatchChar and
                   (self.matchLen == 1 or instring.startswith(self.match, loc)))
          if not found:
              raise ParseException(instring, loc, self.errmsg, self)
          return loc + self.matchLen, self.match
  # Short module-level alias for the Literal class.
  _L = Literal
  # Register Literal as the class the ParserElement operator methods use to
  # wrap plain string operands (they call ParserElement._literalStringClass).
  ParserElement._literalStringClass = Literal
  class Keyword(Token):
      """
      Token to exactly match a specified string as a keyword, that is, it must be
      immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
       - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
       - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
      Accepts two optional constructor arguments in addition to the keyword string:
       - C{identChars} is a string of characters that would be valid identifier characters,
            defaulting to all alphanumerics + "_" and "$"
       - C{caseless} allows case-insensitive matching, default is C{False}.

      Example::
          Keyword("start").parseString("start")  # -> ['start']
          Keyword("start").parseString("starting")  # -> Exception

      For case-insensitive matching, use L{CaselessKeyword}.
      """
      DEFAULT_KEYWORD_CHARS = alphanums+"_$"

      def __init__(self, matchString, identChars=None, caseless=False):
          super(Keyword, self).__init__()
          if identChars is None:
              identChars = Keyword.DEFAULT_KEYWORD_CHARS
          self.match = matchString
          self.matchLen = len(matchString)
          try:
              self.firstMatchChar = matchString[0]
          except IndexError:
              # only a warning is issued; firstMatchChar stays unset for a null string
              warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
          quoted_name = '"%s"' % self.match
          self.name = quoted_name
          self.errmsg = "Expected " + quoted_name
          self.mayReturnEmpty = False
          self.mayIndexError = False
          self.caseless = caseless
          if caseless:
              # caseless comparisons are all done on upper-cased text
              self.caselessmatch = matchString.upper()
              identChars = identChars.upper()
          self.identChars = set(identChars)

      def parseImpl(self, instring, loc, doActions=True):
          """
          Match the keyword at C{loc}, requiring that the characters just before
          and just after the match (if any) are not in C{identChars}.
          Returns C{(new location, keyword)} or raises C{ParseException}.
          """
          end = loc + self.matchLen
          if self.caseless:
              found = ((instring[loc:end].upper() == self.caselessmatch) and
                       (loc >= len(instring) - self.matchLen or
                        instring[end].upper() not in self.identChars) and
                       (loc == 0 or instring[loc - 1].upper() not in self.identChars))
          else:
              found = (instring[loc] == self.firstMatchChar and
                       (self.matchLen == 1 or instring.startswith(self.match, loc)) and
                       (loc >= len(instring) - self.matchLen or
                        instring[end] not in self.identChars) and
                       (loc == 0 or instring[loc - 1] not in self.identChars))
          if found:
              return end, self.match
          raise ParseException(instring, loc, self.errmsg, self)

      def copy(self):
          """
          Copy this keyword via the base class C{copy}, then set the copy's
          C{identChars} to the current class-wide default.
          """
          # NOTE(review): this replaces any custom identChars given to the
          # constructor with the default characters - confirm this is intended.
          duplicate = super(Keyword, self).copy()
          duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
          return duplicate

      @staticmethod
      def setDefaultKeywordChars(chars):
          """Overrides the default Keyword chars
          """
          Keyword.DEFAULT_KEYWORD_CHARS = chars
  class CaselessLiteral(Literal):
      """
      Token to match a specified string, ignoring case of letters.
      Note: the matched results will always be in the case of the given
      match string, NOT the case of the input text.

      Example::
          OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

      (Contrast with example for L{CaselessKeyword}.)
      """
      def __init__(self, matchString):
          # the base Literal holds the upper-cased text used for comparisons
          super(CaselessLiteral, self).__init__(matchString.upper())
          # Preserve the defining literal.
          self.returnString = matchString
          quoted_name = "'%s'" % self.returnString
          self.name = quoted_name
          self.errmsg = "Expected " + quoted_name

      def parseImpl(self, instring, loc, doActions=True):
          """
          Compare the upper-cased input slice with the stored upper-cased match;
          on success return the new location and the literal as originally given.
          """
          end = loc + self.matchLen
          if instring[loc:end].upper() != self.match:
              raise ParseException(instring, loc, self.errmsg, self)
          return end, self.returnString
  class CaselessKeyword(Keyword):
      """
      Caseless version of L{Keyword}.

      Example::
          OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

      (Contrast with example for L{CaselessLiteral}.)
      """
      def __init__(self, matchString, identChars=None):
          super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

      def parseImpl(self, instring, loc, doActions=True):
          """
          Match the keyword at C{loc} ignoring case.

          The match only succeeds when the keyword is not directly followed and
          not directly preceded by an identifier character, the same rule that
          C{Keyword.parseImpl} applies for caseless keywords.
          Returns C{(new location, keyword)} or raises C{ParseException}.
          """
          end = loc + self.matchLen
          if ((instring[loc:end].upper() == self.caselessmatch) and
                  (loc >= len(instring) - self.matchLen or
                   instring[end].upper() not in self.identChars) and
                  # also reject a keyword glued to a preceding identifier character
                  (loc == 0 or instring[loc - 1].upper() not in self.identChars)):
              return end, self.match
          raise ParseException(instring, loc, self.errmsg, self)
  class CloseMatch(Token):
      """
      A variation on L{Literal} which matches "close" matches, that is,
      strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
       - C{match_string} - string to be matched
       - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

      The results from a successful parse will contain the matched text from the input string and the following named results:
       - C{mismatches} - a list of the positions within the match_string where mismatches were found
       - C{original} - the original match_string used to compare against the input string

      If C{mismatches} is an empty list, then the match was an exact match.

      Example::
          patt = CloseMatch("ATCATCGAATGGA")
          patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
          patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

          # exact match
          patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

          # close match allowing up to 2 mismatches
          patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
          patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
      """
      def __init__(self, match_string, maxMismatches=1):
          super(CloseMatch, self).__init__()
          self.name = match_string
          self.match_string = match_string
          self.maxMismatches = maxMismatches
          self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
          self.mayIndexError = False
          self.mayReturnEmpty = False

      def parseImpl(self, instring, loc, doActions=True):
          """
          Compare the input starting at C{loc} with C{match_string}, character by
          character, tolerating up to C{maxMismatches} differing positions.

          Returns C{(end location, results)} where the end location is the start
          location plus the length of C{match_string}. Raises C{ParseException}
          when too many characters differ or not enough input remains.
          """
          start = loc
          maxloc = start + len(self.match_string)

          if maxloc <= len(instring):
              mismatches = []
              maxMismatches = self.maxMismatches

              for pos, (src, mat) in enumerate(zip(instring[start:maxloc], self.match_string)):
                  if src != mat:
                      mismatches.append(pos)
                      if len(mismatches) > maxMismatches:
                          break
              else:
                  # the whole match_string was compared: the match ends at maxloc,
                  # measured from the start location and not from the string start
                  loc = maxloc
                  results = ParseResults([instring[start:loc]])
                  results['original'] = self.match_string
                  results['mismatches'] = mismatches
                  return loc, results

          raise ParseException(instring, loc, self.errmsg, self)
  class Word(Token):
      """
      Token for matching words composed of allowed character sets.
      Defined with string containing all allowed initial characters,
      an optional string containing allowed body characters (if omitted,
      defaults to the initial character set), and an optional minimum,
      maximum, and/or exact length.  The default value for C{min} is 1 (a
      minimum value < 1 is not valid); the default values for C{max} and C{exact}
      are 0, meaning no maximum or exact length restriction. An optional
      C{excludeChars} parameter can list characters that might be found in
      the input C{bodyChars} string; useful to define a word of all printables
      except for one or two characters, for instance.

      L{srange} is useful for defining custom character set strings for defining
      C{Word} expressions, using range notation from regular expression character sets.

      A common mistake is to use C{Word} to match a specific literal string, as in
      C{Word("Address")}. Remember that C{Word} uses the string argument to define
      I{sets} of matchable characters. This expression would match "Add", "AAA",
      "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
      To match an exact literal string, use L{Literal} or L{Keyword}.

      pyparsing includes helper strings for building Words:
       - L{alphas}
       - L{nums}
       - L{alphanums}
       - L{hexnums}
       - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
       - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
       - L{printables} (any non-whitespace character)

      Example::
          # a word composed of digits
          integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

          # a word with a leading capital, and zero or more lowercase
          capital_word = Word(alphas.upper(), alphas.lower())

          # hostnames are alphanumeric, with leading alpha, and '-'
          hostname = Word(alphas, alphanums+'-')

          # roman numeral (not a strict parser, accepts invalid mix of characters)
          roman = Word("IVXLCDM")

          # any string of non-whitespace characters, except for ','
          csv_value = Word(printables, excludeChars=",")
      """
      def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
          super(Word, self).__init__()
          if excludeChars:
              initChars = ''.join(ch for ch in initChars if ch not in excludeChars)
              if bodyChars:
                  bodyChars = ''.join(ch for ch in bodyChars if ch not in excludeChars)
          self.initCharsOrig = initChars
          self.initChars = set(initChars)
          # an omitted (or empty) body character string falls back to the initial characters
          body_source = bodyChars if bodyChars else initChars
          self.bodyCharsOrig = body_source
          self.bodyChars = set(body_source)

          self.maxSpecified = max > 0

          if min < 1:
              raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

          self.minLen = min
          self.maxLen = max if max > 0 else _MAX_INT

          # an exact length overrides both the minimum and the maximum
          if exact > 0:
              self.maxLen = exact
              self.minLen = exact

          self.name = _ustr(self)
          self.errmsg = "Expected " + self.name
          self.mayIndexError = False
          self.asKeyword = asKeyword

          # with no space among the characters and default length limits, matching
          # can be done with a compiled regular expression
          if ' ' not in self.initCharsOrig + self.bodyCharsOrig and (min == 1 and max == 0 and exact == 0):
              init_src = self.initCharsOrig
              body_src = self.bodyCharsOrig
              if body_src == init_src:
                  regex_text = "[%s]+" % _escapeRegexRangeChars(init_src)
              elif len(init_src) == 1:
                  regex_text = "%s[%s]*" % (re.escape(init_src),
                                            _escapeRegexRangeChars(body_src),)
              else:
                  regex_text = "[%s][%s]*" % (_escapeRegexRangeChars(init_src),
                                              _escapeRegexRangeChars(body_src),)
              if self.asKeyword:
                  regex_text = r"\b" + regex_text + r"\b"
              self.reString = regex_text
              try:
                  self.re = re.compile(self.reString)
              except Exception:
                  # if the pattern cannot be compiled, parseImpl uses the character-scanning path
                  self.re = None

      def parseImpl(self, instring, loc, doActions=True):
          """
          Match a word at C{loc}; return C{(new location, matched text)} or raise
          C{ParseException}. Uses the compiled regular expression when one is
          available, otherwise scans character by character.
          """
          if self.re:
              found = self.re.match(instring, loc)
              if not found:
                  raise ParseException(instring, loc, self.errmsg, self)
              return found.end(), found.group()

          if instring[loc] not in self.initChars:
              raise ParseException(instring, loc, self.errmsg, self)

          start = loc
          loc += 1
          instrlen = len(instring)
          bodychars = self.bodyChars
          # never scan beyond the maximum word length or the end of the input
          maxloc = min(start + self.maxLen, instrlen)
          while loc < maxloc and instring[loc] in bodychars:
              loc += 1

          too_short = loc - start < self.minLen
          # with an explicit maximum, a word that continues past it is rejected
          runs_past_max = self.maxSpecified and loc < instrlen and instring[loc] in bodychars
          # as a keyword, the word must not touch body characters on either side
          touches_neighbor = self.asKeyword and (
              (start > 0 and instring[start - 1] in bodychars) or
              (loc < instrlen and instring[loc] in bodychars))

          if too_short or runs_past_max or touches_neighbor:
              raise ParseException(instring, loc, self.errmsg, self)

          return loc, instring[start:loc]

      def __str__(self):
          """
          Return the base class string form when that works; if it raises, return
          a generated C{"W:(...)"} description, which is cached in C{strRepr}.
          """
          try:
              return super(Word, self).__str__()
          except Exception:
              pass

          if self.strRepr is None:

              def abbreviate(chars):
                  # show at most the first four characters of a character string
                  return chars[:4] + "..." if len(chars) > 4 else chars

              if self.initCharsOrig == self.bodyCharsOrig:
                  self.strRepr = "W:(%s)" % abbreviate(self.initCharsOrig)
              else:
                  self.strRepr = "W:(%s,%s)" % (abbreviate(self.initCharsOrig), abbreviate(self.bodyCharsOrig))

          return self.strRepr
  class Regex(Token):
      r"""
      Token for matching strings that match a given regular expression.
      Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
      If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
      named parse results.

      Example::
          realnum = Regex(r"[+-]?\d+\.\d*")
          date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
          # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
          roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
      """
      compiledREtype = type(re.compile("[A-Z]"))

      def __init__(self, pattern, flags=0):
          """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

          C{pattern} may also be an already compiled regular expression object, which
          is then used directly. Any other type raises C{ValueError}. An invalid
          pattern string issues a C{SyntaxWarning} and re-raises the compile error."""
          super(Regex, self).__init__()

          if isinstance(pattern, basestring):
              if not pattern:
                  warnings.warn("null string passed to Regex; use Empty() instead",
                                SyntaxWarning, stacklevel=2)

              self.pattern = pattern
              self.flags = flags

              try:
                  self.re = re.compile(self.pattern, self.flags)
                  self.reString = self.pattern
              except re.error:
                  warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                                SyntaxWarning, stacklevel=2)
                  raise

          elif isinstance(pattern, Regex.compiledREtype):
              self.re = pattern
              # keep the regular expression source text, not the repr of the compiled object
              self.pattern = self.reString = pattern.pattern
              self.flags = flags

          else:
              raise ValueError("Regex may only be constructed with a string or a compiled RE object")

          self.name = _ustr(self)
          self.errmsg = "Expected " + self.name
          self.mayIndexError = False
          self.mayReturnEmpty = True

      def parseImpl(self, instring, loc, doActions=True):
          """
          Match the regular expression at C{loc}. On success return the end of the
          match and a C{ParseResults} holding the matched text, with every named
          group added as a named result; otherwise raise C{ParseException}.
          """
          result = self.re.match(instring, loc)
          if not result:
              raise ParseException(instring, loc, self.errmsg, self)

          loc = result.end()
          ret = ParseResults(result.group())
          for group_name, group_value in result.groupdict().items():
              ret[group_name] = group_value
          return loc, ret

      def __str__(self):
          """
          Return the base class string form when that works; if it raises, return
          a generated C{"Re:(...)"} description, which is cached in C{strRepr}.
          """
          try:
              return super(Regex, self).__str__()
          except Exception:
              pass

          if self.strRepr is None:
              self.strRepr = "Re:(%s)" % repr(self.pattern)

          return self.strRepr
  2420. class QuotedString(Token):
  2421. r"""
  2422. Token for matching strings that are delimited by quoting characters.
  2423. Defined with the following parameters:
  2424. - quoteChar - string of one or more characters defining the quote delimiting string
  2425. - escChar - character to escape quotes, typically backslash (default=C{None})
  2426. - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
  2427. - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
  2428. - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
  2429. - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
  2430. - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
  2431. Example::
  2432. qs = QuotedString('"')
  2433. print(qs.searchString('lsjdf "This is the quote" sldjf'))
  2434. complex_qs = QuotedString('{{', endQuoteChar='}}')
  2435. print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
  2436. sql_qs = QuotedString('"', escQuote='""')
  2437. print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
  2438. prints::
  2439. [['This is the quote']]
  2440. [['This is the "quote"']]
  2441. [['This is the quote with "embedded" quotes']]
  2442. """
  2443. def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
  2444. super(QuotedString,self).__init__()
  2445. # remove white space from quote chars - wont work anyway
  2446. quoteChar = quoteChar.strip()
  2447. if not quoteChar:
  2448. warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
  2449. raise SyntaxError()
  2450. if endQuoteChar is None:
  2451. endQuoteChar = quoteChar
  2452. else:
  2453. endQuoteChar = endQuoteChar.strip()
  2454. if not endQuoteChar:
  2455. warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
  2456. raise SyntaxError()
  2457. self.quoteChar = quoteChar
  2458. self.quoteCharLen = len(quoteChar)
  2459. self.firstQuoteChar = quoteChar[0]
  2460. self.endQuoteChar = endQuoteChar
  2461. self.endQuoteCharLen = len(endQuoteChar)
  2462. self.escChar = escChar
  2463. self.escQuote = escQuote
  2464. self.unquoteResults = unquoteResults
  2465. self.convertWhitespaceEscapes = convertWhitespaceEscapes
  2466. if multiline:
  2467. self.flags = re.MULTILINE | re.DOTALL
  2468. self.pattern = r'%s(?:[^%s%s]' % \
  2469. ( re.escape(self.quoteChar),
  2470. _escapeRegexRangeChars(self.endQuoteChar[0]),
  2471. (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
  2472. else:
  2473. self.flags = 0
  2474. self.pattern = r'%s(?:[^%s\n\r%s]' % \
  2475. ( re.escape(self.quoteChar),
  2476. _escapeRegexRangeChars(self.endQuoteChar[0]),
  2477. (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
  2478. if len(self.endQuoteChar) > 1:
  2479. self.pattern += (
  2480. '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
  2481. _escapeRegexRangeChars(self.endQuoteChar[i]))
  2482. for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
  2483. )
  2484. if escQuote:
  2485. self.pattern += (r'|(?:%s)' % re.escape(escQuote))
  2486. if escChar:
  2487. self.pattern += (r'|(?:%s.)' % re.escape(escChar))
  2488. self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
  2489. self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
  2490. try:
  2491. self.re = re.compile(self.pattern, self.flags)
  2492. self.reString = self.pattern
  2493. except sre_constants.error:
  2494. warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
  2495. SyntaxWarning, stacklevel=2)
  2496. raise
  2497. self.name = _ustr(self)
  2498. self.errmsg = "Expected " + self.name
  2499. self.mayIndexError = False
  2500. self.mayReturnEmpty = True
  2501. def parseImpl( self, instring, loc, doActions=True ):
  2502. result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
  2503. if not result:
  2504. raise ParseException(instring, loc, self.errmsg, self)
  2505. loc = result.end()
  2506. ret = result.group()
  2507. if self.unquoteResults:
  2508. # strip off quotes
  2509. ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
  2510. if isinstance(ret,basestring):
  2511. # replace escaped whitespace
  2512. if '\\' in ret and self.convertWhitespaceEscapes:
  2513. ws_map = {
  2514. r'\t' : '\t',
  2515. r'\n' : '\n',
  2516. r'\f' : '\f',
  2517. r'\r' : '\r',
  2518. }
  2519. for wslit,wschar in ws_map.items():
  2520. ret = ret.replace(wslit, wschar)
  2521. # replace escaped characters
  2522. if self.escChar:
  2523. ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)
  2524. # replace escaped quotes
  2525. if self.escQuote:
  2526. ret = ret.replace(self.escQuote, self.endQuoteChar)
  2527. return loc, ret
  2528. def __str__( self ):
  2529. try:
  2530. return super(QuotedString,self).__str__()
  2531. except Exception:
  2532. pass
  2533. if self.strRepr is None:
  2534. self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
  2535. return self.strRepr
  2536. class CharsNotIn(Token):
  2537. """
  2538. Token for matching words composed of characters I{not} in a given set (will
  2539. include whitespace in matched characters if not listed in the provided exclusion set - see example).
  2540. Defined with string containing all disallowed characters, and an optional
  2541. minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
  2542. minimum value < 1 is not valid); the default values for C{max} and C{exact}
  2543. are 0, meaning no maximum or exact length restriction.
  2544. Example::
  2545. # define a comma-separated-value as anything that is not a ','
  2546. csv_value = CharsNotIn(',')
  2547. print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
  2548. prints::
  2549. ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
  2550. """
  2551. def __init__( self, notChars, min=1, max=0, exact=0 ):
  2552. super(CharsNotIn,self).__init__()
  2553. self.skipWhitespace = False
  2554. self.notChars = notChars
  2555. if min < 1:
  2556. raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")
  2557. self.minLen = min
  2558. if max > 0:
  2559. self.maxLen = max
  2560. else:
  2561. self.maxLen = _MAX_INT
  2562. if exact > 0:
  2563. self.maxLen = exact
  2564. self.minLen = exact
  2565. self.name = _ustr(self)
  2566. self.errmsg = "Expected " + self.name
  2567. self.mayReturnEmpty = ( self.minLen == 0 )
  2568. self.mayIndexError = False
  2569. def parseImpl( self, instring, loc, doActions=True ):
  2570. if instring[loc] in self.notChars:
  2571. raise ParseException(instring, loc, self.errmsg, self)
  2572. start = loc
  2573. loc += 1
  2574. notchars = self.notChars
  2575. maxlen = min( start+self.maxLen, len(instring) )
  2576. while loc < maxlen and \
  2577. (instring[loc] not in notchars):
  2578. loc += 1
  2579. if loc - start < self.minLen:
  2580. raise ParseException(instring, loc, self.errmsg, self)
  2581. return loc, instring[start:loc]
  2582. def __str__( self ):
  2583. try:
  2584. return super(CharsNotIn, self).__str__()
  2585. except Exception:
  2586. pass
  2587. if self.strRepr is None:
  2588. if len(self.notChars) > 4:
  2589. self.strRepr = "!W:(%s...)" % self.notChars[:4]
  2590. else:
  2591. self.strRepr = "!W:(%s)" % self.notChars
  2592. return self.strRepr
  2593. class White(Token):
  2594. """
  2595. Special matching class for matching whitespace. Normally, whitespace is ignored
  2596. by pyparsing grammars. This class is included when some whitespace structures
  2597. are significant. Define with a string containing the whitespace characters to be
  2598. matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
  2599. as defined for the C{L{Word}} class.
  2600. """
  2601. whiteStrs = {
  2602. " " : "<SPC>",
  2603. "\t": "<TAB>",
  2604. "\n": "<LF>",
  2605. "\r": "<CR>",
  2606. "\f": "<FF>",
  2607. }
  2608. def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
  2609. super(White,self).__init__()
  2610. self.matchWhite = ws
  2611. self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) )
  2612. #~ self.leaveWhitespace()
  2613. self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
  2614. self.mayReturnEmpty = True
  2615. self.errmsg = "Expected " + self.name
  2616. self.minLen = min
  2617. if max > 0:
  2618. self.maxLen = max
  2619. else:
  2620. self.maxLen = _MAX_INT
  2621. if exact > 0:
  2622. self.maxLen = exact
  2623. self.minLen = exact
  2624. def parseImpl( self, instring, loc, doActions=True ):
  2625. if not(instring[ loc ] in self.matchWhite):
  2626. raise ParseException(instring, loc, self.errmsg, self)
  2627. start = loc
  2628. loc += 1
  2629. maxloc = start + self.maxLen
  2630. maxloc = min( maxloc, len(instring) )
  2631. while loc < maxloc and instring[loc] in self.matchWhite:
  2632. loc += 1
  2633. if loc - start < self.minLen:
  2634. raise ParseException(instring, loc, self.errmsg, self)
  2635. return loc, instring[start:loc]
  2636. class _PositionToken(Token):
  2637. def __init__( self ):
  2638. super(_PositionToken,self).__init__()
  2639. self.name=self.__class__.__name__
  2640. self.mayReturnEmpty = True
  2641. self.mayIndexError = False
  2642. class GoToColumn(_PositionToken):
  2643. """
  2644. Token to advance to a specific column of input text; useful for tabular report scraping.
  2645. """
  2646. def __init__( self, colno ):
  2647. super(GoToColumn,self).__init__()
  2648. self.col = colno
  2649. def preParse( self, instring, loc ):
  2650. if col(loc,instring) != self.col:
  2651. instrlen = len(instring)
  2652. if self.ignoreExprs:
  2653. loc = self._skipIgnorables( instring, loc )
  2654. while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
  2655. loc += 1
  2656. return loc
  2657. def parseImpl( self, instring, loc, doActions=True ):
  2658. thiscol = col( loc, instring )
  2659. if thiscol > self.col:
  2660. raise ParseException( instring, loc, "Text not in expected column", self )
  2661. newloc = loc + self.col - thiscol
  2662. ret = instring[ loc: newloc ]
  2663. return newloc, ret
  2664. class LineStart(_PositionToken):
  2665. """
  2666. Matches if current position is at the beginning of a line within the parse string
  2667. Example::
  2668. test = '''\
  2669. AAA this line
  2670. AAA and this line
  2671. AAA but not this one
  2672. B AAA and definitely not this one
  2673. '''
  2674. for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
  2675. print(t)
  2676. Prints::
  2677. ['AAA', ' this line']
  2678. ['AAA', ' and this line']
  2679. """
  2680. def __init__( self ):
  2681. super(LineStart,self).__init__()
  2682. self.errmsg = "Expected start of line"
  2683. def parseImpl( self, instring, loc, doActions=True ):
  2684. if col(loc, instring) == 1:
  2685. return loc, []
  2686. raise ParseException(instring, loc, self.errmsg, self)
  2687. class LineEnd(_PositionToken):
  2688. """
  2689. Matches if current position is at the end of a line within the parse string
  2690. """
  2691. def __init__( self ):
  2692. super(LineEnd,self).__init__()
  2693. self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
  2694. self.errmsg = "Expected end of line"
  2695. def parseImpl( self, instring, loc, doActions=True ):
  2696. if loc<len(instring):
  2697. if instring[loc] == "\n":
  2698. return loc+1, "\n"
  2699. else:
  2700. raise ParseException(instring, loc, self.errmsg, self)
  2701. elif loc == len(instring):
  2702. return loc+1, []
  2703. else:
  2704. raise ParseException(instring, loc, self.errmsg, self)
  2705. class StringStart(_PositionToken):
  2706. """
  2707. Matches if current position is at the beginning of the parse string
  2708. """
  2709. def __init__( self ):
  2710. super(StringStart,self).__init__()
  2711. self.errmsg = "Expected start of text"
  2712. def parseImpl( self, instring, loc, doActions=True ):
  2713. if loc != 0:
  2714. # see if entire string up to here is just whitespace and ignoreables
  2715. if loc != self.preParse( instring, 0 ):
  2716. raise ParseException(instring, loc, self.errmsg, self)
  2717. return loc, []
  2718. class StringEnd(_PositionToken):
  2719. """
  2720. Matches if current position is at the end of the parse string
  2721. """
  2722. def __init__( self ):
  2723. super(StringEnd,self).__init__()
  2724. self.errmsg = "Expected end of text"
  2725. def parseImpl( self, instring, loc, doActions=True ):
  2726. if loc < len(instring):
  2727. raise ParseException(instring, loc, self.errmsg, self)
  2728. elif loc == len(instring):
  2729. return loc+1, []
  2730. elif loc > len(instring):
  2731. return loc, []
  2732. else:
  2733. raise ParseException(instring, loc, self.errmsg, self)
  2734. class WordStart(_PositionToken):
  2735. """
  2736. Matches if the current position is at the beginning of a Word, and
  2737. is not preceded by any character in a given set of C{wordChars}
  2738. (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
  2739. use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
  2740. the string being parsed, or at the beginning of a line.
  2741. """
  2742. def __init__(self, wordChars = printables):
  2743. super(WordStart,self).__init__()
  2744. self.wordChars = set(wordChars)
  2745. self.errmsg = "Not at the start of a word"
  2746. def parseImpl(self, instring, loc, doActions=True ):
  2747. if loc != 0:
  2748. if (instring[loc-1] in self.wordChars or
  2749. instring[loc] not in self.wordChars):
  2750. raise ParseException(instring, loc, self.errmsg, self)
  2751. return loc, []
  2752. class WordEnd(_PositionToken):
  2753. """
  2754. Matches if the current position is at the end of a Word, and
  2755. is not followed by any character in a given set of C{wordChars}
  2756. (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
  2757. use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
  2758. the string being parsed, or at the end of a line.
  2759. """
  2760. def __init__(self, wordChars = printables):
  2761. super(WordEnd,self).__init__()
  2762. self.wordChars = set(wordChars)
  2763. self.skipWhitespace = False
  2764. self.errmsg = "Not at the end of a word"
  2765. def parseImpl(self, instring, loc, doActions=True ):
  2766. instrlen = len(instring)
  2767. if instrlen>0 and loc<instrlen:
  2768. if (instring[loc] in self.wordChars or
  2769. instring[loc-1] not in self.wordChars):
  2770. raise ParseException(instring, loc, self.errmsg, self)
  2771. return loc, []
  2772. class ParseExpression(ParserElement):
  2773. """
  2774. Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
  2775. """
  2776. def __init__( self, exprs, savelist = False ):
  2777. super(ParseExpression,self).__init__(savelist)
  2778. if isinstance( exprs, _generatorType ):
  2779. exprs = list(exprs)
  2780. if isinstance( exprs, basestring ):
  2781. self.exprs = [ ParserElement._literalStringClass( exprs ) ]
  2782. elif isinstance( exprs, collections.Iterable ):
  2783. exprs = list(exprs)
  2784. # if sequence of strings provided, wrap with Literal
  2785. if all(isinstance(expr, basestring) for expr in exprs):
  2786. exprs = map(ParserElement._literalStringClass, exprs)
  2787. self.exprs = list(exprs)
  2788. else:
  2789. try:
  2790. self.exprs = list( exprs )
  2791. except TypeError:
  2792. self.exprs = [ exprs ]
  2793. self.callPreparse = False
  2794. def __getitem__( self, i ):
  2795. return self.exprs[i]
  2796. def append( self, other ):
  2797. self.exprs.append( other )
  2798. self.strRepr = None
  2799. return self
  2800. def leaveWhitespace( self ):
  2801. """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
  2802. all contained expressions."""
  2803. self.skipWhitespace = False
  2804. self.exprs = [ e.copy() for e in self.exprs ]
  2805. for e in self.exprs:
  2806. e.leaveWhitespace()
  2807. return self
  2808. def ignore( self, other ):
  2809. if isinstance( other, Suppress ):
  2810. if other not in self.ignoreExprs:
  2811. super( ParseExpression, self).ignore( other )
  2812. for e in self.exprs:
  2813. e.ignore( self.ignoreExprs[-1] )
  2814. else:
  2815. super( ParseExpression, self).ignore( other )
  2816. for e in self.exprs:
  2817. e.ignore( self.ignoreExprs[-1] )
  2818. return self
  2819. def __str__( self ):
  2820. try:
  2821. return super(ParseExpression,self).__str__()
  2822. except Exception:
  2823. pass
  2824. if self.strRepr is None:
  2825. self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
  2826. return self.strRepr
  2827. def streamline( self ):
  2828. super(ParseExpression,self).streamline()
  2829. for e in self.exprs:
  2830. e.streamline()
  2831. # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
  2832. # but only if there are no parse actions or resultsNames on the nested And's
  2833. # (likewise for Or's and MatchFirst's)
  2834. if ( len(self.exprs) == 2 ):
  2835. other = self.exprs[0]
  2836. if ( isinstance( other, self.__class__ ) and
  2837. not(other.parseAction) and
  2838. other.resultsName is None and
  2839. not other.debug ):
  2840. self.exprs = other.exprs[:] + [ self.exprs[1] ]
  2841. self.strRepr = None
  2842. self.mayReturnEmpty |= other.mayReturnEmpty
  2843. self.mayIndexError |= other.mayIndexError
  2844. other = self.exprs[-1]
  2845. if ( isinstance( other, self.__class__ ) and
  2846. not(other.parseAction) and
  2847. other.resultsName is None and
  2848. not other.debug ):
  2849. self.exprs = self.exprs[:-1] + other.exprs[:]
  2850. self.strRepr = None
  2851. self.mayReturnEmpty |= other.mayReturnEmpty
  2852. self.mayIndexError |= other.mayIndexError
  2853. self.errmsg = "Expected " + _ustr(self)
  2854. return self
  2855. def setResultsName( self, name, listAllMatches=False ):
  2856. ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
  2857. return ret
  2858. def validate( self, validateTrace=[] ):
  2859. tmp = validateTrace[:]+[self]
  2860. for e in self.exprs:
  2861. e.validate(tmp)
  2862. self.checkRecursion( [] )
  2863. def copy(self):
  2864. ret = super(ParseExpression,self).copy()
  2865. ret.exprs = [e.copy() for e in self.exprs]
  2866. return ret
  2867. class And(ParseExpression):
  2868. """
  2869. Requires all given C{ParseExpression}s to be found in the given order.
  2870. Expressions may be separated by whitespace.
  2871. May be constructed using the C{'+'} operator.
  2872. May also be constructed using the C{'-'} operator, which will suppress backtracking.
  2873. Example::
  2874. integer = Word(nums)
  2875. name_expr = OneOrMore(Word(alphas))
  2876. expr = And([integer("id"),name_expr("name"),integer("age")])
  2877. # more easily written as:
  2878. expr = integer("id") + name_expr("name") + integer("age")
  2879. """
  2880. class _ErrorStop(Empty):
  2881. def __init__(self, *args, **kwargs):
  2882. super(And._ErrorStop,self).__init__(*args, **kwargs)
  2883. self.name = '-'
  2884. self.leaveWhitespace()
  2885. def __init__( self, exprs, savelist = True ):
  2886. super(And,self).__init__(exprs, savelist)
  2887. self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
  2888. self.setWhitespaceChars( self.exprs[0].whiteChars )
  2889. self.skipWhitespace = self.exprs[0].skipWhitespace
  2890. self.callPreparse = True
  2891. def parseImpl( self, instring, loc, doActions=True ):
  2892. # pass False as last arg to _parse for first element, since we already
  2893. # pre-parsed the string as part of our And pre-parsing
  2894. loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
  2895. errorStop = False
  2896. for e in self.exprs[1:]:
  2897. if isinstance(e, And._ErrorStop):
  2898. errorStop = True
  2899. continue
  2900. if errorStop:
  2901. try:
  2902. loc, exprtokens = e._parse( instring, loc, doActions )
  2903. except ParseSyntaxException:
  2904. raise
  2905. except ParseBaseException as pe:
  2906. pe.__traceback__ = None
  2907. raise ParseSyntaxException._from_exception(pe)
  2908. except IndexError:
  2909. raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
  2910. else:
  2911. loc, exprtokens = e._parse( instring, loc, doActions )
  2912. if exprtokens or exprtokens.haskeys():
  2913. resultlist += exprtokens
  2914. return loc, resultlist
  2915. def __iadd__(self, other ):
  2916. if isinstance( other, basestring ):
  2917. other = ParserElement._literalStringClass( other )
  2918. return self.append( other ) #And( [ self, other ] )
  2919. def checkRecursion( self, parseElementList ):
  2920. subRecCheckList = parseElementList[:] + [ self ]
  2921. for e in self.exprs:
  2922. e.checkRecursion( subRecCheckList )
  2923. if not e.mayReturnEmpty:
  2924. break
  2925. def __str__( self ):
  2926. if hasattr(self,"name"):
  2927. return self.name
  2928. if self.strRepr is None:
  2929. self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
  2930. return self.strRepr
  2931. class Or(ParseExpression):
  2932. """
  2933. Requires that at least one C{ParseExpression} is found.
  2934. If two expressions match, the expression that matches the longest string will be used.
  2935. May be constructed using the C{'^'} operator.
  2936. Example::
  2937. # construct Or using '^' operator
  2938. number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
  2939. print(number.searchString("123 3.1416 789"))
  2940. prints::
  2941. [['123'], ['3.1416'], ['789']]
  2942. """
  2943. def __init__( self, exprs, savelist = False ):
  2944. super(Or,self).__init__(exprs, savelist)
  2945. if self.exprs:
  2946. self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
  2947. else:
  2948. self.mayReturnEmpty = True
  2949. def parseImpl( self, instring, loc, doActions=True ):
  2950. maxExcLoc = -1
  2951. maxException = None
  2952. matches = []
  2953. for e in self.exprs:
  2954. try:
  2955. loc2 = e.tryParse( instring, loc )
  2956. except ParseException as err:
  2957. err.__traceback__ = None
  2958. if err.loc > maxExcLoc:
  2959. maxException = err
  2960. maxExcLoc = err.loc
  2961. except IndexError:
  2962. if len(instring) > maxExcLoc:
  2963. maxException = ParseException(instring,len(instring),e.errmsg,self)
  2964. maxExcLoc = len(instring)
  2965. else:
  2966. # save match among all matches, to retry longest to shortest
  2967. matches.append((loc2, e))
  2968. if matches:
  2969. matches.sort(key=lambda x: -x[0])
  2970. for _,e in matches:
  2971. try:
  2972. return e._parse( instring, loc, doActions )
  2973. except ParseException as err:
  2974. err.__traceback__ = None
  2975. if err.loc > maxExcLoc:
  2976. maxException = err
  2977. maxExcLoc = err.loc
  2978. if maxException is not None:
  2979. maxException.msg = self.errmsg
  2980. raise maxException
  2981. else:
  2982. raise ParseException(instring, loc, "no defined alternatives to match", self)
  2983. def __ixor__(self, other ):
  2984. if isinstance( other, basestring ):
  2985. other = ParserElement._literalStringClass( other )
  2986. return self.append( other ) #Or( [ self, other ] )
  2987. def __str__( self ):
  2988. if hasattr(self,"name"):
  2989. return self.name
  2990. if self.strRepr is None:
  2991. self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
  2992. return self.strRepr
  2993. def checkRecursion( self, parseElementList ):
  2994. subRecCheckList = parseElementList[:] + [ self ]
  2995. for e in self.exprs:
  2996. e.checkRecursion( subRecCheckList )
  2997. class MatchFirst(ParseExpression):
  2998. """
  2999. Requires that at least one C{ParseExpression} is found.
  3000. If two expressions match, the first one listed is the one that will match.
  3001. May be constructed using the C{'|'} operator.
  3002. Example::
  3003. # construct MatchFirst using '|' operator
  3004. # watch the order of expressions to match
  3005. number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
  3006. print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]
  3007. # put more selective expression first
  3008. number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
  3009. print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']]
  3010. """
  3011. def __init__( self, exprs, savelist = False ):
  3012. super(MatchFirst,self).__init__(exprs, savelist)
  3013. if self.exprs:
  3014. self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
  3015. else:
  3016. self.mayReturnEmpty = True
  3017. def parseImpl( self, instring, loc, doActions=True ):
  3018. maxExcLoc = -1
  3019. maxException = None
  3020. for e in self.exprs:
  3021. try:
  3022. ret = e._parse( instring, loc, doActions )
  3023. return ret
  3024. except ParseException as err:
  3025. if err.loc > maxExcLoc:
  3026. maxException = err
  3027. maxExcLoc = err.loc
  3028. except IndexError:
  3029. if len(instring) > maxExcLoc:
  3030. maxException = ParseException(instring,len(instring),e.errmsg,self)
  3031. maxExcLoc = len(instring)
  3032. # only got here if no expression matched, raise exception for match that made it the furthest
  3033. else:
  3034. if maxException is not None:
  3035. maxException.msg = self.errmsg
  3036. raise maxException
  3037. else:
  3038. raise ParseException(instring, loc, "no defined alternatives to match", self)
  3039. def __ior__(self, other ):
  3040. if isinstance( other, basestring ):
  3041. other = ParserElement._literalStringClass( other )
  3042. return self.append( other ) #MatchFirst( [ self, other ] )
  3043. def __str__( self ):
  3044. if hasattr(self,"name"):
  3045. return self.name
  3046. if self.strRepr is None:
  3047. self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
  3048. return self.strRepr
  3049. def checkRecursion( self, parseElementList ):
  3050. subRecCheckList = parseElementList[:] + [ self ]
  3051. for e in self.exprs:
  3052. e.checkRecursion( subRecCheckList )
  3053. class Each(ParseExpression):
  3054. """
  3055. Requires all given C{ParseExpression}s to be found, but in any order.
  3056. Expressions may be separated by whitespace.
  3057. May be constructed using the C{'&'} operator.
  3058. Example::
  3059. color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
  3060. shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
  3061. integer = Word(nums)
  3062. shape_attr = "shape:" + shape_type("shape")
  3063. posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
  3064. color_attr = "color:" + color("color")
  3065. size_attr = "size:" + integer("size")
  3066. # use Each (using operator '&') to accept attributes in any order
  3067. # (shape and posn are required, color and size are optional)
  3068. shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)
  3069. shape_spec.runTests('''
  3070. shape: SQUARE color: BLACK posn: 100, 120
  3071. shape: CIRCLE size: 50 color: BLUE posn: 50,80
  3072. color:GREEN size:20 shape:TRIANGLE posn:20,40
  3073. '''
  3074. )
  3075. prints::
  3076. shape: SQUARE color: BLACK posn: 100, 120
  3077. ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
  3078. - color: BLACK
  3079. - posn: ['100', ',', '120']
  3080. - x: 100
  3081. - y: 120
  3082. - shape: SQUARE
  3083. shape: CIRCLE size: 50 color: BLUE posn: 50,80
  3084. ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
  3085. - color: BLUE
  3086. - posn: ['50', ',', '80']
  3087. - x: 50
  3088. - y: 80
  3089. - shape: CIRCLE
  3090. - size: 50
  3091. color: GREEN size: 20 shape: TRIANGLE posn: 20,40
  3092. ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
  3093. - color: GREEN
  3094. - posn: ['20', ',', '40']
  3095. - x: 20
  3096. - y: 40
  3097. - shape: TRIANGLE
  3098. - size: 20
  3099. """
  3100. def __init__( self, exprs, savelist = True ):
  3101. super(Each,self).__init__(exprs, savelist)
  3102. self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
  3103. self.skipWhitespace = True
  3104. self.initExprGroups = True
  3105. def parseImpl( self, instring, loc, doActions=True ):
  3106. if self.initExprGroups:
  3107. self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
  3108. opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
  3109. opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
  3110. self.optionals = opt1 + opt2
  3111. self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
  3112. self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
  3113. self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
  3114. self.required += self.multirequired
  3115. self.initExprGroups = False
  3116. tmpLoc = loc
  3117. tmpReqd = self.required[:]
  3118. tmpOpt = self.optionals[:]
  3119. matchOrder = []
  3120. keepMatching = True
  3121. while keepMatching:
  3122. tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
  3123. failed = []
  3124. for e in tmpExprs:
  3125. try:
  3126. tmpLoc = e.tryParse( instring, tmpLoc )
  3127. except ParseException:
  3128. failed.append(e)
  3129. else:
  3130. matchOrder.append(self.opt1map.get(id(e),e))
  3131. if e in tmpReqd:
  3132. tmpReqd.remove(e)
  3133. elif e in tmpOpt:
  3134. tmpOpt.remove(e)
  3135. if len(failed) == len(tmpExprs):
  3136. keepMatching = False
  3137. if tmpReqd:
  3138. missing = ", ".join(_ustr(e) for e in tmpReqd)
  3139. raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
  3140. # add any unmatched Optionals, in case they have default values defined
  3141. matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]
  3142. resultlist = []
  3143. for e in matchOrder:
  3144. loc,results = e._parse(instring,loc,doActions)
  3145. resultlist.append(results)
  3146. finalResults = sum(resultlist, ParseResults([]))
  3147. return loc, finalResults
  3148. def __str__( self ):
  3149. if hasattr(self,"name"):
  3150. return self.name
  3151. if self.strRepr is None:
  3152. self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
  3153. return self.strRepr
  3154. def checkRecursion( self, parseElementList ):
  3155. subRecCheckList = parseElementList[:] + [ self ]
  3156. for e in self.exprs:
  3157. e.checkRecursion( subRecCheckList )
  3158. class ParseElementEnhance(ParserElement):
  3159. """
  3160. Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
  3161. """
  3162. def __init__( self, expr, savelist=False ):
  3163. super(ParseElementEnhance,self).__init__(savelist)
  3164. if isinstance( expr, basestring ):
  3165. if issubclass(ParserElement._literalStringClass, Token):
  3166. expr = ParserElement._literalStringClass(expr)
  3167. else:
  3168. expr = ParserElement._literalStringClass(Literal(expr))
  3169. self.expr = expr
  3170. self.strRepr = None
  3171. if expr is not None:
  3172. self.mayIndexError = expr.mayIndexError
  3173. self.mayReturnEmpty = expr.mayReturnEmpty
  3174. self.setWhitespaceChars( expr.whiteChars )
  3175. self.skipWhitespace = expr.skipWhitespace
  3176. self.saveAsList = expr.saveAsList
  3177. self.callPreparse = expr.callPreparse
  3178. self.ignoreExprs.extend(expr.ignoreExprs)
  3179. def parseImpl( self, instring, loc, doActions=True ):
  3180. if self.expr is not None:
  3181. return self.expr._parse( instring, loc, doActions, callPreParse=False )
  3182. else:
  3183. raise ParseException("",loc,self.errmsg,self)
  3184. def leaveWhitespace( self ):
  3185. self.skipWhitespace = False
  3186. self.expr = self.expr.copy()
  3187. if self.expr is not None:
  3188. self.expr.leaveWhitespace()
  3189. return self
  3190. def ignore( self, other ):
  3191. if isinstance( other, Suppress ):
  3192. if other not in self.ignoreExprs:
  3193. super( ParseElementEnhance, self).ignore( other )
  3194. if self.expr is not None:
  3195. self.expr.ignore( self.ignoreExprs[-1] )
  3196. else:
  3197. super( ParseElementEnhance, self).ignore( other )
  3198. if self.expr is not None:
  3199. self.expr.ignore( self.ignoreExprs[-1] )
  3200. return self
  3201. def streamline( self ):
  3202. super(ParseElementEnhance,self).streamline()
  3203. if self.expr is not None:
  3204. self.expr.streamline()
  3205. return self
  3206. def checkRecursion( self, parseElementList ):
  3207. if self in parseElementList:
  3208. raise RecursiveGrammarException( parseElementList+[self] )
  3209. subRecCheckList = parseElementList[:] + [ self ]
  3210. if self.expr is not None:
  3211. self.expr.checkRecursion( subRecCheckList )
  3212. def validate( self, validateTrace=[] ):
  3213. tmp = validateTrace[:]+[self]
  3214. if self.expr is not None:
  3215. self.expr.validate(tmp)
  3216. self.checkRecursion( [] )
  3217. def __str__( self ):
  3218. try:
  3219. return super(ParseElementEnhance,self).__str__()
  3220. except Exception:
  3221. pass
  3222. if self.strRepr is None and self.expr is not None:
  3223. self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
  3224. return self.strRepr
  3225. class FollowedBy(ParseElementEnhance):
  3226. """
  3227. Lookahead matching of the given parse expression. C{FollowedBy}
  3228. does I{not} advance the parsing position within the input string, it only
  3229. verifies that the specified parse expression matches at the current
  3230. position. C{FollowedBy} always returns a null token list.
  3231. Example::
  3232. # use FollowedBy to match a label only if it is followed by a ':'
  3233. data_word = Word(alphas)
  3234. label = data_word + FollowedBy(':')
  3235. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3236. OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
  3237. prints::
  3238. [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
  3239. """
  3240. def __init__( self, expr ):
  3241. super(FollowedBy,self).__init__(expr)
  3242. self.mayReturnEmpty = True
  3243. def parseImpl( self, instring, loc, doActions=True ):
  3244. self.expr.tryParse( instring, loc )
  3245. return loc, []
  3246. class NotAny(ParseElementEnhance):
  3247. """
  3248. Lookahead to disallow matching with the given parse expression. C{NotAny}
  3249. does I{not} advance the parsing position within the input string, it only
  3250. verifies that the specified parse expression does I{not} match at the current
  3251. position. Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
  3252. always returns a null token list. May be constructed using the '~' operator.
  3253. Example::
  3254. """
  3255. def __init__( self, expr ):
  3256. super(NotAny,self).__init__(expr)
  3257. #~ self.leaveWhitespace()
  3258. self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
  3259. self.mayReturnEmpty = True
  3260. self.errmsg = "Found unwanted token, "+_ustr(self.expr)
  3261. def parseImpl( self, instring, loc, doActions=True ):
  3262. if self.expr.canParseNext(instring, loc):
  3263. raise ParseException(instring, loc, self.errmsg, self)
  3264. return loc, []
  3265. def __str__( self ):
  3266. if hasattr(self,"name"):
  3267. return self.name
  3268. if self.strRepr is None:
  3269. self.strRepr = "~{" + _ustr(self.expr) + "}"
  3270. return self.strRepr
  3271. class _MultipleMatch(ParseElementEnhance):
  3272. def __init__( self, expr, stopOn=None):
  3273. super(_MultipleMatch, self).__init__(expr)
  3274. self.saveAsList = True
  3275. ender = stopOn
  3276. if isinstance(ender, basestring):
  3277. ender = ParserElement._literalStringClass(ender)
  3278. self.not_ender = ~ender if ender is not None else None
  3279. def parseImpl( self, instring, loc, doActions=True ):
  3280. self_expr_parse = self.expr._parse
  3281. self_skip_ignorables = self._skipIgnorables
  3282. check_ender = self.not_ender is not None
  3283. if check_ender:
  3284. try_not_ender = self.not_ender.tryParse
  3285. # must be at least one (but first see if we are the stopOn sentinel;
  3286. # if so, fail)
  3287. if check_ender:
  3288. try_not_ender(instring, loc)
  3289. loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False )
  3290. try:
  3291. hasIgnoreExprs = (not not self.ignoreExprs)
  3292. while 1:
  3293. if check_ender:
  3294. try_not_ender(instring, loc)
  3295. if hasIgnoreExprs:
  3296. preloc = self_skip_ignorables( instring, loc )
  3297. else:
  3298. preloc = loc
  3299. loc, tmptokens = self_expr_parse( instring, preloc, doActions )
  3300. if tmptokens or tmptokens.haskeys():
  3301. tokens += tmptokens
  3302. except (ParseException,IndexError):
  3303. pass
  3304. return loc, tokens
  3305. class OneOrMore(_MultipleMatch):
  3306. """
  3307. Repetition of one or more of the given expression.
  3308. Parameters:
  3309. - expr - expression that must match one or more times
  3310. - stopOn - (default=C{None}) - expression for a terminating sentinel
  3311. (only required if the sentinel would ordinarily match the repetition
  3312. expression)
  3313. Example::
  3314. data_word = Word(alphas)
  3315. label = data_word + FollowedBy(':')
  3316. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
  3317. text = "shape: SQUARE posn: upper left color: BLACK"
  3318. OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]
  3319. # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
  3320. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3321. OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
  3322. # could also be written as
  3323. (attr_expr * (1,)).parseString(text).pprint()
  3324. """
  3325. def __str__( self ):
  3326. if hasattr(self,"name"):
  3327. return self.name
  3328. if self.strRepr is None:
  3329. self.strRepr = "{" + _ustr(self.expr) + "}..."
  3330. return self.strRepr
  3331. class ZeroOrMore(_MultipleMatch):
  3332. """
  3333. Optional repetition of zero or more of the given expression.
  3334. Parameters:
  3335. - expr - expression that must match zero or more times
  3336. - stopOn - (default=C{None}) - expression for a terminating sentinel
  3337. (only required if the sentinel would ordinarily match the repetition
  3338. expression)
  3339. Example: similar to L{OneOrMore}
  3340. """
  3341. def __init__( self, expr, stopOn=None):
  3342. super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
  3343. self.mayReturnEmpty = True
  3344. def parseImpl( self, instring, loc, doActions=True ):
  3345. try:
  3346. return super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
  3347. except (ParseException,IndexError):
  3348. return loc, []
  3349. def __str__( self ):
  3350. if hasattr(self,"name"):
  3351. return self.name
  3352. if self.strRepr is None:
  3353. self.strRepr = "[" + _ustr(self.expr) + "]..."
  3354. return self.strRepr
  3355. class _NullToken(object):
  3356. def __bool__(self):
  3357. return False
  3358. __nonzero__ = __bool__
  3359. def __str__(self):
  3360. return ""
  3361. _optionalNotMatched = _NullToken()
  3362. class Optional(ParseElementEnhance):
  3363. """
  3364. Optional matching of the given expression.
  3365. Parameters:
  3366. - expr - expression that must match zero or more times
  3367. - default (optional) - value to be returned if the optional expression is not found.
  3368. Example::
  3369. # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
  3370. zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
  3371. zip.runTests('''
  3372. # traditional ZIP code
  3373. 12345
  3374. # ZIP+4 form
  3375. 12101-0001
  3376. # invalid ZIP
  3377. 98765-
  3378. ''')
  3379. prints::
  3380. # traditional ZIP code
  3381. 12345
  3382. ['12345']
  3383. # ZIP+4 form
  3384. 12101-0001
  3385. ['12101-0001']
  3386. # invalid ZIP
  3387. 98765-
  3388. ^
  3389. FAIL: Expected end of text (at char 5), (line:1, col:6)
  3390. """
  3391. def __init__( self, expr, default=_optionalNotMatched ):
  3392. super(Optional,self).__init__( expr, savelist=False )
  3393. self.saveAsList = self.expr.saveAsList
  3394. self.defaultValue = default
  3395. self.mayReturnEmpty = True
  3396. def parseImpl( self, instring, loc, doActions=True ):
  3397. try:
  3398. loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
  3399. except (ParseException,IndexError):
  3400. if self.defaultValue is not _optionalNotMatched:
  3401. if self.expr.resultsName:
  3402. tokens = ParseResults([ self.defaultValue ])
  3403. tokens[self.expr.resultsName] = self.defaultValue
  3404. else:
  3405. tokens = [ self.defaultValue ]
  3406. else:
  3407. tokens = []
  3408. return loc, tokens
  3409. def __str__( self ):
  3410. if hasattr(self,"name"):
  3411. return self.name
  3412. if self.strRepr is None:
  3413. self.strRepr = "[" + _ustr(self.expr) + "]"
  3414. return self.strRepr
  3415. class SkipTo(ParseElementEnhance):
  3416. """
  3417. Token for skipping over all undefined text until the matched expression is found.
  3418. Parameters:
  3419. - expr - target expression marking the end of the data to be skipped
  3420. - include - (default=C{False}) if True, the target expression is also parsed
  3421. (the skipped text and target expression are returned as a 2-element list).
  3422. - ignore - (default=C{None}) used to define grammars (typically quoted strings and
  3423. comments) that might contain false matches to the target expression
  3424. - failOn - (default=C{None}) define expressions that are not allowed to be
  3425. included in the skipped test; if found before the target expression is found,
  3426. the SkipTo is not a match
  3427. Example::
  3428. report = '''
  3429. Outstanding Issues Report - 1 Jan 2000
  3430. # | Severity | Description | Days Open
  3431. -----+----------+-------------------------------------------+-----------
  3432. 101 | Critical | Intermittent system crash | 6
  3433. 94 | Cosmetic | Spelling error on Login ('log|n') | 14
  3434. 79 | Minor | System slow when running too many reports | 47
  3435. '''
  3436. integer = Word(nums)
  3437. SEP = Suppress('|')
  3438. # use SkipTo to simply match everything up until the next SEP
  3439. # - ignore quoted strings, so that a '|' character inside a quoted string does not match
  3440. # - parse action will call token.strip() for each matched token, i.e., the description body
  3441. string_data = SkipTo(SEP, ignore=quotedString)
  3442. string_data.setParseAction(tokenMap(str.strip))
  3443. ticket_expr = (integer("issue_num") + SEP
  3444. + string_data("sev") + SEP
  3445. + string_data("desc") + SEP
  3446. + integer("days_open"))
  3447. for tkt in ticket_expr.searchString(report):
  3448. print tkt.dump()
  3449. prints::
  3450. ['101', 'Critical', 'Intermittent system crash', '6']
  3451. - days_open: 6
  3452. - desc: Intermittent system crash
  3453. - issue_num: 101
  3454. - sev: Critical
  3455. ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
  3456. - days_open: 14
  3457. - desc: Spelling error on Login ('log|n')
  3458. - issue_num: 94
  3459. - sev: Cosmetic
  3460. ['79', 'Minor', 'System slow when running too many reports', '47']
  3461. - days_open: 47
  3462. - desc: System slow when running too many reports
  3463. - issue_num: 79
  3464. - sev: Minor
  3465. """
  3466. def __init__( self, other, include=False, ignore=None, failOn=None ):
  3467. super( SkipTo, self ).__init__( other )
  3468. self.ignoreExpr = ignore
  3469. self.mayReturnEmpty = True
  3470. self.mayIndexError = False
  3471. self.includeMatch = include
  3472. self.asList = False
  3473. if isinstance(failOn, basestring):
  3474. self.failOn = ParserElement._literalStringClass(failOn)
  3475. else:
  3476. self.failOn = failOn
  3477. self.errmsg = "No match found for "+_ustr(self.expr)
  3478. def parseImpl( self, instring, loc, doActions=True ):
  3479. startloc = loc
  3480. instrlen = len(instring)
  3481. expr = self.expr
  3482. expr_parse = self.expr._parse
  3483. self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
  3484. self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None
  3485. tmploc = loc
  3486. while tmploc <= instrlen:
  3487. if self_failOn_canParseNext is not None:
  3488. # break if failOn expression matches
  3489. if self_failOn_canParseNext(instring, tmploc):
  3490. break
  3491. if self_ignoreExpr_tryParse is not None:
  3492. # advance past ignore expressions
  3493. while 1:
  3494. try:
  3495. tmploc = self_ignoreExpr_tryParse(instring, tmploc)
  3496. except ParseBaseException:
  3497. break
  3498. try:
  3499. expr_parse(instring, tmploc, doActions=False, callPreParse=False)
  3500. except (ParseException, IndexError):
  3501. # no match, advance loc in string
  3502. tmploc += 1
  3503. else:
  3504. # matched skipto expr, done
  3505. break
  3506. else:
  3507. # ran off the end of the input string without matching skipto expr, fail
  3508. raise ParseException(instring, loc, self.errmsg, self)
  3509. # build up return values
  3510. loc = tmploc
  3511. skiptext = instring[startloc:loc]
  3512. skipresult = ParseResults(skiptext)
  3513. if self.includeMatch:
  3514. loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
  3515. skipresult += mat
  3516. return loc, skipresult
  3517. class Forward(ParseElementEnhance):
  3518. """
  3519. Forward declaration of an expression to be defined later -
  3520. used for recursive grammars, such as algebraic infix notation.
  3521. When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
  3522. Note: take care when assigning to C{Forward} not to overlook precedence of operators.
  3523. Specifically, '|' has a lower precedence than '<<', so that::
  3524. fwdExpr << a | b | c
  3525. will actually be evaluated as::
  3526. (fwdExpr << a) | b | c
  3527. thereby leaving b and c out as parseable alternatives. It is recommended that you
  3528. explicitly group the values inserted into the C{Forward}::
  3529. fwdExpr << (a | b | c)
  3530. Converting to use the '<<=' operator instead will avoid this problem.
  3531. See L{ParseResults.pprint} for an example of a recursive parser created using
  3532. C{Forward}.
  3533. """
  3534. def __init__( self, other=None ):
  3535. super(Forward,self).__init__( other, savelist=False )
  3536. def __lshift__( self, other ):
  3537. if isinstance( other, basestring ):
  3538. other = ParserElement._literalStringClass(other)
  3539. self.expr = other
  3540. self.strRepr = None
  3541. self.mayIndexError = self.expr.mayIndexError
  3542. self.mayReturnEmpty = self.expr.mayReturnEmpty
  3543. self.setWhitespaceChars( self.expr.whiteChars )
  3544. self.skipWhitespace = self.expr.skipWhitespace
  3545. self.saveAsList = self.expr.saveAsList
  3546. self.ignoreExprs.extend(self.expr.ignoreExprs)
  3547. return self
  3548. def __ilshift__(self, other):
  3549. return self << other
  3550. def leaveWhitespace( self ):
  3551. self.skipWhitespace = False
  3552. return self
  3553. def streamline( self ):
  3554. if not self.streamlined:
  3555. self.streamlined = True
  3556. if self.expr is not None:
  3557. self.expr.streamline()
  3558. return self
  3559. def validate( self, validateTrace=[] ):
  3560. if self not in validateTrace:
  3561. tmp = validateTrace[:]+[self]
  3562. if self.expr is not None:
  3563. self.expr.validate(tmp)
  3564. self.checkRecursion([])
  3565. def __str__( self ):
  3566. if hasattr(self,"name"):
  3567. return self.name
  3568. return self.__class__.__name__ + ": ..."
  3569. # stubbed out for now - creates awful memory and perf issues
  3570. self._revertClass = self.__class__
  3571. self.__class__ = _ForwardNoRecurse
  3572. try:
  3573. if self.expr is not None:
  3574. retString = _ustr(self.expr)
  3575. else:
  3576. retString = "None"
  3577. finally:
  3578. self.__class__ = self._revertClass
  3579. return self.__class__.__name__ + ": " + retString
  3580. def copy(self):
  3581. if self.expr is not None:
  3582. return super(Forward,self).copy()
  3583. else:
  3584. ret = Forward()
  3585. ret <<= self
  3586. return ret
  3587. class _ForwardNoRecurse(Forward):
  3588. def __str__( self ):
  3589. return "..."
  3590. class TokenConverter(ParseElementEnhance):
  3591. """
  3592. Abstract subclass of C{ParseExpression}, for converting parsed results.
  3593. """
  3594. def __init__( self, expr, savelist=False ):
  3595. super(TokenConverter,self).__init__( expr )#, savelist )
  3596. self.saveAsList = False
  3597. class Combine(TokenConverter):
  3598. """
  3599. Converter to concatenate all matching tokens to a single string.
  3600. By default, the matching patterns must also be contiguous in the input string;
  3601. this can be disabled by specifying C{'adjacent=False'} in the constructor.
  3602. Example::
  3603. real = Word(nums) + '.' + Word(nums)
  3604. print(real.parseString('3.1416')) # -> ['3', '.', '1416']
  3605. # will also erroneously match the following
  3606. print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
  3607. real = Combine(Word(nums) + '.' + Word(nums))
  3608. print(real.parseString('3.1416')) # -> ['3.1416']
  3609. # no match when there are internal spaces
  3610. print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
  3611. """
  3612. def __init__( self, expr, joinString="", adjacent=True ):
  3613. super(Combine,self).__init__( expr )
  3614. # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
  3615. if adjacent:
  3616. self.leaveWhitespace()
  3617. self.adjacent = adjacent
  3618. self.skipWhitespace = True
  3619. self.joinString = joinString
  3620. self.callPreparse = True
  3621. def ignore( self, other ):
  3622. if self.adjacent:
  3623. ParserElement.ignore(self, other)
  3624. else:
  3625. super( Combine, self).ignore( other )
  3626. return self
  3627. def postParse( self, instring, loc, tokenlist ):
  3628. retToks = tokenlist.copy()
  3629. del retToks[:]
  3630. retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)
  3631. if self.resultsName and retToks.haskeys():
  3632. return [ retToks ]
  3633. else:
  3634. return retToks
  3635. class Group(TokenConverter):
  3636. """
  3637. Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.
  3638. Example::
  3639. ident = Word(alphas)
  3640. num = Word(nums)
  3641. term = ident | num
  3642. func = ident + Optional(delimitedList(term))
  3643. print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100']
  3644. func = ident + Group(Optional(delimitedList(term)))
  3645. print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']]
  3646. """
  3647. def __init__( self, expr ):
  3648. super(Group,self).__init__( expr )
  3649. self.saveAsList = True
  3650. def postParse( self, instring, loc, tokenlist ):
  3651. return [ tokenlist ]
  3652. class Dict(TokenConverter):
  3653. """
  3654. Converter to return a repetitive expression as a list, but also as a dictionary.
  3655. Each element can also be referenced using the first token in the expression as its key.
  3656. Useful for tabular report scraping when the first column can be used as a item key.
  3657. Example::
  3658. data_word = Word(alphas)
  3659. label = data_word + FollowedBy(':')
  3660. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
  3661. text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
  3662. attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3663. # print attributes as plain groups
  3664. print(OneOrMore(attr_expr).parseString(text).dump())
  3665. # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
  3666. result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
  3667. print(result.dump())
  3668. # access named fields as dict entries, or output as dict
  3669. print(result['shape'])
  3670. print(result.asDict())
  3671. prints::
  3672. ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
  3673. [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
  3674. - color: light blue
  3675. - posn: upper left
  3676. - shape: SQUARE
  3677. - texture: burlap
  3678. SQUARE
  3679. {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
  3680. See more examples at L{ParseResults} of accessing fields by results name.
  3681. """
  3682. def __init__( self, expr ):
  3683. super(Dict,self).__init__( expr )
  3684. self.saveAsList = True
  3685. def postParse( self, instring, loc, tokenlist ):
  3686. for i,tok in enumerate(tokenlist):
  3687. if len(tok) == 0:
  3688. continue
  3689. ikey = tok[0]
  3690. if isinstance(ikey,int):
  3691. ikey = _ustr(tok[0]).strip()
  3692. if len(tok)==1:
  3693. tokenlist[ikey] = _ParseResultsWithOffset("",i)
  3694. elif len(tok)==2 and not isinstance(tok[1],ParseResults):
  3695. tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
  3696. else:
  3697. dictvalue = tok.copy() #ParseResults(i)
  3698. del dictvalue[0]
  3699. if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()):
  3700. tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
  3701. else:
  3702. tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)
  3703. if self.resultsName:
  3704. return [ tokenlist ]
  3705. else:
  3706. return tokenlist
  3707. class Suppress(TokenConverter):
  3708. """
  3709. Converter for ignoring the results of a parsed expression.
  3710. Example::
  3711. source = "a, b, c,d"
  3712. wd = Word(alphas)
  3713. wd_list1 = wd + ZeroOrMore(',' + wd)
  3714. print(wd_list1.parseString(source))
  3715. # often, delimiters that are useful during parsing are just in the
  3716. # way afterward - use Suppress to keep them out of the parsed output
  3717. wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
  3718. print(wd_list2.parseString(source))
  3719. prints::
  3720. ['a', ',', 'b', ',', 'c', ',', 'd']
  3721. ['a', 'b', 'c', 'd']
  3722. (See also L{delimitedList}.)
  3723. """
  3724. def postParse( self, instring, loc, tokenlist ):
  3725. return []
  3726. def suppress( self ):
  3727. return self
  3728. class OnlyOnce(object):
  3729. """
  3730. Wrapper for parse actions, to ensure they are only called once.
  3731. """
  3732. def __init__(self, methodCall):
  3733. self.callable = _trim_arity(methodCall)
  3734. self.called = False
  3735. def __call__(self,s,l,t):
  3736. if not self.called:
  3737. results = self.callable(s,l,t)
  3738. self.called = True
  3739. return results
  3740. raise ParseException(s,l,"")
  3741. def reset(self):
  3742. self.called = False
  3743. def traceParseAction(f):
  3744. """
  3745. Decorator for debugging parse actions.
  3746. When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
  3747. When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.
  3748. Example::
  3749. wd = Word(alphas)
  3750. @traceParseAction
  3751. def remove_duplicate_chars(tokens):
  3752. return ''.join(sorted(set(''.join(tokens)))
  3753. wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
  3754. print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
  3755. prints::
  3756. >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
  3757. <<leaving remove_duplicate_chars (ret: 'dfjkls')
  3758. ['dfjkls']
  3759. """
  3760. f = _trim_arity(f)
  3761. def z(*paArgs):
  3762. thisFunc = f.__name__
  3763. s,l,t = paArgs[-3:]
  3764. if len(paArgs)>3:
  3765. thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
  3766. sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
  3767. try:
  3768. ret = f(*paArgs)
  3769. except Exception as exc:
  3770. sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
  3771. raise
  3772. sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
  3773. return ret
  3774. try:
  3775. z.__name__ = f.__name__
  3776. except AttributeError:
  3777. pass
  3778. return z
  3779. #
  3780. # global helpers
  3781. #
  3782. def delimitedList( expr, delim=",", combine=False ):
  3783. """
  3784. Helper to define a delimited list of expressions - the delimiter defaults to ','.
  3785. By default, the list elements and delimiters can have intervening whitespace, and
  3786. comments, but this can be overridden by passing C{combine=True} in the constructor.
  3787. If C{combine} is set to C{True}, the matching tokens are returned as a single token
  3788. string, with the delimiters included; otherwise, the matching tokens are returned
  3789. as a list of tokens, with the delimiters suppressed.
  3790. Example::
  3791. delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
  3792. delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
  3793. """
  3794. dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
  3795. if combine:
  3796. return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
  3797. else:
  3798. return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
  3799. def countedArray( expr, intExpr=None ):
  3800. """
  3801. Helper to define a counted list of expressions.
  3802. This helper defines a pattern of the form::
  3803. integer expr expr expr...
  3804. where the leading integer tells how many expr expressions follow.
  3805. The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
  3806. If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.
  3807. Example::
  3808. countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']
  3809. # in this parser, the leading integer value is given in binary,
  3810. # '10' indicating that 2 values are in the array
  3811. binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
  3812. countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd']
  3813. """
  3814. arrayExpr = Forward()
  3815. def countFieldParseAction(s,l,t):
  3816. n = t[0]
  3817. arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
  3818. return []
  3819. if intExpr is None:
  3820. intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
  3821. else:
  3822. intExpr = intExpr.copy()
  3823. intExpr.setName("arrayLen")
  3824. intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
  3825. return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
  3826. def _flatten(L):
  3827. ret = []
  3828. for i in L:
  3829. if isinstance(i,list):
  3830. ret.extend(_flatten(i))
  3831. else:
  3832. ret.append(i)
  3833. return ret
  3834. def matchPreviousLiteral(expr):
  3835. """
  3836. Helper to define an expression that is indirectly defined from
  3837. the tokens matched in a previous expression, that is, it looks
  3838. for a 'repeat' of a previous expression. For example::
  3839. first = Word(nums)
  3840. second = matchPreviousLiteral(first)
  3841. matchExpr = first + ":" + second
  3842. will match C{"1:1"}, but not C{"1:2"}. Because this matches a
  3843. previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
  3844. If this is not desired, use C{matchPreviousExpr}.
  3845. Do I{not} use with packrat parsing enabled.
  3846. """
  3847. rep = Forward()
  3848. def copyTokenToRepeater(s,l,t):
  3849. if t:
  3850. if len(t) == 1:
  3851. rep << t[0]
  3852. else:
  3853. # flatten t tokens
  3854. tflat = _flatten(t.asList())
  3855. rep << And(Literal(tt) for tt in tflat)
  3856. else:
  3857. rep << Empty()
  3858. expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
  3859. rep.setName('(prev) ' + _ustr(expr))
  3860. return rep
  3861. def matchPreviousExpr(expr):
  3862. """
  3863. Helper to define an expression that is indirectly defined from
  3864. the tokens matched in a previous expression, that is, it looks
  3865. for a 'repeat' of a previous expression. For example::
  3866. first = Word(nums)
  3867. second = matchPreviousExpr(first)
  3868. matchExpr = first + ":" + second
  3869. will match C{"1:1"}, but not C{"1:2"}. Because this matches by
  3870. expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
  3871. the expressions are evaluated first, and then compared, so
  3872. C{"1"} is compared with C{"10"}.
  3873. Do I{not} use with packrat parsing enabled.
  3874. """
  3875. rep = Forward()
  3876. e2 = expr.copy()
  3877. rep <<= e2
  3878. def copyTokenToRepeater(s,l,t):
  3879. matchTokens = _flatten(t.asList())
  3880. def mustMatchTheseTokens(s,l,t):
  3881. theseTokens = _flatten(t.asList())
  3882. if theseTokens != matchTokens:
  3883. raise ParseException("",0,"")
  3884. rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
  3885. expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
  3886. rep.setName('(prev) ' + _ustr(expr))
  3887. return rep
  3888. def _escapeRegexRangeChars(s):
  3889. #~ escape these chars: ^-]
  3890. for c in r"\^-]":
  3891. s = s.replace(c,_bslash+c)
  3892. s = s.replace("\n",r"\n")
  3893. s = s.replace("\t",r"\t")
  3894. return _ustr(s)
  3895. def oneOf( strs, caseless=False, useRegex=True ):
  3896. """
  3897. Helper to quickly define a set of alternative Literals, and makes sure to do
  3898. longest-first testing when there is a conflict, regardless of the input order,
  3899. but returns a C{L{MatchFirst}} for best performance.
  3900. Parameters:
  3901. - strs - a string of space-delimited literals, or a collection of string literals
  3902. - caseless - (default=C{False}) - treat all literals as caseless
  3903. - useRegex - (default=C{True}) - as an optimization, will generate a Regex
  3904. object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
  3905. if creating a C{Regex} raises an exception)
  3906. Example::
  3907. comp_oper = oneOf("< = > <= >= !=")
  3908. var = Word(alphas)
  3909. number = Word(nums)
  3910. term = var | number
  3911. comparison_expr = term + comp_oper + term
  3912. print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12"))
  3913. prints::
  3914. [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
  3915. """
  3916. if caseless:
  3917. isequal = ( lambda a,b: a.upper() == b.upper() )
  3918. masks = ( lambda a,b: b.upper().startswith(a.upper()) )
  3919. parseElementClass = CaselessLiteral
  3920. else:
  3921. isequal = ( lambda a,b: a == b )
  3922. masks = ( lambda a,b: b.startswith(a) )
  3923. parseElementClass = Literal
  3924. symbols = []
  3925. if isinstance(strs,basestring):
  3926. symbols = strs.split()
  3927. elif isinstance(strs, collections.Iterable):
  3928. symbols = list(strs)
  3929. else:
  3930. warnings.warn("Invalid argument to oneOf, expected string or iterable",
  3931. SyntaxWarning, stacklevel=2)
  3932. if not symbols:
  3933. return NoMatch()
  3934. i = 0
  3935. while i < len(symbols)-1:
  3936. cur = symbols[i]
  3937. for j,other in enumerate(symbols[i+1:]):
  3938. if ( isequal(other, cur) ):
  3939. del symbols[i+j+1]
  3940. break
  3941. elif ( masks(cur, other) ):
  3942. del symbols[i+j+1]
  3943. symbols.insert(i,other)
  3944. cur = other
  3945. break
  3946. else:
  3947. i += 1
  3948. if not caseless and useRegex:
  3949. #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
  3950. try:
  3951. if len(symbols)==len("".join(symbols)):
  3952. return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
  3953. else:
  3954. return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
  3955. except Exception:
  3956. warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
  3957. SyntaxWarning, stacklevel=2)
  3958. # last resort, just use MatchFirst
  3959. return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
  3960. def dictOf( key, value ):
  3961. """
  3962. Helper to easily and clearly define a dictionary by specifying the respective patterns
  3963. for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
  3964. in the proper order. The key pattern can include delimiting markers or punctuation,
  3965. as long as they are suppressed, thereby leaving the significant key text. The value
  3966. pattern can include named results, so that the C{Dict} results can include named token
  3967. fields.
  3968. Example::
  3969. text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
  3970. attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3971. print(OneOrMore(attr_expr).parseString(text).dump())
  3972. attr_label = label
  3973. attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
  3974. # similar to Dict, but simpler call format
  3975. result = dictOf(attr_label, attr_value).parseString(text)
  3976. print(result.dump())
  3977. print(result['shape'])
  3978. print(result.shape) # object attribute access works too
  3979. print(result.asDict())
  3980. prints::
  3981. [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
  3982. - color: light blue
  3983. - posn: upper left
  3984. - shape: SQUARE
  3985. - texture: burlap
  3986. SQUARE
  3987. SQUARE
  3988. {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
  3989. """
  3990. return Dict( ZeroOrMore( Group ( key + value ) ) )
  3991. def originalTextFor(expr, asString=True):
  3992. """
  3993. Helper to return the original, untokenized text for a given expression. Useful to
  3994. restore the parsed fields of an HTML start tag into the raw tag text itself, or to
  3995. revert separate tokens with intervening whitespace back to the original matching
  3996. input text. By default, returns astring containing the original parsed text.
  3997. If the optional C{asString} argument is passed as C{False}, then the return value is a
  3998. C{L{ParseResults}} containing any results names that were originally matched, and a
  3999. single token containing the original matched text from the input string. So if
  4000. the expression passed to C{L{originalTextFor}} contains expressions with defined
  4001. results names, you must set C{asString} to C{False} if you want to preserve those
  4002. results name values.
  4003. Example::
  4004. src = "this is test <b> bold <i>text</i> </b> normal text "
  4005. for tag in ("b","i"):
  4006. opener,closer = makeHTMLTags(tag)
  4007. patt = originalTextFor(opener + SkipTo(closer) + closer)
  4008. print(patt.searchString(src)[0])
  4009. prints::
  4010. ['<b> bold <i>text</i> </b>']
  4011. ['<i>text</i>']
  4012. """
  4013. locMarker = Empty().setParseAction(lambda s,loc,t: loc)
  4014. endlocMarker = locMarker.copy()
  4015. endlocMarker.callPreparse = False
  4016. matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
  4017. if asString:
  4018. extractText = lambda s,l,t: s[t._original_start:t._original_end]
  4019. else:
  4020. def extractText(s,l,t):
  4021. t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
  4022. matchExpr.setParseAction(extractText)
  4023. matchExpr.ignoreExprs = expr.ignoreExprs
  4024. return matchExpr
  4025. def ungroup(expr):
  4026. """
  4027. Helper to undo pyparsing's default grouping of And expressions, even
  4028. if all but one are non-empty.
  4029. """
  4030. return TokenConverter(expr).setParseAction(lambda t:t[0])
  4031. def locatedExpr(expr):
  4032. """
  4033. Helper to decorate a returned token with its starting and ending locations in the input string.
  4034. This helper adds the following results names:
  4035. - locn_start = location where matched expression begins
  4036. - locn_end = location where matched expression ends
  4037. - value = the actual parsed results
  4038. Be careful if the input text contains C{<TAB>} characters, you may want to call
  4039. C{L{ParserElement.parseWithTabs}}
  4040. Example::
  4041. wd = Word(alphas)
  4042. for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
  4043. print(match)
  4044. prints::
  4045. [[0, 'ljsdf', 5]]
  4046. [[8, 'lksdjjf', 15]]
  4047. [[18, 'lkkjj', 23]]
  4048. """
  4049. locator = Empty().setParseAction(lambda s,l,t: l)
  4050. return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
  4051. # convenience constants for positional expressions
  4052. empty = Empty().setName("empty")
  4053. lineStart = LineStart().setName("lineStart")
  4054. lineEnd = LineEnd().setName("lineEnd")
  4055. stringStart = StringStart().setName("stringStart")
  4056. stringEnd = StringEnd().setName("stringEnd")
  4057. _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
  4058. _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
  4059. _escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
  4060. _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
  4061. _charRange = Group(_singleChar + Suppress("-") + _singleChar)
  4062. _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
  4063. def srange(s):
  4064. r"""
  4065. Helper to easily define string ranges for use in Word construction. Borrows
  4066. syntax from regexp '[]' string range definitions::
  4067. srange("[0-9]") -> "0123456789"
  4068. srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
  4069. srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
  4070. The input string must be enclosed in []'s, and the returned string is the expanded
  4071. character set joined into a single string.
  4072. The values enclosed in the []'s may be:
  4073. - a single character
  4074. - an escaped character with a leading backslash (such as C{\-} or C{\]})
  4075. - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
  4076. (C{\0x##} is also supported for backwards compatibility)
  4077. - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
  4078. - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
  4079. - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
  4080. """
  4081. _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
  4082. try:
  4083. return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
  4084. except Exception:
  4085. return ""
  4086. def matchOnlyAtCol(n):
  4087. """
  4088. Helper method for defining parse actions that require matching at a specific
  4089. column in the input text.
  4090. """
  4091. def verifyCol(strg,locn,toks):
  4092. if col(locn,strg) != n:
  4093. raise ParseException(strg,locn,"matched token not at column %d" % n)
  4094. return verifyCol
  4095. def replaceWith(replStr):
  4096. """
  4097. Helper method for common parse actions that simply return a literal value. Especially
  4098. useful when used with C{L{transformString<ParserElement.transformString>}()}.
  4099. Example::
  4100. num = Word(nums).setParseAction(lambda toks: int(toks[0]))
  4101. na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
  4102. term = na | num
  4103. OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
  4104. """
  4105. return lambda s,l,t: [replStr]
  4106. def removeQuotes(s,l,t):
  4107. """
  4108. Helper parse action for removing quotation marks from parsed quoted strings.
  4109. Example::
  4110. # by default, quotation marks are included in parsed results
  4111. quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
  4112. # use removeQuotes to strip quotation marks from parsed results
  4113. quotedString.setParseAction(removeQuotes)
  4114. quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
  4115. """
  4116. return t[0][1:-1]
  4117. def tokenMap(func, *args):
  4118. """
  4119. Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional
  4120. args are passed, they are forwarded to the given function as additional arguments after
  4121. the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
  4122. parsed data to an integer using base 16.
  4123. Example (compare the last to example in L{ParserElement.transformString}::
  4124. hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
  4125. hex_ints.runTests('''
  4126. 00 11 22 aa FF 0a 0d 1a
  4127. ''')
  4128. upperword = Word(alphas).setParseAction(tokenMap(str.upper))
  4129. OneOrMore(upperword).runTests('''
  4130. my kingdom for a horse
  4131. ''')
  4132. wd = Word(alphas).setParseAction(tokenMap(str.title))
  4133. OneOrMore(wd).setParseAction(' '.join).runTests('''
  4134. now is the winter of our discontent made glorious summer by this sun of york
  4135. ''')
  4136. prints::
  4137. 00 11 22 aa FF 0a 0d 1a
  4138. [0, 17, 34, 170, 255, 10, 13, 26]
  4139. my kingdom for a horse
  4140. ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
  4141. now is the winter of our discontent made glorious summer by this sun of york
  4142. ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
  4143. """
  4144. def pa(s,l,t):
  4145. return [func(tokn, *args) for tokn in t]
  4146. try:
  4147. func_name = getattr(func, '__name__',
  4148. getattr(func, '__class__').__name__)
  4149. except Exception:
  4150. func_name = str(func)
  4151. pa.__name__ = func_name
  4152. return pa
# Module-level parse actions built with tokenMap: each token is converted with
# _ustr() and then upper-/lower-cased.
upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
  4157. def _makeTags(tagStr, xml):
  4158. """Internal helper to construct opening and closing tag expressions, given a tag name"""
  4159. if isinstance(tagStr,basestring):
  4160. resname = tagStr
  4161. tagStr = Keyword(tagStr, caseless=not xml)
  4162. else:
  4163. resname = tagStr.name
  4164. tagAttrName = Word(alphas,alphanums+"_-:")
  4165. if (xml):
  4166. tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
  4167. openTag = Suppress("<") + tagStr("tag") + \
  4168. Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
  4169. Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
  4170. else:
  4171. printablesLessRAbrack = "".join(c for c in printables if c not in ">")
  4172. tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
  4173. openTag = Suppress("<") + tagStr("tag") + \
  4174. Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
  4175. Optional( Suppress("=") + tagAttrValue ) ))) + \
  4176. Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
  4177. closeTag = Combine(_L("</") + tagStr + ">")
  4178. openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname)
  4179. closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % resname)
  4180. openTag.tag = resname
  4181. closeTag.tag = resname
  4182. return openTag, closeTag
  4183. def makeHTMLTags(tagStr):
  4184. """
  4185. Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
  4186. tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.
  4187. Example::
  4188. text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
  4189. # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
  4190. a,a_end = makeHTMLTags("A")
  4191. link_expr = a + SkipTo(a_end)("link_text") + a_end
  4192. for link in link_expr.searchString(text):
  4193. # attributes in the <A> tag (like "href" shown here) are also accessible as named results
  4194. print(link.link_text, '->', link.href)
  4195. prints::
  4196. pyparsing -> http://pyparsing.wikispaces.com
  4197. """
  4198. return _makeTags( tagStr, False )
  4199. def makeXMLTags(tagStr):
  4200. """
  4201. Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
  4202. tags only in the given upper/lower case.
  4203. Example: similar to L{makeHTMLTags}
  4204. """
  4205. return _makeTags( tagStr, True )
  4206. def withAttribute(*args,**attrDict):
  4207. """
  4208. Helper to create a validating parse action to be used with start tags created
  4209. with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
  4210. with a required attribute value, to avoid false matches on common tags such as
  4211. C{<TD>} or C{<DIV>}.
  4212. Call C{withAttribute} with a series of attribute names and values. Specify the list
  4213. of filter attributes names and values as:
  4214. - keyword arguments, as in C{(align="right")}, or
  4215. - as an explicit dict with C{**} operator, when an attribute name is also a Python
  4216. reserved word, as in C{**{"class":"Customer", "align":"right"}}
  4217. - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
  4218. For attribute names with a namespace prefix, you must use the second form. Attribute
  4219. names are matched insensitive to upper/lower case.
  4220. If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
  4221. To verify that the attribute exists, but without specifying a value, pass
  4222. C{withAttribute.ANY_VALUE} as the value.
  4223. Example::
  4224. html = '''
  4225. <div>
  4226. Some text
  4227. <div type="grid">1 4 0 1 0</div>
  4228. <div type="graph">1,3 2,3 1,1</div>
  4229. <div>this has no type</div>
  4230. </div>
  4231. '''
  4232. div,div_end = makeHTMLTags("div")
  4233. # only match div tag having a type attribute with value "grid"
  4234. div_grid = div().setParseAction(withAttribute(type="grid"))
  4235. grid_expr = div_grid + SkipTo(div | div_end)("body")
  4236. for grid_header in grid_expr.searchString(html):
  4237. print(grid_header.body)
  4238. # construct a match with any div tag having a type attribute, regardless of the value
  4239. div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
  4240. div_expr = div_any_type + SkipTo(div | div_end)("body")
  4241. for div_header in div_expr.searchString(html):
  4242. print(div_header.body)
  4243. prints::
  4244. 1 4 0 1 0
  4245. 1 4 0 1 0
  4246. 1,3 2,3 1,1
  4247. """
  4248. if args:
  4249. attrs = args[:]
  4250. else:
  4251. attrs = attrDict.items()
  4252. attrs = [(k,v) for k,v in attrs]
  4253. def pa(s,l,tokens):
  4254. for attrName,attrValue in attrs:
  4255. if attrName not in tokens:
  4256. raise ParseException(s,l,"no matching attribute " + attrName)
  4257. if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
  4258. raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
  4259. (attrName, tokens[attrName], attrValue))
  4260. return pa
  4261. withAttribute.ANY_VALUE = object()
  4262. def withClass(classname, namespace=''):
  4263. """
  4264. Simplified version of C{L{withAttribute}} when matching on a div class - made
  4265. difficult because C{class} is a reserved word in Python.
  4266. Example::
  4267. html = '''
  4268. <div>
  4269. Some text
  4270. <div class="grid">1 4 0 1 0</div>
  4271. <div class="graph">1,3 2,3 1,1</div>
  4272. <div>this &lt;div&gt; has no class</div>
  4273. </div>
  4274. '''
  4275. div,div_end = makeHTMLTags("div")
  4276. div_grid = div().setParseAction(withClass("grid"))
  4277. grid_expr = div_grid + SkipTo(div | div_end)("body")
  4278. for grid_header in grid_expr.searchString(html):
  4279. print(grid_header.body)
  4280. div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
  4281. div_expr = div_any_type + SkipTo(div | div_end)("body")
  4282. for div_header in div_expr.searchString(html):
  4283. print(div_header.body)
  4284. prints::
  4285. 1 4 0 1 0
  4286. 1 4 0 1 0
  4287. 1,3 2,3 1,1
  4288. """
  4289. classattr = "%s:class" % namespace if namespace else "class"
  4290. return withAttribute(**{classattr : classname})
# Associativity markers for infixNotation() operator definitions.  LEFT and
# RIGHT are distinct opaque sentinel objects; infixNotation compares the
# third element of each operator tuple against them.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
  4294. def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
  4295. """
  4296. Helper method for constructing grammars of expressions made up of
  4297. operators working in a precedence hierarchy. Operators may be unary or
  4298. binary, left- or right-associative. Parse actions can also be attached
  4299. to operator expressions. The generated parser will also recognize the use
  4300. of parentheses to override operator precedences (see example below).
  4301. Note: if you define a deep operator list, you may see performance issues
  4302. when using infixNotation. See L{ParserElement.enablePackrat} for a
  4303. mechanism to potentially improve your parser performance.
  4304. Parameters:
  4305. - baseExpr - expression representing the most basic element for the nested
  4306. - opList - list of tuples, one for each operator precedence level in the
  4307. expression grammar; each tuple is of the form
  4308. (opExpr, numTerms, rightLeftAssoc, parseAction), where:
  4309. - opExpr is the pyparsing expression for the operator;
  4310. may also be a string, which will be converted to a Literal;
  4311. if numTerms is 3, opExpr is a tuple of two expressions, for the
  4312. two operators separating the 3 terms
  4313. - numTerms is the number of terms for this operator (must
  4314. be 1, 2, or 3)
  4315. - rightLeftAssoc is the indicator whether the operator is
  4316. right or left associative, using the pyparsing-defined
  4317. constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
  4318. - parseAction is the parse action to be associated with
  4319. expressions matching this operator expression (the
  4320. parse action tuple member may be omitted); if the parse action
  4321. is passed a tuple or list of functions, this is equivalent to
  4322. calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
  4323. - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
  4324. - rpar - expression for matching right-parentheses (default=C{Suppress(')')})
  4325. Example::
  4326. # simple example of four-function arithmetic with ints and variable names
  4327. integer = pyparsing_common.signed_integer
  4328. varname = pyparsing_common.identifier
  4329. arith_expr = infixNotation(integer | varname,
  4330. [
  4331. ('-', 1, opAssoc.RIGHT),
  4332. (oneOf('* /'), 2, opAssoc.LEFT),
  4333. (oneOf('+ -'), 2, opAssoc.LEFT),
  4334. ])
  4335. arith_expr.runTests('''
  4336. 5+3*6
  4337. (5+3)*6
  4338. -2--11
  4339. ''', fullDump=False)
  4340. prints::
  4341. 5+3*6
  4342. [[5, '+', [3, '*', 6]]]
  4343. (5+3)*6
  4344. [[[5, '+', 3], '*', 6]]
  4345. -2--11
  4346. [[['-', 2], '-', ['-', 11]]]
  4347. """
  4348. ret = Forward()
  4349. lastExpr = baseExpr | ( lpar + ret + rpar )
  4350. for i,operDef in enumerate(opList):
  4351. opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
  4352. termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
  4353. if arity == 3:
  4354. if opExpr is None or len(opExpr) != 2:
  4355. raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
  4356. opExpr1, opExpr2 = opExpr
  4357. thisExpr = Forward().setName(termName)
  4358. if rightLeftAssoc == opAssoc.LEFT:
  4359. if arity == 1:
  4360. matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
  4361. elif arity == 2:
  4362. if opExpr is not None:
  4363. matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
  4364. else:
  4365. matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
  4366. elif arity == 3:
  4367. matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
  4368. Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
  4369. else:
  4370. raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
  4371. elif rightLeftAssoc == opAssoc.RIGHT:
  4372. if arity == 1:
  4373. # try to avoid LR with this extra test
  4374. if not isinstance(opExpr, Optional):
  4375. opExpr = Optional(opExpr)
  4376. matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
  4377. elif arity == 2:
  4378. if opExpr is not None:
  4379. matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
  4380. else:
  4381. matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
  4382. elif arity == 3:
  4383. matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
  4384. Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
  4385. else:
  4386. raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
  4387. else:
  4388. raise ValueError("operator must indicate right or left associativity")
  4389. if pa:
  4390. if isinstance(pa, (tuple, list)):
  4391. matchExpr.setParseAction(*pa)
  4392. else:
  4393. matchExpr.setParseAction(pa)
  4394. thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
  4395. lastExpr = thisExpr
  4396. ret <<= lastExpr
  4397. return ret
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# Quoted-string expressions.  Inside the quotes each regex accepts: any
# character other than the quote itself, newline, carriage return or backslash;
# a doubled quote character; or a backslash followed either by a single non-"x"
# character or by "x" and one or more hex digits.  The closing quote is matched
# as a separate literal and merged back in by Combine.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# "+" binds tighter than "|", so this is (double-quoted form) | (single-quoted form)
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# a quotedString immediately preceded by a "u" prefix
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
  4405. def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
  4406. """
  4407. Helper method for defining nested lists enclosed in opening and closing
  4408. delimiters ("(" and ")" are the default).
  4409. Parameters:
  4410. - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
  4411. - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
  4412. - content - expression for items within the nested lists (default=C{None})
  4413. - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})
  4414. If an expression is not provided for the content argument, the nested
  4415. expression will capture all whitespace-delimited content between delimiters
  4416. as a list of separate values.
  4417. Use the C{ignoreExpr} argument to define expressions that may contain
  4418. opening or closing characters that should not be treated as opening
  4419. or closing characters for nesting, such as quotedString or a comment
  4420. expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
  4421. The default is L{quotedString}, but if no expressions are to be ignored,
  4422. then pass C{None} for this argument.
  4423. Example::
  4424. data_type = oneOf("void int short long char float double")
  4425. decl_data_type = Combine(data_type + Optional(Word('*')))
  4426. ident = Word(alphas+'_', alphanums+'_')
  4427. number = pyparsing_common.number
  4428. arg = Group(decl_data_type + ident)
  4429. LPAR,RPAR = map(Suppress, "()")
  4430. code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))
  4431. c_function = (decl_data_type("type")
  4432. + ident("name")
  4433. + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
  4434. + code_body("body"))
  4435. c_function.ignore(cStyleComment)
  4436. source_code = '''
  4437. int is_odd(int x) {
  4438. return (x%2);
  4439. }
  4440. int dec_to_hex(char hchar) {
  4441. if (hchar >= '0' && hchar <= '9') {
  4442. return (ord(hchar)-ord('0'));
  4443. } else {
  4444. return (10+ord(hchar)-ord('A'));
  4445. }
  4446. }
  4447. '''
  4448. for func in c_function.searchString(source_code):
  4449. print("%(name)s (%(type)s) args: %(args)s" % func)
  4450. prints::
  4451. is_odd (int) args: [['int', 'x']]
  4452. dec_to_hex (int) args: [['char', 'hchar']]
  4453. """
  4454. if opener == closer:
  4455. raise ValueError("opening and closing strings cannot be the same")
  4456. if content is None:
  4457. if isinstance(opener,basestring) and isinstance(closer,basestring):
  4458. if len(opener) == 1 and len(closer)==1:
  4459. if ignoreExpr is not None:
  4460. content = (Combine(OneOrMore(~ignoreExpr +
  4461. CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
  4462. ).setParseAction(lambda t:t[0].strip()))
  4463. else:
  4464. content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
  4465. ).setParseAction(lambda t:t[0].strip()))
  4466. else:
  4467. if ignoreExpr is not None:
  4468. content = (Combine(OneOrMore(~ignoreExpr +
  4469. ~Literal(opener) + ~Literal(closer) +
  4470. CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
  4471. ).setParseAction(lambda t:t[0].strip()))
  4472. else:
  4473. content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
  4474. CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
  4475. ).setParseAction(lambda t:t[0].strip()))
  4476. else:
  4477. raise ValueError("opening and closing arguments must be strings if no content expression is given")
  4478. ret = Forward()
  4479. if ignoreExpr is not None:
  4480. ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
  4481. else:
  4482. ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
  4483. ret.setName('nested %s%s expression' % (opener,closer))
  4484. return ret
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that this helper modifies its arguments: C{indentStack} is pushed and
    popped by the generated parse actions while parsing, and
    C{blockStatementExpr} has a backslash-plus-line-end ignore expression
    added to it.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # Parse action for each statement in a block: the statement must start at
    # the column on top of indentStack.  A deeper column is a fatal error, a
    # shallower one an ordinary mismatch.  End of input is accepted silently.
    def checkPeerIndent(s,l,t):
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    # Parse action for the start of an indented block: the column must be
    # deeper than the current top of indentStack, and is pushed onto it.
    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    # Parse action for the end of an indented block: the column must be less
    # than the current level and no deeper than the enclosing one; on success
    # the current level is popped.  End of input is accepted without popping.
    def checkUnindent(s,l,t):
        if l >= len(s): return
        curCol = col(l,s)
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more suppressed line ends; only tabs and spaces are skipped as
    # whitespace before each one
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            #~ FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        # left-most block: no INDENT/UNDENT bracketing, only peer checks
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # side effect on the caller's expression: a backslash followed by a line
    # end is ignored inside statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
# Character sets expanded by srange() from hex code ranges 0xc0-0xff
# (skipping 0xd7 and 0xf7) and 0xa1-0xbf plus 0xd7 and 0xf7.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# open/close tag expressions matching any tag name made of letters, digits,
# "_" and ":" (starting with a letter)
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character, paired positionally by zip()
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# matches "&name;" for the names above; the name is exposed as the "entity" group
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
  4591. def replaceHTMLEntity(t):
  4592. """Helper parser action to replace common HTML entities with their special characters"""
  4593. return _htmlEntityMap.get(t.entity)
  4594. # it's easy to get these comment structures wrong - they're very common, so may as well make them available
  4595. cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
  4596. "Comment of the form C{/* ... */}"
  4597. htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
  4598. "Comment of the form C{<!-- ... -->}"
  4599. restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
  4600. dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
  4601. "Comment of the form C{// ... (to end of line)}"
  4602. cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
  4603. "Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"
  4604. javaStyleComment = cppStyleComment
  4605. "Same as C{L{cppStyleComment}}"
  4606. pythonStyleComment = Regex(r"#.*").setName("Python style comment")
  4607. "Comment of the form C{# ... (to end of line)}"
  4608. _commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
  4609. Optional( Word(" \t") +
  4610. ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
  4611. commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
  4612. """(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
  4613. This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
  4614. # some other useful expressions - using lower-case class name since we are really using this as a namespace
  4615. class pyparsing_common:
  4616. """
  4617. Here are some common low-level expressions that may be useful in jump-starting parser development:
  4618. - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
  4619. - common L{programming identifiers<identifier>}
  4620. - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
  4621. - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
  4622. - L{UUID<uuid>}
  4623. - L{comma-separated list<comma_separated_list>}
  4624. Parse actions:
  4625. - C{L{convertToInteger}}
  4626. - C{L{convertToFloat}}
  4627. - C{L{convertToDate}}
  4628. - C{L{convertToDatetime}}
  4629. - C{L{stripHTMLTags}}
  4630. - C{L{upcaseTokens}}
  4631. - C{L{downcaseTokens}}
  4632. Example::
  4633. pyparsing_common.number.runTests('''
  4634. # any int or real number, returned as the appropriate type
  4635. 100
  4636. -100
  4637. +100
  4638. 3.14159
  4639. 6.02e23
  4640. 1e-12
  4641. ''')
  4642. pyparsing_common.fnumber.runTests('''
  4643. # any int or real number, returned as float
  4644. 100
  4645. -100
  4646. +100
  4647. 3.14159
  4648. 6.02e23
  4649. 1e-12
  4650. ''')
  4651. pyparsing_common.hex_integer.runTests('''
  4652. # hex numbers
  4653. 100
  4654. FF
  4655. ''')
  4656. pyparsing_common.fraction.runTests('''
  4657. # fractions
  4658. 1/2
  4659. -3/4
  4660. ''')
  4661. pyparsing_common.mixed_integer.runTests('''
  4662. # mixed fractions
  4663. 1
  4664. 1/2
  4665. -3/4
  4666. 1-3/4
  4667. ''')
  4668. import uuid
  4669. pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
  4670. pyparsing_common.uuid.runTests('''
  4671. # uuid
  4672. 12345678-1234-5678-1234-567812345678
  4673. ''')
  4674. prints::
  4675. # any int or real number, returned as the appropriate type
  4676. 100
  4677. [100]
  4678. -100
  4679. [-100]
  4680. +100
  4681. [100]
  4682. 3.14159
  4683. [3.14159]
  4684. 6.02e23
  4685. [6.02e+23]
  4686. 1e-12
  4687. [1e-12]
  4688. # any int or real number, returned as float
  4689. 100
  4690. [100.0]
  4691. -100
  4692. [-100.0]
  4693. +100
  4694. [100.0]
  4695. 3.14159
  4696. [3.14159]
  4697. 6.02e23
  4698. [6.02e+23]
  4699. 1e-12
  4700. [1e-12]
  4701. # hex numbers
  4702. 100
  4703. [256]
  4704. FF
  4705. [255]
  4706. # fractions
  4707. 1/2
  4708. [0.5]
  4709. -3/4
  4710. [-0.75]
  4711. # mixed fractions
  4712. 1
  4713. [1]
  4714. 1/2
  4715. [0.5]
  4716. -3/4
  4717. [-0.75]
  4718. 1-3/4
  4719. [1.75]
  4720. # uuid
  4721. 12345678-1234-5678-1234-567812345678
  4722. [UUID('12345678-1234-5678-1234-567812345678')]
  4723. """
  4724. convertToInteger = tokenMap(int)
  4725. """
  4726. Parse action for converting parsed integers to Python int
  4727. """
  4728. convertToFloat = tokenMap(float)
  4729. """
  4730. Parse action for converting parsed numbers to Python float
  4731. """
  4732. integer = Word(nums).setName("integer").setParseAction(convertToInteger)
  4733. """expression that parses an unsigned integer, returns an int"""
  4734. hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
  4735. """expression that parses a hexadecimal integer, returns an int"""
  4736. signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
  4737. """expression that parses an integer with optional leading sign, returns an int"""
  4738. fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
  4739. """fractional expression of an integer divided by an integer, returns a float"""
  4740. fraction.addParseAction(lambda t: t[0]/t[-1])
  4741. mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
  4742. """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
  4743. mixed_integer.addParseAction(sum)
  4744. real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
  4745. """expression that parses a floating point number and returns a float"""
  4746. sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
  4747. """expression that parses a floating point number with optional scientific notation and returns a float"""
  4748. # streamlining this expression makes the docs nicer-looking
  4749. number = (sci_real | real | signed_integer).streamline()
  4750. """any numeric expression, returns the corresponding Python type"""
  4751. fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
  4752. """any int or real number, returned as float"""
  4753. identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
  4754. """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
  4755. ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
  4756. "IPv4 address (C{0.0.0.0 - 255.255.255.255})"
  4757. _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
  4758. _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
  4759. _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
  4760. _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
  4761. _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
  4762. ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
  4763. "IPv6 address (long, short, or mixed form)"
  4764. mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
  4765. "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
  4766. @staticmethod
  4767. def convertToDate(fmt="%Y-%m-%d"):
  4768. """
  4769. Helper to create a parse action for converting parsed date string to Python datetime.date
  4770. Params -
  4771. - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})
  4772. Example::
  4773. date_expr = pyparsing_common.iso8601_date.copy()
  4774. date_expr.setParseAction(pyparsing_common.convertToDate())
  4775. print(date_expr.parseString("1999-12-31"))
  4776. prints::
  4777. [datetime.date(1999, 12, 31)]
  4778. """
  4779. def cvt_fn(s,l,t):
  4780. try:
  4781. return datetime.strptime(t[0], fmt).date()
  4782. except ValueError as ve:
  4783. raise ParseException(s, l, str(ve))
  4784. return cvt_fn
  4785. @staticmethod
  4786. def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
  4787. """
  4788. Helper to create a parse action for converting parsed datetime string to Python datetime.datetime
  4789. Params -
  4790. - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})
  4791. Example::
  4792. dt_expr = pyparsing_common.iso8601_datetime.copy()
  4793. dt_expr.setParseAction(pyparsing_common.convertToDatetime())
  4794. print(dt_expr.parseString("1999-12-31T23:59:59.999"))
  4795. prints::
  4796. [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
  4797. """
  4798. def cvt_fn(s,l,t):
  4799. try:
  4800. return datetime.strptime(t[0], fmt)
  4801. except ValueError as ve:
  4802. raise ParseException(s, l, str(ve))
  4803. return cvt_fn
  4804. iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
  4805. "ISO8601 date (C{yyyy-mm-dd})"
  4806. iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
  4807. "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"
  4808. uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
  4809. "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"
  4810. _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
  4811. @staticmethod
  4812. def stripHTMLTags(s, l, tokens):
  4813. """
  4814. Parse action to remove HTML tags from web page HTML source
  4815. Example::
  4816. # strip HTML links from normal text
  4817. text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
  4818. td,td_end = makeHTMLTags("TD")
  4819. table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
  4820. print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
  4821. """
  4822. return pyparsing_common._html_stripper.transformString(tokens[0])
  4823. _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
  4824. + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
  4825. comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
  4826. """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
  4827. upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
  4828. """Parse action to convert tokens to upper case."""
  4829. downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
  4830. """Parse action to convert tokens to lower case."""
  4831. if __name__ == "__main__":
  4832. selectToken = CaselessLiteral("select")
  4833. fromToken = CaselessLiteral("from")
  4834. ident = Word(alphas, alphanums + "_$")
  4835. columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
  4836. columnNameList = Group(delimitedList(columnName)).setName("columns")
  4837. columnSpec = ('*' | columnNameList)
  4838. tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
  4839. tableNameList = Group(delimitedList(tableName)).setName("tables")
  4840. simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")
  4841. # demo runTests method, including embedded comments in test string
  4842. simpleSQL.runTests("""
  4843. # '*' as column list and dotted table name
  4844. select * from SYS.XYZZY
  4845. # caseless match on "SELECT", and casts back to "select"
  4846. SELECT * from XYZZY, ABC
  4847. # list of column names, and mixed case SELECT keyword
  4848. Select AA,BB,CC from Sys.dual
  4849. # multiple tables
  4850. Select A, B, C from Sys.dual, Table2
  4851. # invalid SELECT keyword - should fail
  4852. Xelect A, B, C from Sys.dual
  4853. # incomplete command - should fail
  4854. Select
  4855. # invalid column name - should fail
  4856. Select ^^^ frox Sys.dual
  4857. """)
  4858. pyparsing_common.number.runTests("""
  4859. 100
  4860. -100
  4861. +100
  4862. 3.14159
  4863. 6.02e23
  4864. 1e-12
  4865. """)
  4866. # any int or real number, returned as float
  4867. pyparsing_common.fnumber.runTests("""
  4868. 100
  4869. -100
  4870. +100
  4871. 3.14159
  4872. 6.02e23
  4873. 1e-12
  4874. """)
  4875. pyparsing_common.hex_integer.runTests("""
  4876. 100
  4877. FF
  4878. """)
  4879. import uuid
  4880. pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
  4881. pyparsing_common.uuid.runTests("""
  4882. 12345678-1234-5678-1234-567812345678
  4883. """)