# Filter: copy each input record to output, replacing every character that
# has an entry in the table below with its named entity reference "&name;".
# All other characters are passed through unchanged.
#
# utf() is not defined in this file; it is presumably the builtin of the
# Plan 9 awk mentioned below, used here to turn a numeric code point into
# the character that substr() will yield for it.
#
# Earlier experiments, kept for reference (they look like generators for
# sed substitution commands driven by the table lines of this file):
#
# /^e/ { printf "s/%s %b/\&%s;/g\n", $3, utf($3), $7 }
# /^e/ { printf "s/&%s;/%s/g\n", $7, utf($3) }
#
# why doesn't this work ?
# /usr/local/plan9/bin/awk ' BEGIN { FS="[^a-z0-9]" }
# /^e/ { print $3 " " utf($3) " " $7 }
# ' /var/www/werc/bin/contrib/to_html_entities.awk

BEGIN {
	# e[character] = entity name (without the surrounding "&" and ";").

	# Markup-significant ASCII characters.
	e[utf(39)]="apos"
	e[utf(34)]="quot"
	e[utf(38)]="amp"
	e[utf(60)]="lt"
	e[utf(62)]="gt"

	# Code points 160-255.
	e[utf(160)]="nbsp"
	e[utf(161)]="iexcl"
	e[utf(162)]="cent"
	e[utf(163)]="pound"
	e[utf(164)]="curren"
	e[utf(165)]="yen"
	e[utf(166)]="brvbar"
	e[utf(167)]="sect"
	e[utf(168)]="uml"
	e[utf(169)]="copy"
	e[utf(170)]="ordf"
	e[utf(171)]="laquo"
	e[utf(172)]="not"
	e[utf(173)]="shy"
	e[utf(174)]="reg"
	e[utf(175)]="macr"
	e[utf(176)]="deg"
	e[utf(177)]="plusmn"
	e[utf(178)]="sup2"
	e[utf(179)]="sup3"
	e[utf(180)]="acute"
	e[utf(181)]="micro"
	e[utf(182)]="para"
	e[utf(183)]="middot"
	e[utf(184)]="cedil"
	e[utf(185)]="sup1"
	e[utf(186)]="ordm"
	e[utf(187)]="raquo"
	e[utf(188)]="frac14"
	e[utf(189)]="frac12"
	e[utf(190)]="frac34"
	e[utf(191)]="iquest"
	e[utf(192)]="Agrave"
	e[utf(193)]="Aacute"
	e[utf(194)]="Acirc"
	e[utf(195)]="Atilde"
	e[utf(196)]="Auml"
	e[utf(197)]="Aring"
	e[utf(198)]="AElig"
	e[utf(199)]="Ccedil"
	e[utf(200)]="Egrave"
	e[utf(201)]="Eacute"
	e[utf(202)]="Ecirc"
	e[utf(203)]="Euml"
	e[utf(204)]="Igrave"
	e[utf(205)]="Iacute"
	e[utf(206)]="Icirc"
	e[utf(207)]="Iuml"
	e[utf(208)]="ETH"
	e[utf(209)]="Ntilde"
	e[utf(210)]="Ograve"
	e[utf(211)]="Oacute"
	e[utf(212)]="Ocirc"
	e[utf(213)]="Otilde"
	e[utf(214)]="Ouml"
	e[utf(215)]="times"
	e[utf(216)]="Oslash"
	e[utf(217)]="Ugrave"
	e[utf(218)]="Uacute"
	e[utf(219)]="Ucirc"
	e[utf(220)]="Uuml"
	e[utf(221)]="Yacute"
	e[utf(222)]="THORN"
	e[utf(223)]="szlig"
	e[utf(224)]="agrave"
	e[utf(225)]="aacute"
	e[utf(226)]="acirc"
	e[utf(227)]="atilde"
	e[utf(228)]="auml"
	e[utf(229)]="aring"
	e[utf(230)]="aelig"
	e[utf(231)]="ccedil"
	e[utf(232)]="egrave"
	e[utf(233)]="eacute"
	e[utf(234)]="ecirc"
	e[utf(235)]="euml"
	e[utf(236)]="igrave"
	e[utf(237)]="iacute"
	e[utf(238)]="icirc"
	e[utf(239)]="iuml"
	e[utf(240)]="eth"
	e[utf(241)]="ntilde"
	e[utf(242)]="ograve"
	e[utf(243)]="oacute"
	e[utf(244)]="ocirc"
	e[utf(245)]="otilde"
	e[utf(246)]="ouml"
	e[utf(247)]="divide"
	e[utf(248)]="oslash"
	e[utf(249)]="ugrave"
	e[utf(250)]="uacute"
	e[utf(251)]="ucirc"
	e[utf(252)]="uuml"
	e[utf(253)]="yacute"
	e[utf(254)]="thorn"
	e[utf(255)]="yuml"

	# Code points 338-732.
	e[utf(338)]="OElig"
	e[utf(339)]="oelig"
	e[utf(352)]="Scaron"
	e[utf(353)]="scaron"
	e[utf(376)]="Yuml"
	e[utf(402)]="fnof"
	e[utf(710)]="circ"
	e[utf(732)]="tilde"

	# Code points 913-982.
	e[utf(913)]="Alpha"
	e[utf(914)]="Beta"
	e[utf(915)]="Gamma"
	e[utf(916)]="Delta"
	e[utf(917)]="Epsilon"
	e[utf(918)]="Zeta"
	e[utf(919)]="Eta"
	e[utf(920)]="Theta"
	e[utf(921)]="Iota"
	e[utf(922)]="Kappa"
	e[utf(923)]="Lambda"
	e[utf(924)]="Mu"
	e[utf(925)]="Nu"
	e[utf(926)]="Xi"
	e[utf(927)]="Omicron"
	e[utf(928)]="Pi"
	e[utf(929)]="Rho"
	e[utf(931)]="Sigma"
	e[utf(932)]="Tau"
	e[utf(933)]="Upsilon"
	e[utf(934)]="Phi"
	e[utf(935)]="Chi"
	e[utf(936)]="Psi"
	e[utf(937)]="Omega"
	e[utf(945)]="alpha"
	e[utf(946)]="beta"
	e[utf(947)]="gamma"
	e[utf(948)]="delta"
	e[utf(949)]="epsilon"
	e[utf(950)]="zeta"
	e[utf(951)]="eta"
	e[utf(952)]="theta"
	e[utf(953)]="iota"
	e[utf(954)]="kappa"
	e[utf(955)]="lambda"
	e[utf(956)]="mu"
	e[utf(957)]="nu"
	e[utf(958)]="xi"
	e[utf(959)]="omicron"
	e[utf(960)]="pi"
	e[utf(961)]="rho"
	e[utf(962)]="sigmaf"
	e[utf(963)]="sigma"
	e[utf(964)]="tau"
	e[utf(965)]="upsilon"
	e[utf(966)]="phi"
	e[utf(967)]="chi"
	e[utf(968)]="psi"
	e[utf(969)]="omega"
	e[utf(977)]="thetasym"
	e[utf(978)]="upsih"
	e[utf(982)]="piv"

	# Code points 8194-8364.
	e[utf(8194)]="ensp"
	e[utf(8195)]="emsp"
	e[utf(8201)]="thinsp"
	e[utf(8204)]="zwnj"
	e[utf(8205)]="zwj"
	e[utf(8206)]="lrm"
	e[utf(8207)]="rlm"
	e[utf(8211)]="ndash"
	e[utf(8212)]="mdash"
	e[utf(8216)]="lsquo"
	e[utf(8217)]="rsquo"
	e[utf(8218)]="sbquo"
	e[utf(8220)]="ldquo"
	e[utf(8221)]="rdquo"
	e[utf(8222)]="bdquo"
	e[utf(8224)]="dagger"
	e[utf(8225)]="Dagger"
	e[utf(8226)]="bull"
	e[utf(8230)]="hellip"
	e[utf(8240)]="permil"
	e[utf(8242)]="prime"
	e[utf(8243)]="Prime"
	e[utf(8249)]="lsaquo"
	e[utf(8250)]="rsaquo"
	e[utf(8254)]="oline"
	e[utf(8260)]="frasl"
	e[utf(8364)]="euro"

	# Code points 8465-8660.
	e[utf(8465)]="image"
	e[utf(8472)]="weierp"
	e[utf(8476)]="real"
	e[utf(8482)]="trade"
	e[utf(8501)]="alefsym"
	e[utf(8592)]="larr"
	e[utf(8593)]="uarr"
	e[utf(8594)]="rarr"
	e[utf(8595)]="darr"
	e[utf(8596)]="harr"
	e[utf(8629)]="crarr"
	e[utf(8656)]="lArr"
	e[utf(8657)]="uArr"
	e[utf(8658)]="rArr"
	e[utf(8659)]="dArr"
	e[utf(8660)]="hArr"

	# Code points 8704-8901.
	e[utf(8704)]="forall"
	e[utf(8706)]="part"
	e[utf(8707)]="exist"
	e[utf(8709)]="empty"
	e[utf(8711)]="nabla"
	e[utf(8712)]="isin"
	e[utf(8713)]="notin"
	e[utf(8715)]="ni"
	e[utf(8719)]="prod"
	e[utf(8721)]="sum"
	e[utf(8722)]="minus"
	e[utf(8727)]="lowast"
	e[utf(8730)]="radic"
	e[utf(8733)]="prop"
	e[utf(8734)]="infin"
	e[utf(8736)]="ang"
	e[utf(8743)]="and"
	e[utf(8744)]="or"
	e[utf(8745)]="cap"
	e[utf(8746)]="cup"
	e[utf(8747)]="int"
	e[utf(8756)]="there4"
	e[utf(8764)]="sim"
	e[utf(8773)]="cong"
	e[utf(8776)]="asymp"
	e[utf(8800)]="ne"
	e[utf(8801)]="equiv"
	e[utf(8804)]="le"
	e[utf(8805)]="ge"
	e[utf(8834)]="sub"
	e[utf(8835)]="sup"
	e[utf(8836)]="nsub"
	e[utf(8838)]="sube"
	e[utf(8839)]="supe"
	e[utf(8853)]="oplus"
	e[utf(8855)]="otimes"
	e[utf(8869)]="perp"
	e[utf(8901)]="sdot"

	# Code points 8968-9830.
	e[utf(8968)]="lceil"
	e[utf(8969)]="rceil"
	e[utf(8970)]="lfloor"
	e[utf(8971)]="rfloor"
	e[utf(9001)]="lang"
	e[utf(9002)]="rang"
	e[utf(9674)]="loz"
	e[utf(9824)]="spades"
	e[utf(9827)]="clubs"
	e[utf(9829)]="hearts"
	e[utf(9830)]="diams"
}

# Every input record: walk it one character at a time and emit either the
# entity reference or the character itself, then terminate the line.
{
	# Measure the record once instead of on every loop iteration.
	n = length($0)
	for (i = 1; i <= n; i++) {
		c = substr($0, i, 1)
		# "in" tests membership without creating an empty e[c] element
		# for each ordinary character, which a plain e[c] reference would do.
		if (c in e)
			printf "&%s;", e[c]
		else
			# %s prints the one-character string as-is; %c on a string
			# argument may emit only its first byte in some awks.
			printf "%s", c
	}
	printf "\n"
}