html2txt in E: Sanity Check

Mark S. Miller markm@erights.org
Thu, 15 Apr 1999 21:50:21 -0700


At 06:03 PM 4/15/99 , Mark S. Miller wrote:
>define html2txt(html) {
>    define result := ""
>    while (html != "") {
>        switch (html) {
>            match rx`(?ms)(@text.*?)(@thing(<.*?>|&.*?;))(@rest.*)` {
>                result += text
>                switch (thing) {
>                    match `<@_>` {}
>                    match `&@entity;` {
>                        result += entity2txt(entity)
>                    }
>                }
>                html := rest
>            }
>            match _ {
>                result += html
>                html := ""
>            }
>        }
>    }
>    result
>}

Sorry, I'm an idiot.  Here's a better version:


# Converts a string of HTML into plain text.
#
# Markup tags (`<...>`) are dropped, and character entities (`&...;`) are
# replaced by whatever entity2txt returns for the entity name (the text
# between `&` and `;`). All other text is copied through unchanged.
#
# html    -- the HTML source string
# returns -- the accumulated plain-text string
define html2txt(html) {
    define result := ""
    # Each successful match splits html into three pieces:
    #   text  -- the shortest prefix preceding the first tag or entity
    #   thing -- that tag (`<...>`) or entity (`&...;`)
    #   rest  -- everything after it
    # (?ms) lets `.` span newlines, so a tag may cross line boundaries.
    while (html =~ rx`(?ms)(@text.*?)(@thing(<.*?>|&.*?;))(@rest.*)`) {
        result += text
        # thing is either a tag, which contributes nothing to the output,
        # or an entity, which is translated by entity2txt.
        switch (thing) {
            match `<@_>` {}
            match `&@entity;` {
                result += entity2txt(entity)
            }
        }
        html := rest
    }
    # No tag or entity remains; whatever is left is plain text.
    result + html
}