; #### RTF2XML version 0.9
; #### Copyright (C) 1997-2000 Rick Geimer
; #### This program is free software; you can redistribute it and/or
; #### modify it under the terms of the GNU General Public Licence
; #### as published by the Free Software Foundation; either version
; #### 2 of the licence, or (at your option) any later version.
; #### This program is distributed in the hope that it will be useful,
; #### but WITHOUT ANY WARRANTY; without even the implied warranty of
; #### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
; #### the GNU General Public Licence for more details.
; #### You should have received a copy of the GNU General Public Licence
; #### along with this program; if not, write to the Free Software
; #### Foundation, Inc., 675 Mass Ave, Cambridge, MA, 02139, USA.
; #### The above licence is also available online at "http://www.gnu.org/".
; #### Rick Geimer
; #### rtf2xml@xmeta.com
; #### 2401 Huran Drive
; #### San Jose, CA, 95122
; #### USA

; Layout note: a ";" comment runs to the end of its physical line, so every
; declaration, rule and comment in this file must sit on its own line(s).
; Find rules are tried in the order written; do not reorder them.

; Pattern macros, general macros and global declarations.  Names used below
; that are not defined in this file (splb, value-rtf-end, static-rtf-end,
; the *-pattern names, sgml, sgml-log, resource-path, the switches, the
; entity/file/group tables, uni-bytes, ...) presumably come from these
; includes -- verify there before changing their use here.
include "patmacs.xin"
include "macros.xin"
include "vars.xin"

; get-input: produces the text handed to the SGML parser.
;   * sends output to both sgml and sgml-log, but only when sgml-log is
;     attached;
;   * emits the contents of "rtfdoc.dec" from the resource-path directory,
;     followed by two "%n" newlines;
;   * submits every file named on the command line to the find rules of
;     group parse-rtf.
define function get-input() as
   output-to (sgml and sgml-log) when sgml-log is attached
   output (file "%g(resource-path)rtfdoc.dec") || '%n' || "%n"
   repeat over #command-line-names
      using group parse-rtf
         submit file #command-line-names
   again

; Start-up: print the banner on #error and derive dependent switches.
process-start
   put #error "RTF2XML version 0.9" || "%n"
   ; output-sgml implies escape-unicode ...
   do when active output-sgml
      activate escape-unicode
   done
   ; ... and either unicode switch implies ansi-stylenames.
   do when active (escape-unicode or no-unicode)
      activate ansi-stylenames
   done

; Main rule: open the log (named by the current value of sgml-log, only when
; it is attached), then parse the markup generated by get-input() as an SGML
; document.  "%c" drives the parse of the document content.
process
   open sgml-log with domain-free as "%g(sgml-log)" when sgml-log is attached
   do sgml-parse document scan input get-input()
      using group parse-rtf
         output "%c"
   done

; FIND RULES

group #implied

; An escaped backslash or brace ("\\", "\{", "\}") is copied through as-is.
find "\" ["\{}"] = rtf-name
   output "%x(rtf-name)"

group parse-rtf

; Control word with a numeric parameter -> empty element with a VALUE
; attribute; the tag name is the upper-cased control word.
; NOTE(review): the optional "-" is matched outside the num-pat capture, so
; the sign of a negative parameter is not written to VALUE -- confirm this
; is intended before changing it.
find "\" value-elements-pattern=tag-pat "-"? digit+=num-pat value-rtf-end splb
   output "<%ux(tag-pat) VALUE=%"%x(num-pat)%"/>"

; Control word without a parameter -> empty element.
find "\" static-elements-pattern=tag-pat static-rtf-end splb
   output "<%ux(tag-pat)/>"

; \subdocumentN while link-subdocs is active.
; NOTE(review): the output string is empty here; it may once have carried
; markup -- confirm against a pristine copy of the program.
find "\subdocument" digit+=file-id-pat value-rtf-end splb
      when active link-subdocs
   output ""

; \subdocumentN while link-subdocs is not active: submit the referenced file
; so that it is converted in place.  The file named in file-name-table is
; used when it exists; otherwise the one named in file-table.
; NOTE(review): the third action has the same guard as the second and
; outputs an empty string -- looks like lost or unfinished handling; confirm.
find "\subdocument" digit+=file-id-pat value-rtf-end splb
      when not active link-subdocs
   using file-name-table key "%x(file-id-pat)"
      submit file "%g(file-name-table)"
         when file "%g(file-name-table)" exists
   using file-table key "%x(file-id-pat)"
      submit file "%g(file-table)"
         when file "%g(file-table)" exists
              and not file "%g(file-name-table)" exists
   using file-table key "%x(file-id-pat)"
      output ""
         when file "%g(file-table)" exists
              and not file "%g(file-name-table)" exists

; \ucN: remember N in uni-bytes; the \u rule below uses it as the number of
; fallback items that follow each \u.
find "\uc" digit+=uni-bytes-pat value-rtf-end splb
   reset uni-bytes to uni-bytes-pat

; A run of \uN escapes, each followed by exactly uni-bytes fallback items
; (a \'hh escape, a plain character, or an escaped backslash/brace).  The
; whole run is passed through unchanged.
find ("\u" digit+ splb
      (("\'" [digit or "ABCDEFabcdef"]{2} splb)
       or [any-text except "\{}"]
       or ("\\" or "\{" or "\}" splb)){"%d(uni-bytes)"}
     )+ = unicode-string
   output "%x(unicode-string)"

; Control words listed in rtf-ents are replaced via entity-table.
find "\" rtf-ents => entity-name static-rtf-end splb
   output entity-table key entity-name

; Any other control word (with or without a parameter) is dropped.
find "\" ((any-value-rtf value-rtf-end) or
          (any-static-rtf static-rtf-end)
         ) = rtf-name splb
   output ""

; Inside FONTTBL: opening brace of a group that is not a "\*" destination.
; NOTE(review): only a newline is output; markup may be missing -- confirm.
find "{" any-line-break? lookahead not "\*"
      when element is FONTTBL
   output "%n"

; Inside FONTTBL: a brace-less font entry "\fN ... ;" is re-submitted so the
; other rules process its contents.
; NOTE(review): the surrounding output strings carry no markup -- confirm.
find ("\f" digit+ value-rtf-end splb [any except ";"]+ ";")=fontspec-pattern
      when element is FONTTBL
   output "%n"
   submit fontspec-pattern
   output ""

; Inside STYLESHEET: start of a style entry, either "{\*" followed by \csN
; or a "{" that is not followed by "\*".
; NOTE(review): only a newline is output; markup may be missing -- confirm.
find ("{" any-line-break? "\*" splb lookahead ("\cs" digit+) or
      "{" any-line-break? lookahead not "\*"
     )
      when element is STYLESHEET
   output "%n"

; The ";" terminator inside these elements is discarded.
find ";"
      when element is (FONTSPEC or STYLSPEC or COLORTBL)

; Group opened by a parameterless control word -> start tag.
find "{" any-line-break? "\" static-group-pattern = tag-pat static-rtf-end splb
   output "<%ux(tag-pat)>"

; Group opened by a control word with a numeric parameter -> start tag with
; a VALUE attribute.
find "{" any-line-break? "\" value-group-pattern => tag-pat
      digit+=value-pat value-rtf-end splb
   output "<%ux(tag-pat) VALUE=%"%x(value-pat)%">"

; "{\*\name" destination whose name is in static-dest-pattern -> start tag.
find "{" any-line-break? "\*" splb "\" static-dest-pattern = tag-pat
      static-rtf-end splb
   output "<%ux(tag-pat)>"

; \'hh hexadecimal character escape -> &char-hh; entity reference.
find "\'" ([digit or "ABCDEFabcdef"]{2})=val-pat
   output "&char-%x(val-pat);"

; Markup-significant characters and the \~, \- and \_ control symbols are
; replaced via entity-table.
find ( "<" or ">" or "&" or "'" or '"' or "\~" or
       ("\" ("-" or "_"))
     ) => entity-name
   output entity-table key entity-name

find "\ " ; remove useless RTF tags

find "%0#" ; remove any null characters

; Remaining control characters (everything below 32 except 9, 10 and 13)
; plus 127 and 255 -> &char-NN; with the character code written in base 16.
find (["%1#" to "%8#"] or "%11#" or "%12#" or ["%14#" to "%31#"]
      or "%127#" or "%255#")=the-char
   local counter ascii-val
   reset ascii-val to the-char binary 0
   output "&char-%16rd(ascii-val);"

; Document header "{\rtfN".
; NOTE(review): rtf-vers is captured but unused and only a newline is
; output; markup may be missing -- confirm.
find "{" any-line-break? "\rtf" digit+=rtf-vers value-rtf-end splb
   output "%n"

; Any other "{\*\name[-]N" destination: nothing is output for the opener.
; NOTE(review): the output string is empty; markup may be missing -- confirm.
find "{" any-line-break? "\*" splb "\" letter+ => dest-name "-"?
      digit+=val-pat value-rtf-end splb
   output ""

; Any other "{\*\name" destination without a parameter: likewise.
find "{" any-line-break? "\*" splb "\" letter+ => dest-name static-rtf-end splb
   output ""

; Remaining group delimiters are translated via the group-tags table.
find ( "{" or "}" ) = group-tag-pat
   output group-tags key group-tag-pat

; Line breaks are discarded unless a PICT element is open.
find any-line-break
      when not open element is PICT

group #implied

; Element rules for the generated markup.
include "elements.xin"