class Ronn::RoffFilter

Filter for converting HTML to ROFF

Constants

HTML_ROFF_ENTITIES

Public Class Methods

new(html_fragment, name, section, tagline, manual = nil, version = nil, date = nil) click to toggle source

Convert Ronn HTML to roff. The HTML input is a fragment, not a complete document.

   # File lib/ronn/roff.rb
# Builds the roff output for an HTML fragment.
#
# Starts with an empty output buffer, writes the title heading, then parses
# +html_fragment+ and runs it through comment removal, whitespace
# normalization and block filtering before writing a final newline.
def initialize(html_fragment, name, section, tagline, manual = nil,
               version = nil, date = nil)
  @buf = []
  title_heading(name, section, tagline, manual, version, date)
  Nokogiri::HTML.fragment(html_fragment).tap do |fragment|
    remove_extraneous_elements!(fragment)
    normalize_whitespace!(fragment)
    block_filter(fragment)
  end
  write("\n")
end

Public Instance Methods

to_s() click to toggle source
   # File lib/ronn/roff.rb
# Returns the buffered output as a single string, with trailing spaces and
# tabs stripped from the end of every line.
def to_s
  joined = @buf.join
  joined.gsub(/[ \t]+$/, '')
end

Protected Instance Methods

block_filter(node) click to toggle source
    # File lib/ronn/roff.rb
 # Emits roff for +node+ treated as block-level content.
 #
 # Dispatches on the node type: arrays/node sets and document/fragment nodes
 # are walked recursively, text nodes and inline elements are handed to
 # +inline_filter+, and each recognized block element is translated into the
 # corresponding macro calls. DTD and comment nodes are ignored; unknown
 # element names only produce a warning.
 #
 # node - a Nokogiri node, a NodeSet/Array of nodes, or nil (ignored).
 #
 # Raises RuntimeError for an <li> outside <ol>/<ul>, for a stray <th>, and
 # for any node type that is not handled.
 def block_filter(node)
   return if node.nil?

   if node.is_a?(Array) || node.is_a?(Nokogiri::XML::NodeSet)
     node.each { |ch| block_filter(ch) }

   elsif node.document? || node.fragment?
     block_filter(node.children)

   elsif node.text?
     # This hack is necessary to support mixed-child-type dd's
     inline_filter(node)

   elsif node.elem?
     case node.name
     when 'html', 'body'
       block_filter(node.children)
     when 'div'
       block_filter(node.children)
     when 'h1'
       # discard
       nop
     when 'h2'
       macro 'SH', quote(escape(node.inner_html))
     when 'h3'
       macro 'SS', quote(escape(node.inner_html))

     when 'p'
       prev = previous(node)
       if prev && %w[dd li blockquote].include?(node.parent.name)
         macro 'IP'
       elsif prev && !%w[h1 h2 h3].include?(prev.name)
         macro 'P'
       elsif node.previous&.text?
         macro 'IP'
       end
       inline_filter(node.children)

     when 'blockquote'
       prev = previous(node)
       # Indent unless the block directly follows a heading
       indent = prev.nil? || !%w[h1 h2 h3].include?(prev.name)
       macro 'IP', %w["" 4] if indent
       block_filter(node.children)
       macro 'IP', %w["" 0] if indent

     when 'pre'
       prev = previous(node)
       indent = prev.nil? || !%w[h1 h2 h3].include?(prev.name)
       macro 'IP', %w["" 4] if indent
       macro 'nf'
       # HACK: strip an initial \n to avoid extra spacing.
       # An empty <pre> has no first child, so guard against nil here.
       first_child = node.children.first
       if first_child&.text?
         text = first_child.to_s
         first_child.replace(text[1..-1]) if text.start_with? "\n"
       end
       inline_filter(node.children)
       macro 'fi'
       macro 'IP', %w["" 0] if indent

     when 'dl'
       macro 'TP'
       block_filter(node.children)
     when 'dt'
       prev = previous(node)
       # The enclosing <dl> already emitted .TP for the first term
       macro 'TP' unless prev.nil?
       inline_filter(node.children)
       write "\n"
     when 'dd'
       if node.at('p')
         block_filter(node.children)
       else
         inline_filter(node.children)
       end
       write "\n"

     when 'ol', 'ul'
       block_filter(node.children)
       macro 'IP', %w["" 0]
     when 'li'
       case node.parent.name
       when 'ol'
         macro 'IP', %W["#{node.parent.children.index(node) + 1}." 4]
       when 'ul'
         macro 'IP', ['"\\[ci]"', '4']
       else
         raise "List element found as a child of non-list parent element: #{node.inspect}"
       end
       if node.at('p,ol,ul,dl,div')
         block_filter(node.children)
       else
         inline_filter(node.children)
       end
       write "\n"

     # Inline elements that show up in block context; 'sup' is included so
     # this list matches the tags inline_filter knows how to render.
     when 'span', 'code', 'b', 'strong', 'kbd', 'samp', 'var', 'em', 'i',
          'u', 'br', 'a', 'sup'
       inline_filter(node)

     when 'table'
       macro 'TS'
       write "allbox;\n"
       block_filter(node.children)
       macro 'TE'
     when 'thead'
       # Convert to format section and first row
       tr = node.children[0]
       header_contents = []
       cell_formats = []
       tr.children.each do |th|
         style = th['style']
         cell_format = case style
                       when 'text-align:left;'
                         'l'
                       when 'text-align:right;'
                         'r'
                       when 'text-align:center;'
                         'c'
                       else
                         'l'
                       end
         header_contents << th.inner_html
         cell_formats << cell_format
       end
       write cell_formats.join(' ') + ".\n"
       write header_contents.join("\t") + "\n"
     when 'th'
       # <th> cells are consumed by the 'thead' branch and never visited here
       raise 'internal error: unexpected <th> element'
     when 'tbody'
       # Let the 'tr' handle it
       block_filter(node.children)
     when 'tr'
       # Convert to a table data row
       node.children.each do |child|
         block_filter(child)
         write "\t"
       end
       write "\n"
     when 'td'
       inline_filter(node.children)

     else
       warn 'unrecognized block tag: %p', node.name
     end

   elsif node.is_a?(Nokogiri::XML::DTD)
     # Ignore
     nop
   elsif node.is_a?(Nokogiri::XML::Comment)
     # Ignore
     nop
   else
     raise "unexpected node: #{node.inspect}"
   end
 end
comment(text) click to toggle source
    # File lib/ronn/roff.rb
# Writes +text+ as a roff comment line (prefixed with `.\"`).
def comment(text)
  line = ".\\\" #{text}"
  writeln(line)
end
escape(text) click to toggle source
    # File lib/ronn/roff.rb
# Escapes +text+ (HTML source) for use in roff output.
#
# Numeric character references are decoded, backslashes, ellipses and the
# characters ', . and - are escaped, the HTML_ROFF_ENTITIES substitutions
# are applied, and finally "&amp;" is turned back into "&".
#
# text - a String or nil.
#
# Returns a new String; nil or empty input is returned as text.to_s.
def escape(text)
  return text.to_s if text.nil? || text.empty?

  # Decode as UTF-8: a bare Integer#chr raises RangeError above 255 and
  # produces binary bytes for 128..255. Code points that are not valid
  # characters leave the original reference untouched.
  decode = lambda do |entity, codepoint|
    begin
      codepoint.chr(Encoding::UTF_8)
    rescue RangeError
      entity
    end
  end

  ent = HTML_ROFF_ENTITIES
  text = text.dup
  text.gsub!(/&#x([0-9A-Fa-f]+);/) { |m| decode.call(m, Regexp.last_match(1).to_i(16)) } # hex entities
  text.gsub!(/&#(\d+);/) { |m| decode.call(m, Regexp.last_match(1).to_i) }               # dec entities
  text.gsub!('\\', '\e')                                # backslash
  text.gsub!('...', '\|.\|.\|.')                        # ellipses
  text.gsub!(/['.-]/) { |m| "\\#{m}" }                  # control chars
  ent.each do |key, val|
    text.gsub!(key, val)
  end
  text.gsub!('&amp;', '&')                              # amps
  text
end
inline_filter(node) click to toggle source
    # File lib/ronn/roff.rb
# Emits roff for +node+ treated as inline content.
#
# Arrays/node sets are walked recursively, text nodes are escaped and
# written, and recognized inline elements are wrapped in the matching font
# escapes. Comment nodes are ignored; unknown element names only produce a
# warning.
#
# node - a Nokogiri node, a NodeSet/Array of nodes, or nil (ignored).
#
# Raises RuntimeError for any other node type.
def inline_filter(node)
  return unless node # is an empty node

  if node.is_a?(Array) || node.is_a?(Nokogiri::XML::NodeSet)
    node.each { |ch| inline_filter(ch) }

  elsif node.text?
    text = node.to_html.dup
    write escape(text)

  elsif node.elem?
    case node.name
    when 'span'
      inline_filter(node.children)
    when 'code'
      # Inside <pre> the text is already literal, so no bold markup
      if child_of?(node, 'pre')
        inline_filter(node.children)
      else
        write '\fB'
        inline_filter(node.children)
        write '\fR'
      end

    when 'b', 'strong', 'kbd', 'samp'
      write '\fB'
      inline_filter(node.children)
      write '\fR'

    when 'var', 'em', 'i', 'u'
      write '\fI'
      inline_filter(node.children)
      write '\fR'

    when 'br'
      macro 'br'

    when 'a'
      if node.classes.include?('man-ref')
        inline_filter(node.children)
      elsif node.has_attribute?('data-bare-link')
        write '\fI'
        inline_filter(node.children)
        write '\fR'
      else
        inline_filter(node.children)
        # An anchor without an href has no URL to append after the link text
        href = node['href']
        if href
          write ' '
          write '\fI'
          write escape(href)
          write '\fR'
        end
      end

    when 'sup'
      # This superscript equivalent is a big ugly hack.
      write '^('
      inline_filter(node.children)
      write ')'

    else
      warn 'unrecognized inline tag: %p', node.name
    end

  elsif node.is_a?(Nokogiri::XML::Comment)
    # Ignore, as block_filter does
    nop

  else
    raise "unexpected node: #{node.inspect}"
  end
end
macro(name, value = nil) click to toggle source
    # File lib/ronn/roff.rb
# Writes a roff request line: a dot, +name+, and the optional +value+
# (a string or a list of arguments) separated by single spaces.
def macro(name, value = nil)
  maybe_new_line
  request = [name, value].compact.join(' ')
  writeln('.' + request)
end
maybe_new_line() click to toggle source
    # File lib/ronn/roff.rb
# Writes a newline unless the buffer is empty or its last chunk already
# ends with one.
def maybe_new_line
  tail = @buf.last
  return unless tail

  write("\n") unless tail[-1] == "\n"
end
nop() click to toggle source
    # File lib/ronn/roff.rb
# Intentional no-op, used to make "ignore this node" branches explicit.
def nop
  nil
end
normalize_whitespace!(node) click to toggle source
   # File lib/ronn/roff.rb
# Collapses whitespace in the text nodes below +node+, in place.
#
# Runs of newlines and spaces become a single space. Text that has no
# sibling on a side, or whose sibling on that side is a block element or a
# <br>, is stripped on that side. Text nodes left empty are removed.
# <pre> elements are not descended into.
def normalize_whitespace!(node)
  # True when the text has nothing, a block element or a <br> next to it
  boundary = lambda do |sibling|
    sibling.nil? || block_element?(sibling.name) || sibling.name == 'br'
  end

  if node.is_a?(Array) || node.is_a?(Nokogiri::XML::NodeSet)
    # Iterate over a copy, since text nodes may be removed while walking
    node.to_a.dup.each { |child| normalize_whitespace!(child) }
  elsif node.text?
    before = node.previous
    after = node.next
    collapsed = node.content.gsub(/[\n ]+/m, ' ')
    collapsed = collapsed.lstrip if boundary.call(before)
    collapsed = collapsed.rstrip if boundary.call(after)
    if collapsed.empty?
      node.remove
    else
      node.content = collapsed
    end
  elsif node.elem?
    # <pre> content is left untouched; other elements are descended into
    normalize_whitespace!(node.children) if node.name != 'pre' && node.children
  elsif node.document? || node.fragment?
    normalize_whitespace!(node.children)
  elsif node.is_a?(Nokogiri::XML::DTD) || node.is_a?(Nokogiri::XML::Comment)
    # ignore
    nop
  else
    warn 'unexpected node during whitespace normalization: %p', node
  end
end
previous(node) click to toggle source
   # File lib/ronn/roff.rb
# Returns the closest preceding sibling of +node+ that is an element,
# skipping over non-element siblings. Returns nil when there is none or when
# +node+ does not respond to +previous+.
def previous(node)
  return unless node.respond_to?(:previous)

  candidate = node.previous
  loop do
    return candidate if candidate.nil? || candidate.elem?

    candidate = candidate.previous
  end
end
quote(text) click to toggle source
    # File lib/ronn/roff.rb
# Wraps +text+ in double quotes for use as a single macro argument.
#
# Embedded double quotes are written as the \(dq special character. The
# sequence \" cannot be used for this, because roff treats it as the start
# of a comment and would drop the rest of the line.
def quote(text)
  # Block form, so the backslash in the replacement is taken literally
  escaped = text.gsub('"') { '\(dq' }
  "\"#{escaped}\""
end
remove_extraneous_elements!(doc) click to toggle source
   # File lib/ronn/roff.rb
# Removes all comment nodes from +doc+, in place.
#
# The nodes are collected first and unlinked afterwards, so the tree is not
# modified while it is being traversed. Deleting a node from the set
# returned by +parent.children+ would only change that temporary set and
# leave the comment in the document.
def remove_extraneous_elements!(doc)
  comments = []
  doc.traverse { |node| comments << node if node.comment? }
  comments.each(&:remove)
end
title_heading(name, section, _tagline, manual, version, date) click to toggle source
   # File lib/ronn/roff.rb
# Writes the generator comments and, when +name+ is given, the .TH title
# line.
#
# The .TH arguments are the upcased name, section, date formatted as
# "Month Year", version and, only when present, the manual name. A nil
# +date+ or +version+ is written as an empty argument.
def title_heading(name, section, _tagline, manual, version, date)
  comment "generated with Ronn-NG/v#{Ronn.version}"
  comment "http://github.com/apjanke/ronn-ng/tree/#{Ronn.revision}"
  return if name.nil?

  # date is nil by default in the constructor, so it must not be
  # dereferenced unconditionally
  fields = [escape(name.upcase), section, date&.strftime('%B %Y'), version]
  fields << manual if manual
  macro 'TH', fields.map { |field| %("#{field}") }.join(' ')
end
warn(text, *args) click to toggle source
    # File lib/ronn/roff.rb
# Prints a warning via Kernel.warn, prefixed with "warn: ".
#
# text - a format string (as for Kernel#format).
# args - the values for the directives in +text+.
def warn(text, *args)
  # Splat the arguments so each format directive receives its own value
  # instead of the whole argument array.
  Kernel.warn format("warn: #{text}", *args)
end
write(text) click to toggle source

Write text to the output buffer.

    # File lib/ronn/roff.rb
# Appends +text+ to the output buffer; nil and empty strings are ignored.
#
# A line of output must not start with an escaped dot, so `\&` is inserted
# before every `\.` that follows a newline, including one at the very start
# of +text+ when the buffer currently ends with a newline.
def write(text)
  return if text.nil? || text.empty?

  # lines cannot start with a '.'. insert zero-width character before.
  guarded = text.gsub(/\n\\\./, "\n\\\\&\\.")
  last_chunk = @buf.last
  at_line_start = last_chunk && last_chunk[-1] == "\n"
  @buf << '\&' if at_line_start && guarded[0, 2] == '\.'
  @buf << guarded
end
writeln(text) click to toggle source

Write text to the output buffer on a new line.

    # File lib/ronn/roff.rb
# Writes +text+ on a line of its own: starts a new line first if the buffer
# does not already end with one, then writes the text and a newline.
def writeln(text)
  maybe_new_line
  [text, "\n"].map { |chunk| write(chunk) }.last
end