150d49372f
### What changes were proposed in this pull request? This PR proposes to use the Pygments-compatible output format provided by Rouge. As of https://github.com/apache/spark/pull/26521, we use Rouge instead of the Pygments wrapper in Ruby. Rouge claims Pygments compatibility, so we should produce the same output as Pygments does. ```ruby Rouge::Formatters::HTMLPygments.new(formatter) ``` wraps code with `<div class="highlight"><pre>...` properly. ### Why are the changes needed? To keep the documentation pretty and not broken. ### Does this PR introduce any user-facing change? Theoretically, no. This is rather a regression fix in the documentation (the regression was introduced only by https://github.com/apache/spark/pull/26521 in master). See the malformed doc in the preview - https://spark.apache.org/docs/3.0.0-preview2/sql-pyspark-pandas-with-arrow.html ### How was this patch tested? Manually built the doc. **Before:** ![Screen Shot 2020-01-13 at 10 21 28 AM](https://user-images.githubusercontent.com/6477701/72229159-ba766a80-35ef-11ea-9a5d-9583448e7c1c.png) **After:** ![Screen Shot 2020-01-13 at 10 26 33 AM](https://user-images.githubusercontent.com/6477701/72229157-b34f5c80-35ef-11ea-8b3a-492e8aa0f82a.png) Closes #27182 from HyukjinKwon/SPARK-28752-followup. Authored-by: HyukjinKwon <gurwls223@apache.org> Signed-off-by: HyukjinKwon <gurwls223@apache.org>
127 lines
4.3 KiB
Ruby
127 lines
4.3 KiB
Ruby
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
require 'liquid'
|
|
require 'rouge'
|
|
|
|
module Jekyll
|
|
class IncludeExampleTag < Liquid::Tag
|
|
|
|
# Creates the tag and keeps the raw markup text (everything written after the
# tag name) so that `render` can parse it later.
#
# All three arguments are forwarded unchanged to the superclass initializer.
def initialize(tag_name, markup, tokens)
  @markup = markup
  super(tag_name, markup, tokens)
end
|
|
|
|
# Renders the tag as syntax-highlighted HTML for the requested example snippet.
#
# The tag markup is either "<file>" or "<label> <file>". <file> is resolved
# relative to "../examples/src/main" under the site source directory, and its
# extension selects the Rouge lexer. When a label is given, only the code
# between the "$example on:<label>$" / "$example off:<label>$" markers is used.
#
# context - the Liquid render context; only context.registers[:site].source is read.
#
# Returns the highlighted code followed by a small hint naming the example file.
# Terminates the process with status 1 when the example file cannot be read.
def render(context)
  site = context.registers[:site]
  config_dir = '../examples/src/main'
  @code_dir = File.join(site.source, config_dir)

  # With two or more tokens the first is the snippet label and the second the file.
  parts = @markup.strip.split(' ')
  if parts.length > 1
    @snippet_label = ":#{parts[0]}"
    snippet_file = parts[1]
  else
    @snippet_label = ''
    snippet_file = parts[0]
  end

  @file = File.join(@code_dir, snippet_file)
  @lang = snippet_file.split('.').last

  begin
    # File.read opens, reads and closes the file in one call, so no handle is leaked.
    code = File.read(@file).encode("UTF-8")
  rescue => e
    # We need to explicitly exit on exceptions here because Jekyll will silently swallow
    # them, leading to silent build failures (see https://github.com/jekyll/jekyll/issues/5104)
    puts(e)
    puts(e.backtrace)
    exit 1
  end
  code = select_lines(code).strip

  formatter = Rouge::Formatters::HTMLPygments.new(Rouge::Formatters::HTML.new)
  # Rouge::Lexer.find returns nil for an extension it does not know; render such
  # files as plain text instead of failing with a NoMethodError on nil.
  lexer = Rouge::Lexer.find(@lang) || Rouge::Lexers::PlainText
  rendered_code = formatter.format(lexer.lex(code))

  hint = "<div><small>Find full example code at " \
    "\"examples/src/main/#{snippet_file}\" in the Spark repo.</small></div>"

  rendered_code + hint
end
|
|
|
|
# Removes the indentation shared by every non-blank line of a code block, so the
# block is rendered flush left regardless of how deeply it was nested in the
# example file.
#
# lines - Array of Strings, one per source line.
#
# Only leading spaces count as indentation. Blank (whitespace-only) lines are
# ignored when measuring and are returned untouched.
#
# Returns a new Array of Strings.
def trim_codeblock(lines)
  indent_widths = lines
    .reject { |line| line.strip.empty? }
    .map { |line| line[/\A */].length }
  shared_indent = indent_widths.min

  lines.map do |line|
    if line.strip.empty?
      line
    else
      line[shared_indent..-1]
    end
  end
end
|
|
|
|
# Extracts the labelled example blocks from a source file.
#
# A block starts on the line after one containing "$example on<label>$" and ends
# on the line before the matching "$example off<label>$", where <label> is
# @snippet_label ("" or ":name"). Blocks must not overlap. Each block is
# de-indented with trim_codeblock, lines carrying any other "$example " tag are
# dropped, and every block is terminated by an extra newline so that consecutive
# blocks are separated by an empty line.
#
# code - String holding the whole example file.
#
# Returns the selected code as a String.
# Raises RuntimeError when the on/off label counts differ, when no label is
# found, when blocks overlap, when an on and off label share a line, or when an
# off label precedes its on label.
def select_lines(code)
  lines = code.each_line.to_a
  on_marker = "$example on#{@snippet_label}$"
  off_marker = "$example off#{@snippet_label}$"

  # Line indices of the start and end labels.
  start_indices = lines.each_index.select { |i| lines[i].include?(on_marker) }
  end_indices = lines.each_index.select { |i| lines[i].include?(off_marker) }

  raise "Start indices amount is not equal to end indices amount, see #{@file}." \
    unless start_indices.size == end_indices.size

  raise "No code is selected by include_example, see #{@file}." \
    if start_indices.empty?

  last_index = -1
  blocks = start_indices.zip(end_indices).map do |start, endline|
    raise "Overlapping between two example code blocks are not allowed, see #{@file}." \
      if start <= last_index
    raise "$example on$ should not be in the same line with $example off$, see #{@file}." \
      if start == endline
    # Without this check a reversed pair would build a range whose end can be
    # negative (e.g. endline == 0 gives -1), which Array#[] treats as "until the
    # end of the file" and would silently select unrelated code.
    raise "$example off$ should not appear before $example on$, see #{@file}." \
      if endline < start
    last_index = endline

    trimmed = trim_codeblock(lines[(start + 1)..(endline - 1)])
    # Filter out possible example tags of overlapped labels.
    trimmed.reject { |line| line.include?('$example ') }.join + "\n"
  end

  blocks.join
end
|
|
end
|
|
end
|
|
|
|
# Register IncludeExampleTag with Liquid under the tag name 'include_example'.
Liquid::Template.register_tag('include_example', Jekyll::IncludeExampleTag)
|