# simple-prettyprinter.pkg
#
# A very simple package for "prettyprinting" -- formatting
# source code (or whatever) in reasonably well indented fashion.
#
# It is derived from the prettyprinter included with MLRISC,
# extensively overhauled May 2011 by Cynbe [1].
# Compiled by:
#
src/lib/std/standard.libstipulate
package f8b = eight_byte_float; # eight_byte_float is from
src/lib/std/eight-byte-float.pkgherein
package simple_prettyprinter
: Simple_Prettyprinter # Simple_Prettyprinter is from
src/lib/prettyprint/simple/simple-prettyprinter.api {
# This datastructure is public:
#
Prettyprint_Expression # Holds a collection of pretty-printed text.
#
# Leaf values for prettyprint expression trees:
#
= INT Int # 31-bit signed integer.
| INT1 one_word_int::Int
# 32-bit signed integer.
| INTEGER multiword_int::Int
# Indefinition-precision signed integer
| UNT Unt
# 31-bit unsigned integer.
| UNT1 one_word_unt::Unt
# 32-bit unsigned integer.
| FLOAT Float
# 64-bit floating-point number.
| BOOL Bool
#
| CHAR Char
#
| STRING String
#
| NOP
# Placeholder; produces no text output whatever.
# Leaf values containing identifiers like "foo" or "=".
#
# The critical difference between 'ALPHABETIC' and 'PUNCTUATION'
# is just that 'ALPHABETIC' will automatically insert a leading blank
# if it follows an alphabetic, number or string, but 'PUNCTUATION'
# does no such automatic blank insertion. (Sometimes we use
# 'alphabetic' to force blank insertion even on non-alphabetic
# identifiers like "=".)
#
| ALPHABETIC String
# Append a blank if preceding token was an alphabetic, number or string token, then append given string.
| PUNCTUATION String
# Append given string to buffer.
# Explicit whitespace:
#
| MAYBE_BLANK
# Insert a blank, except do nothing if previous token was a blank or newline. Was 'sp'
| NEWLINE
# Start new line, set 'current column' to zero.
# Concatenating multiple expressions
# to make a single expression:
#
| CONS (Prettyprint_Expression, Prettyprint_Expression)
# Print two expressions in given order. Clients typically assign the infix op ++ as a synonym for this.
| CAT List( Prettyprint_Expression )
# Print list of expressions in given order.
# Indented blocks:
#
| ENTER_INDENTED_BLOCK
# Start block indented four spaces relative to enclosing block.
| ENTER_DEEPLY_INDENTED_BLOCK
# Start block indented to current column. was enter_intented_block'
| LEAVE_INDENTED_BLOCK
# Exit block started by either of above two commands.
| INDENT
# Space over to column for innermost indented block.
| INDENT_OFFSET Int
# Space over to column for innermost indented block, plus 'Int' (indent_offset). was indent'
| SET_WRAP_COLUMN Int
# Defaults to 80.
| INDENTED_BLOCK Prettyprint_Expression
# == ENTER_INDENTED_BLOCK ++ prettyprint_expression ++ LEAVE_INDENTED_BLOCK;
| INDENTED_LINE Prettyprint_Expression
# == INDENT ++ prettyprint_expression ++ NEWLINE;
#
| MAYBE_LINEWRAP
# If current column + right_margin > wrap column, insert newline and space over to current indent level + indent_offset.
{ right_margin: Int,
indent_offset: Int
}
# User-defined modes. The modestack
# starts out as ["default"] and is entirely
# for client use; the internal prettyprinter
# package code makes no use of it. This is
# currently heavily used (only) in
#
src/lib/compiler/back/low/tools/adl-syntax/adl-raw-syntax-unparser.pkg # where we print items differently
# in "code" vs "default" modes:
#
| PUSH_MODE String
# Push arbitrary user string on user-controlled mode stack.
| POP_MODE
# Pop top entry from modestack; throws DIE exception if modestack is empty.
| PER_MODE (String -> Prettyprint_Expression)
# User-supplied function will select prettyprint expression based on current mode (i.e., top string on modestack).
# PER_MODE raises exception DIE if modestack is empty at rendering time.
# Random convenience functions:
#
| IN_PARENTHESES Prettyprint_Expression
# == PUNCTUATION "(" ++ prettyprint_expression ++ PUNCTUATION ")";
#
| LIST
# Format a list with given bracket and separator strings, e.g. ["foo","bar"] as "[foo,bar]"
{ leftbracket: Prettyprint_Expression,
separator: Prettyprint_Expression,
rightbracket: Prettyprint_Expression,
elements: List( Prettyprint_Expression )
}
# Print a construct like
#
# {
# ...
# }
#
| BRACKETED_BLOCK
{ leftbracket: String, # Opening bracket for block. Printed using 'punctuation'.
body: Prettyprint_Expression, # Body of block, indented between brackets.
rightbracket: String # Closing bracket for block.
};
# Except for the final functiona
#
# prettyprint_expression_to_string
#
# the rest of this file is private:
block_indentation = 4; # Number of blanks to indent each block.
# Token types. We distinguish ALPHABETIC from NONALPHABETIC
# identifiers because we need to make sure to put a blank
# between successive alphabetic identifiers to keep them
# from merging:
#
package type {
#
Token = STRING # Double-quoted string constants like "abc", also single-quoted char constants like 'x'.
| NUMBER
# Integer or float.
| PUNCTUATION
# Punctuation and operators like ->
| ALPHABETIC
# Vanilla identifiers like "foo".
| SPACE
| NEWLINE
;
};
fun prettyprint_expression_to_string prettyprint_expression
=
{ do prettyprint_expression;
#
string::cat (reverse *strings); # Return contents of buffer as a single string. Most recent string is first in 'strings' list, hence the 'reverse'.
}
where
strings = REF []; # Contents of buffer -- start it out empty.
blockstack = REF []; # Contents of indented-block stack -- start out unindented.
modestack = REF ["default"]; # Contents of user-controlled mode stack.
column = REF 0;
wrap_column = REF 80;
type_of_last_token = REF type::NEWLINE;
fun append_to_buf (string, token_type)
=
{ strings := string ! *strings;
column := *column + size string;
#
type_of_last_token := token_type;
};
stipulate
fun space_if predicate ()
=
if (predicate *type_of_last_token)
#
append_to_buf (" ", type::SPACE);
fi;
herein
sp = space_if \\ (type::SPACE
| type::NEWLINE ) => FALSE; _ => TRUE; end;
# Append a space to buf unless buf already ends in whitespace.
space = space_if \\ (type::SPACE
| type::NEWLINE | type::PUNCTUATION) => FALSE; _ => TRUE; end;
# Append a space to buf unless buf ends in whitespace/punctuation. Keeps two successive alphabetic ids from merging.
end;
stipulate
fun number n
=
{ space ();
append_to_buf (n, type::NUMBER);
};
herein
int = number o int::to_string;
one_word_int = number o one_word_int::to_string;
integer = number o multiword_int::to_string;
unt = number o (\\ w = "0ux" + unt::to_string w);
one_word_unt = number o (\\ w = "0ux" + one_word_unt::to_string w);
float = number o f8b::to_string;
end;
fun alphabetic string = { space (); append_to_buf (string, type::ALPHABETIC ); };
fun punctuation string = append_to_buf (string, type::PUNCTUATION);
bool = alphabetic o bool::to_string;
fun string s = append_to_buf ("\"" + string::to_string s + "\"", type::STRING);
fun char c = append_to_buf ("'" + char::to_string c + "'", type::STRING);
fun newline () # Start new line, set 'current column' to zero.
=
{ strings := "\n" ! *strings;
column := 0;
#
type_of_last_token := type::NEWLINE;
};
fun enter_indented_block () # Start block indented by four spaces relative to enclosing block.
=
case *blockstack
#
[] => blockstack := [block_indentation];
#
column ! _ => blockstack := (column + block_indentation) ! *blockstack;
esac;
fun enter_deeply_indented_block () # Start block indented to current column.
=
blockstack := *column ! *blockstack;
fun leave_indented_block () # Pop blockstack.
=
case blockstack
#
REF (_ ! rest) => blockstack := rest;
_ => raise exception DIE "leave_indented_block";
esac;
fun indent indent_offset # Space over to column for innermost indented block, plus 'indent_offset'.
=
{ next_tab_column
=
case *blockstack innermost_indent ! _ => innermost_indent; # Indent of innermost indented block, if any.
_ => 0; # Leftmost column if no indented blocks at moment.
esac;
column_to_tab_to = next_tab_column + indent_offset;
columns_to_move = column_to_tab_to - *column;
if (columns_to_move > 0)
#
append_to_buf (number_string::pad_left ' ' columns_to_move "", type::SPACE);
fi;
};
# If current column + right_margin > wrap column
# then append newline and indent to column for
# innermost indented block, plus indent_offset:
#
fun maybe_linewrap { right_margin, indent_offset }
=
if (*column + right_margin >= *wrap_column)
#
newline ();
indent indent_offset;
fi;
fun pop_modestack () # Pop the mode stack.
=
case *modestack
#
(mode ! modes) => modestack := modes;
_ => raise exception DIE "pop_mode";
esac;
fun per_mode f # Give current mode (== top string on modestack) to client fn 'f' to get mode-appropriate prettyprint expression.
=
case modestack
#
REF (current_mode ! _) => f current_mode;
_ => raise exception DIE "per_mode: Modestack is empty.";
esac;
# These are our core recursive functions to format a
# Prettyprint_Expression as indented text:
#
fun do prettyprint_expression
=
case prettyprint_expression
#
INT i => int i; # 31-bit signed integer.
INT1 i => one_word_int i; # 32-bit signed integer.
INTEGER i => integer i; # Indefinition-precision signed integer
UNT u => unt u; # 31-bit unsigned integer.
UNT1 u => one_word_unt u; # 32-bit unsigned integer.
FLOAT f => float f; # 64-bit floating-point number.
BOOL b => bool b; #
CHAR c => char c; #
STRING s => string s; #
NOP => (); # Placeholder; produces no text output whatever.
ALPHABETIC s => alphabetic s; # Append a blank if preceding token was an alphabetic, number or string token, then append given string.
PUNCTUATION s => punctuation s; # Append given string to buffer.
MAYBE_BLANK => sp (); # Insert a blank, except do nothing if previous token was a blank or newline. Was 'sp'
NEWLINE => newline (); # Start new line, set 'current column' to zero.
CONS (a, b) => { do a; do b; }; # Print two expressions in given order. Clients typically assign the infix op ++ as a synonym for this.
CAT exps => apply do exps; # Print list of expressions in given order.
ENTER_INDENTED_BLOCK => enter_indented_block (); # Start block indented four spaces relative to enclosing block.
ENTER_DEEPLY_INDENTED_BLOCK => enter_deeply_indented_block (); # Start block indented to current column. was enter_intented_block'
LEAVE_INDENTED_BLOCK => leave_indented_block (); # Exit block started by either of above two commands.
INDENT => indent 0; # Space over to column for innermost indented block.
INDENT_OFFSET i => indent i; # Space over to column for innermost indented block, plus 'i' (indent_offset).
SET_WRAP_COLUMN c => wrap_column := c; # Set column at which wrapping takes place. Defaults to 80.
INDENTED_BLOCK exp => indented_block exp; # == ENTER_INDENTED_BLOCK ++ prettyprint_expression ++ LEAVE_INDENTED_BLOCK;
INDENTED_LINE exp => indented_line exp; # == INDENT ++ prettyprint_expression ++ NEWLINE;
IN_PARENTHESES exp => in_parentheses exp; # == PUNCTUATION "(" ++ prettyprint_expression ++ IDENT ++ PUNCTUATION ")";
MAYBE_LINEWRAP args => maybe_linewrap args; # If near right margin, start new line and indent.
PUSH_MODE mode => modestack := mode ! *modestack; # Push arbitrary user string on user-controlled mode stack.
POP_MODE => pop_modestack (); # Pop top entry from modestack; throws DIE exception if modestack is empty.
PER_MODE f => do (per_mode f); # Give current mode (top string on modestack) to 'f' to get mode-appropriate prettyprint expression to render.
LIST args => list args; # Format a list with given bracket and separator strings, e.g. ["foo","bar"] as "[foo,bar]"
BRACKETED_BLOCK args => bracketed_block args;
esac # fun do
also fun indented_block prettyprint_expression = { enter_indented_block(); do prettyprint_expression; leave_indented_block (); }
also fun indented_line prettyprint_expression = { indent 0; do prettyprint_expression; newline (); }
also fun in_parentheses prettyprint_expression = { punctuation "("; do prettyprint_expression; indent 0; punctuation ")"; }
also
fun list # Format a list with given bracket and separator strings, e.g. ["foo","bar"] as "[foo,bar]"
{ leftbracket, separator, rightbracket, elements } # Say, (punctuation "[", puncturation ",", puncturation "]"), or (alphabetic "begin", alphabetic "then", alphabetic "end")
=
{ do leftbracket;
show elements;
do rightbracket;
}
where
fun show [] => ();
show [exp] => do exp;
show (exp ! exps) => { do exp; do separator; show exps; };
end;
end
# Print a construct like
#
# {
# ...
# }
#
also
fun bracketed_block { leftbracket, body, rightbracket }
=
{ enter_deeply_indented_block ();
punctuation leftbracket;
enter_deeply_indented_block ();
do body;
leave_indented_block ();
indent 0;
punctuation rightbracket;
leave_indented_block ();
};
end; # fun prettyprint_expression_to_string
fun longest_line_in_prettyprint_expression prettyprint_expression
=
{ text = prettyprint_expression_to_string prettyprint_expression;
#
lines = string::tokens {. #c == '\n'; } text;
#
fold_backward (\\ (line,len) = max (string::length_in_bytes line, len)) 0 lines;
};
};
end;
########################################################################################
# Notes
#
###############
# Note [1]:
#
# The original MLRISC package was done 'combinator style', which is to say,
# the prettyprint expression trees were represented as a tree of
# closures generated by partial application of curried functions.
#
# This approach has the minor advantage of speed (the Prettyprint_Expression
# is directly executed as native code rather than interpreted) and the major
# disadvantage of producing Prettyprint_Expressions which are totally opaque,
# making it impossible to write support tools which inspect, tweak or rewrite
# Prettyprint_Expressions.
#
# Consequently I've turned Prettyprint_Expressions into a conventional sumtype.
# I've also done quite a bit of renaming for clarity. -- Cynbe, 2011-05-03