## core-prettyprinter-g.pkg
#
# This file is intended to contain just the core prettyprint mill code.
# Convenience code for the benefit of code clients belongs in wapper pkg
#
#
src/lib/prettyprint/big/src/base-prettyprinter-g.pkg#
# For a general motivation and overview see Note[1] at bottom of:
#
#
src/lib/prettyprint/big/src/core-prettyprinter.api#
# Our only code client is the wrapper which makes us more palatable to client code:
#
#
src/lib/prettyprint/big/src/base-prettyprinter-g.pkg#
# The package usually of directly interest to application programmers is
#
#
src/lib/prettyprint/big/src/standard-prettyprinter.pkg#
#
#############################################################################################
# Ok, this whole prettyprinter thing is coming into focus.
#
# ##########################################################
# It is all about boxes and squibs which do different things
# depending on whether the box is monoline or multiline.
# ##########################################################
#
# The generic box record should maybe include
#
# .enter_box_stuff_when_monoline: Prettyprinter -> Void, # Or maybe these should be Ref(List(Prettyprinter -> Void)) to allow easy incremental updates?
# .enter_box_stuff_when_multiline: Prettyprinter -> Void,
#
# .exit_box_stuff_when_monoline: Prettyprinter -> Void,
# .exit_box_stuff_when_multiline: Prettyprinter -> Void
#
# to allow arbitrary enter-box
# and we should have a generic squib
#
# pp.custom: { monoline: Void -> { tab: Int, text: String }, # We don't provide 'column' because we want tabbing behavior abstract.
# multiline: Void -> { indent: Int, text: String } #
# }
#
# to do one set of commands when the box is monoline
# and another set of commands when the box is multiline.
#
# On the specific command front, I think we need a
#
# pp.pad n
#
# command which emits 'n' blanks when the box is MULTI-line
# and does nothing when it is monoline.
#
# We also want I think more prettyprinter defaults governing
# weird hacks like newline/indent collapsing, and a
#
# pp.box' -> List( Default_Overrides ) -> ...
#
# which allows setting all the box behaviors explicitly on a
# box-by-box basis.
#
# ##########################################################
# Tabs are a global coherency issue; they should be set once
# globally in the prettyprinter and squibs should work in
# terms of tabs.
# For my standard layout:
# tabstops should be every '2'
# .tab 1 should move one tabstop.
# .tab 2 should move to an even-number tabstop -- NOT JUST TWO TABS.
#
# .indent i should be just like .tab except relative to left margin not cursor.
# It will ultimately do a newline iff column > target location.
# .tab and .indent should both just set a virtual location,
# NOT actually output newlines and blanks.
# ##########################################################
#
#
#############################################################################################
#
#
# The core engine for the system prettyprinter.
#
# See ../README.
#
# Concise overview:
#
# -> The only purpose of the prettyprinter is to decide where
# to put newlines, whitespace and indentation.
#
# -> The prettyprinter views its input stream as consisting
# of known-width 'styled_strings' containing the useful text to be
# printed, and of 'breaks', which mark places where it is
# allowed to insert a newline.
#
# -> The desired structure is represented as a set of nested
# 'boxes', where a box essentially indicates the newline
# insertion policy to be followed for some stretch of text.
# Each box also has some default indentation for each line,
# which will typically increase with box nesting level.
#
# -> The simplest box type is 'h' (horizontal)', in which
# breaks are -never- converted to newlines.
#
# -> The next simplest box type is 'v' (vertical)', in which
# breaks are -always- converted to newlines.
#
# -> The 'line' box type is slightly more sophisticated: It
# behaves as an 'h' box if the result will fit on a line,
# else as a v box: In simple cases, this results in the
# styled_strings in the box all being placed in a line, either
# horizontal or vertical. In an line box, either all the
# breaks produce newlines, or else none do.
#
# -> The 'wrap' box type is the most commonly used, and implements
# the familiar word-wrap algorithm: A break produces a newline
# iff this is required to keep the line length within width limits.
#
# 2007-09-10 CrT: Completely rewritten. Twice. ;-)
# 2012-11-02 CrT: Completely rewritten yet again - twice!
#
# Design goals:
#
# I) For simplicity and consistency, layout of a box should depend
# only on its contents, not on anything outside of it, such as
# how far it is indented.
#
# II) For readability, the inked part of a line (i.e., the part left
# after stripping leading and trailing whitespace) should be about
# 10-100 chars long. For example, we don't want to degenerate
# to vertical columns of short words against the right margin.
#
# III) We are willing to have output extend arbitrarily far to the
# right (everyone has scrollbars these days) if necessary,
# but we want to minimize rightward drift due to indentation
# where reasonably possible.
#
# See also:
#
src/lib/prettyprint/big/src/standard-prettyprinter.pkg# Compiled by:
#
src/lib/prettyprint/big/prettyprinter.lib### "This task was appointed to you, Frodo of the Shire.
### If you do not find a way, no one will."
### --Galadriel
stipulate
package fil = file__premicrothread; # file__premicrothread is from
src/lib/std/src/posix/file--premicrothread.pkg package l2s = list_to_string; # list_to_string is from
src/lib/src/list-to-string.pkg package ns = number_string; # number_string is from
src/lib/std/src/number-string.pkgherein
# This generic is invoked (only) from
#
#
src/lib/prettyprint/big/src/base-prettyprinter-g.pkg #
generic package core_prettyprinter_g ( #
# ====================
# # "tt" == "traitful text"
package tt: Traitful_Text; # Traitful_Text is from
src/lib/prettyprint/big/src/traitful-text.api package out: Prettyprint_Output_Stream; # Prettyprint_Output_Stream is from
src/lib/prettyprint/big/src/out/prettyprint-output-stream.api # out will be something like html_prettyprint_output_stream from
src/lib/prettyprint/big/src/out/html-prettyprint-output-stream.pkg sharing tt::Texttraits == out::Texttraits;
)
# : (weak)
# api {
# include api Core_Prettyprinter; # Core_Prettyprinter is from
src/lib/prettyprint/big/src/core-prettyprinter.api#
# prettyprint_prettyprinter
# :
# (fil::Output_Stream, Prettyprinter)
# ->
# Void;
# # This api is commented out because at the moment I don't see a way
# open_box # to use it export open_box, since it depends on
# (Prettyprinter, :Pp, left_margin_is, format) # Left_Margin_Is and Break_Policy which come from typ. (Yes, we could move them out of typ.)
# }
{
debug_prints = FALSE; # When debugging I usually just replace debug_prints by *log::debugging throughout this file.
# Usually you'll want to do the same in
src/lib/prettyprint/big/src/core-prettyprinter-box-formatting-policies-g.pkg # Note that with debug_prints==FALSE, 'if debug_prints ... fi;' will optimize to no code produced courtesy of dead-code removal.
too_long = 8888; # A box-length value picked to be large
# enough to not fit in any plausible box,
# but small enough that adding a few
# together won't produce integer overflow.
package typ # Our core datastructures are parameterized over 'out' and 'tt',
= #
core_prettyprinter_types_g ( # core_prettyprinter_types_g is from
src/lib/prettyprint/big/src/core-prettyprinter-types-g.pkg #
package out = out; # package out is from
src/lib/prettyprint/big/src/standard-prettyprinter.pkg );
package dbg # Our datastructure prettyprinter depends on our datastructures.
=
core_prettyprinter_debug_g ( # core_prettyprinter_debug_g is from
src/lib/prettyprint/big/src/core-prettyprinter-debug-g.pkg #
package typ = typ;
);
package box #
=
core_prettyprinter_box_formatting_policies_g ( # core_prettyprinter_box_formatting_policies_g is from
src/lib/prettyprint/big/src/core-prettyprinter-box-formatting-policies-g.pkg #
package typ = typ;
package dbg = dbg;
too_long = too_long;
);
prettyprint_prettyprinter = dbg::prettyprint_prettyprinter;
default_tabstops_are_every = 2; # This can be overridden via TABSTOPS_ARE_EVERY.
default_target_box_width = 100; # This can be overridden via DEFAULT_TARGET_BOX_WIDTH.
max_box_nesting = 1000; # Purely to catch prettyprint infinite recursions.
Prettyprint_Output_Stream = out::Prettyprint_Output_Stream; # Handles device-specific aspects (e.g. selecting bold/bright/green/... text) of writing to ansi terminal, plain ascii stream, html stream or whatever.
Texttraits = tt::Texttraits; # Text attributes like color, underline, blink etc.
Traitful_Text = tt::Traitful_Text; # A Traitful_Text contains a String plus whatever Textstyle information is required to render it in HTML or on an ansi terminal or whatever.
# We build up a prettyprint expression as a tree
# of nested_boxes until we are flushed, at which
# point we actually format and print it.
#
# At any given time, the currently open box is 'box',
# the one enclosing it is first in the nested_boxes
# list, and the root of the box tree is last in the
# nested_boxes list. (Keeping the top of the stack
# in a separate variable lets us communicate to the
# type system that we always have at least one box
# on the stack, and thus avoid a lot of spurious
# checks for stack-empty.)
#
# We don't actually use the texttraits_stack
# for anything in this module -- it is
# purely an opaque-to-us customer
# convenience:
Prettyprinter = typ::Prettyprinter; # Export for code clients.
Pp = typ::Prettyprinter; # For when the brevity bug bites.
Npp = Null_Or( typ::Prettyprinter ); # We pass this around pervasively as a flag/conduit for verbose compiler debug output.
# *** UTILITY FUNCTIONS ***
fun put_blanks (pp:Pp, n)
=
out::put_string (pp.output_stream, ns::pad_left ' ' n "");
fun put_newline (pp:Pp)
=
out::put_string (pp.output_stream, "\n");
fun current_texttraits (pp:Pp as { texttraits_stack => REF (texttraits ! _), ... } ) => texttraits; # Current texttraits are the top entry on the textstyle stack,
current_texttraits (pp:Pp as { texttraits_stack => REF [], output_stream, ... } ) => out::default_texttraits output_stream; # or else the default texttraits if the stack is empty:
end;
nblanks = box::nblanks;
tablen = box::tablen;
breaklen = box::breaklen;
my wrap_box_contents_all_or_none
= box::wrap_box_contents_all_or_none;
my wrap_box_contents_as_needed
= box::wrap_box_contents_as_needed;
horizontal = { name => "vertical", code => wrap_box_contents_all_or_none box::NONE }: typ::Wrap_Policy;
vertical = { name => "horizontal", code => wrap_box_contents_all_or_none box::ALL }: typ::Wrap_Policy;
normal = { name => "normal", code => wrap_box_contents_all_or_none box::ALL_OR_NONE }: typ::Wrap_Policy;
ragged_right = { name => "ragged_right", code => wrap_box_contents_as_needed }: typ::Wrap_Policy;
fun die msg = { log::fatal msg; raise exception DIE msg; };
# This is the heart of the facility.
# We prettyprint in multiple passes,
# allowing each pass to stay simple:
#
fun prettyprint_box (pp:Pp, box: typ::Box)
=
{ decide_which_breaks_to_wrap (box, pp);
#
if debug_prints
printf "\nPrinting state of prettyprinter after deciding which breaks to wrap\n";
dbg::prettyprint_prettyprinter (fil::stdout, pp);
fi;
tokens = expand_out_boxes_breaks_tabs_and_indents box; # This pass eliminates BOX, BREAK and TAB tokens, expanding them into simpler tokens.
if debug_prints printf "prettyprint box: after expand_out_boxes_breaks_tabs_and_indents: '%s'\n" (dbg::phase2_tokens_to_string tokens); fi;
tokens = expand_out_endlit_tokens tokens; # This pass moves ENDLIT tokens to their final destination and turns them into plain LIT tokens.
if debug_prints printf "prettyprint box: after expand_out_endlit_tokens: '%s'\n" (dbg::phase3_tokens_to_string tokens); fi;
tokens = simplify_tokens tokens; # Combine two adjacent LIT tokens into one LIT token, ditto with adjacent BLANKS, other peephole optimizations.
if debug_prints printf "prettyprint box: after simplify_tokens: '%s'\n" (dbg::phase3_tokens_to_string tokens); fi;
lines = group_tokens_into_lines tokens; # This pass eliminates NEWLINE tokens, converting from list-of-tokens to list-of-lists-of-tokens representation.
if debug_prints printf "prettyprint box: after group_tokens_into_lines: '%s'\n" (dbg::phase4_lines_to_string lines); fi;
# This pass combines XXXXXX
lines = combine_nonoverlapping_lines lines; # YYYYYY
# into XXXXXX YYYYYY
if debug_prints printf "prettyprint box: after combine_nonoverlapping_lines: '%s'\n" (dbg::phase4_lines_to_string lines); fi;
tokens = flatten_lines_back_to_tokens lines; # This pass re-introduces NEWLINE tokens, converting back to list-of-tokens representation.
if debug_prints printf "prettyprint box: after flatten_lines_back_to_tokens: '%s'\n" (dbg::phase3_tokens_to_string tokens); fi;
write_tokens_to_output_stream tokens;
}
where
print_box_debug_info
=
case (posixlib::getenv "MYTHRYL_PRETTYPRINT_SHOW_RULES") NULL => FALSE;
_ => TRUE;
esac;
fun decide_which_breaks_to_wrap (box: typ::Box, pp:Pp)
=
{ # Start by recursively wrapping all sub-boxes
# of this box. When this is done, we know for
# each subbox its width and whether it is multiline.
#
apply per_token *box.contents
where
fun per_token (typ::BOX box) => decide_which_breaks_to_wrap (box,pp);
per_token _ => ();
end;
end;
# With the wrap decisions for our sub-boxes all
# made, we now have enough information in hand
# to make those decisions for our own box:
stipulate
target_width = box.target_width;
box_contents = *box.contents;
herein
(box.wrap_policy.code { target_width, box_contents })
->
{ actual_box_width, is_multiline };
end;
box.actual_width := actual_box_width;
box.is_multiline := (*box.is_multiline or is_multiline);
}; # fun decide_which_breaks_to_wrap
fun expand_out_boxes_breaks_tabs_and_indents (box: typ::Box) # On this pass we eliminate BOX, TAB, BREAK and INDENT tokens, expanding them into simpler tokens.
=
{ expand_out_boxes_breaks_tabs_and_indents'
{ box,
loc => { left_margin => 0,
actual_column => 0,
virtual_column => 0
}
};
#
reverse *tokens;
}
where
tokens = REF ([]: List( typ::b::Phase2_Token )); # We'll accumulate our output token list in this.
# # Define a few utility fns for making entries in above:
fun nl () = tokens := typ::b::NEWLINE ! *tokens;
fun lit s = tokens := typ::b::LIT s ! *tokens;
fun endlit s = tokens := typ::b::ENDLIT s ! *tokens;
fun push t = tokens := typ::b::PUSH_TT t ! *tokens;
fun pop () = tokens := typ::b::POP_TT ! *tokens;
fun ctl c = tokens := typ::b::CONTROL c ! *tokens;
fun blanks n = if (n > 0) tokens := typ::b::BLANKS n ! *tokens; fi;
# Track left margin and which column the cursor is in.
# We defer issuing blanks and newlines so as to make it
# possible to (sometimes) implement the 'exdent' operator
# (moving cursor left) by reducing the virtual column:
#
Loc = { left_margin: Int, # Currently defined left margin for current box.
actual_column: Int, # current column as defined by LITs seen, ignoring TABs and BLANKS.
virtual_column: Int # Current column as with TABs and BLANKs included.
};
fun loc_to_string (loc: Loc)
=
sprintf "{ left_margin => %d, actual_column => %d, virtual_column => %d }" loc.left_margin loc.actual_column loc.virtual_column;
fun set_left_margin ( loc as { actual_column, ... }: Loc, # Move from this location
left_margin: Int # to this new left margin.
)
=
{ left_margin, actual_column, virtual_column => left_margin };
fun actualize_loc (loc as { left_margin, actual_column, virtual_column }: Loc)
=
{ virtual_column = (virtual_column < left_margin) ?? left_margin # Make sure virtual_column is >= left_margin.
:: virtual_column;
if (virtual_column < actual_column)
if debug_prints printf "\nactualize_loc adding a newline\n"; fi;
nl();
blanks virtual_column;
else
blanks (virtual_column - actual_column);
fi;
if debug_prints printf "actual_loc mid \tloc = %s \n" (loc_to_string loc); fi;
loc = { left_margin, actual_column => virtual_column, virtual_column };
if debug_prints printf "actual_loc bot \tloc = %s \n" (loc_to_string loc); fi;
loc;
};
fun add_blanks_to_loc ({ left_margin, actual_column, virtual_column }: Loc, blanks: Int)
=
{ left_margin, actual_column, virtual_column => virtual_column + blanks };
fun add_chars_to_loc (loc: Loc, chars: Int)
=
{ loc = actualize_loc loc;
#
loc -> { left_margin, actual_column, virtual_column }; # virtual_column == actual_column after a call to actualize_loc.
{ left_margin,
actual_column => actual_column + chars,
virtual_column => virtual_column + chars
};
};
fun add_newline_to_loc ({ left_margin, ... }: Loc)
=
{ left_margin,
actual_column => 0,
virtual_column => left_margin
};
fun expand_out_boxes_breaks_tabs_and_indents' {
box: typ::Box,
loc: Loc
}
=
{
if debug_prints printf "\nexpand entering box %s.%d: \tloc = %s "
*box.rulename box.id (loc_to_string loc);
printf " *is_multiline=%B" *box.is_multiline;
printf " left_margin_is=%s\n" (dbg::left_margin_is_to_string box.left_margin_is);
fi;
loc -> { left_margin => left_margin', actual_column, virtual_column };
# if debug_prints
# case box.left_margin_is
# typ::BOX_RELATIVE r => { printf "expand_out_boxes_breaks_tabs_and_indents'/BOX_RELATIVE breaklen (left_margin'=>%d, r)) d=%d\n"
# left_margin' (breaklen (left_margin', r));
# };
# typ::CURSOR_RELATIVE r => { printf "expand_out_boxes_breaks_tabs_and_indents'/CURSOR_RELATIVE breaklen (virtual_column=>%d, r)) d=%d\n"
# virtual_column (breaklen (virtual_column, r));
# };
# esac;
# fi;
loc = if (not *box.is_multiline)
#
# if debug_prints printf "expand_out_boxes_breaks_tabs_and_indents'/mid: box is NOT multiline\n"; fi;
loc; # We're not a multiline box, leave left margin unchanged.
else
# if debug_prints printf "expand_out_boxes_breaks_tabs_and_indents'/mid: box IS multiline\n"; fi;
case box.left_margin_is
#
typ::BOX_RELATIVE r => set_left_margin (loc, left_margin' + breaklen (left_margin', r)); # We're a multiline box, set new left margin relative to left margin of enclosing box.
typ::CURSOR_RELATIVE r => set_left_margin (loc, virtual_column + breaklen (virtual_column, r)); # We're a multiline box, set new left margin relative to cursor, tabbing over for consistent positioning on page.
esac;
fi;
if debug_prints printf "expand after margin adjustment: \tloc = %s\n" (loc_to_string loc); fi;
if print_box_debug_info
#
lit (string::cat [ " [", *box.rulename, ".", (int::to_string box.id), "[ " ]);
blanks 1;
fi;
loc = do_tokens { loc, rest => *box.contents, box, box_left_margin => loc.left_margin }; # <============= Where everything happens.
if debug_prints printf "\nexpand exiting box %s.%d/top: \tloc = %s\n"
*box.rulename box.id (loc_to_string loc); fi;
if print_box_debug_info
#
blanks 1;
lit (string::cat [ " ]", *box.rulename, ".", (int::to_string box.id), "] " ]);
fi;
loc = if (not *box.is_multiline) loc; #
else
{ left_margin => left_margin', # Return to left_margin' of enclosing box.
actual_column => loc.actual_column,
virtual_column => left_margin'
};
fi;
if debug_prints printf "expand exiting box %s.%d/bot: \tloc = %s\n"
*box.rulename box.id (loc_to_string loc); fi;
loc;
}
also
fun do_tokens
{
rest => token ! rest, # Remaining tokens to process.
loc, # Current actual column (based on stuff written to output), virtual column (based on stuff read) and left margin.
box, # Current box, used to tell whether (for example) box is multiline.
box_left_margin # Original absolute left margin of current box, used to implement "pp.ind 0" which resets current left margin to this.
}
=>
{
if debug_prints printf "\ndo_tokens/top: \tloc = %s\n" (loc_to_string loc); fi;
fun update_loc (loc: Loc, token: typ::Phase1_Token)
=
case token
#
typ::BLANKS i => add_blanks_to_loc (loc, i);
typ::TAB t => add_blanks_to_loc (loc, breaklen (loc.virtual_column, t) );
typ::NEWLINE => add_newline_to_loc loc;
#
typ::LIT s => { if debug_prints printf "update_loc/LIT '%s': \tloc = %s\n" s (loc_to_string loc); fi; loc =
add_chars_to_loc (loc, string::length_in_bytes s);
if debug_prints printf "update_loc/LIT '%s': now \tloc = %s\n" s (loc_to_string loc); fi; loc;
};
typ::ENDLIT s => { if debug_prints printf "update_loc/ENDLIT '%s': \tloc = %s\n" s (loc_to_string loc); fi; loc =
add_chars_to_loc (loc, string::length_in_bytes s);
if debug_prints printf "update_loc/ENDLIT '%s': now\tloc = %s\n" s (loc_to_string loc); fi; loc;
};
typ::PUSH_TT _ => loc;
typ::POP_TT => loc;
typ::CONTROL _ => loc;
#
typ::INDENT _ => die "INDENT not supported by update_loc()"; # Our caller is expected to handle this one.
typ::BREAK _ => die "BREAK not supported by update_loc()"; # Our caller is expected to handle this one.
typ::BOX _ => die "BOX not supported by update_loc()"; # Our caller is expected to handle this one.
esac;
case token
#
typ::BLANKS _ => { loc = update_loc (loc, token); do_tokens { rest, loc, box, box_left_margin }; };
typ::TAB _ => { loc = update_loc (loc, token); do_tokens { rest, loc, box, box_left_margin }; };
typ::NEWLINE => { loc = update_loc (loc, token); nl (); do_tokens { rest, loc, box, box_left_margin }; };
#
typ::LIT s => { loc = update_loc (loc, token); lit s; do_tokens { rest, loc, box, box_left_margin }; };
typ::ENDLIT s => { loc = update_loc (loc, token); endlit s; do_tokens { rest, loc, box, box_left_margin }; };
#
typ::PUSH_TT t => { push t; do_tokens { rest, loc, box, box_left_margin }; };
typ::POP_TT => { pop (); do_tokens { rest, loc, box, box_left_margin }; };
typ::CONTROL f => { ctl f; do_tokens { rest, loc, box, box_left_margin }; };
typ::BOX b
=>
{ loc = expand_out_boxes_breaks_tabs_and_indents' { box => b, loc };
#
do_tokens { rest, loc, box, box_left_margin };
};
typ::INDENT indent
=>
{ loc -> { left_margin, actual_column, virtual_column };
#
# Primary function of pp.ind is to change current left margin:
#
left_margin = if (not *box.is_multiline)
#
left_margin; # We're a no-op on monoline boxes.
else
case indent
#
0 => box_left_margin; # We abuse pp.ind 0; to mean "reset to default indentation for box".
_ => left_margin + indent; # Normal case. Note that 'indent' may be negative.
esac;
fi;
# As a secondary effect of pp.ind, we'd like to guarantee that # This is either a weird side-effect that I'll regret in due course,
# the virtual column is not to the left of of the left margin: # or else a natural way of maintaining the invariant that all printing
# # is done to the right of the left margin. I'm hoping the latter. :-)
virtual_column = if (not *box.is_multiline) virtual_column;
elif (virtual_column > left_margin) virtual_column;
else left_margin;
fi;
loc = { left_margin, actual_column, virtual_column };
do_tokens { rest, loc, box, box_left_margin };
};
typ::BREAK b
=>
{
if debug_prints printf "do_tokens/BREAK/top: \tloc = %s break=%s\n" (loc_to_string loc) (dbg::break_to_string b); fi;
if *b.wrap
#
blen = breaklen (loc.left_margin, b.ifwrap);
if debug_prints printf "do_tokens/BREAK/top: blen = %d box=%s.%d(%s)\n"
blen *box.rulename box.id (*box.is_multiline ?? "MULTILINE" :: "monoline"); fi;
loc -> { left_margin, actual_column, ... };
loc = { left_margin, actual_column, virtual_column => left_margin + blen };
if debug_prints printf "do_tokens/BREAK/ZZ1: \tloc = %s\n" (loc_to_string loc); fi;
do_tokens { rest, loc, box, box_left_margin };
else
loc = add_blanks_to_loc (loc, breaklen (loc.virtual_column, b.ifnotwrap) );
if debug_prints printf "do_tokens/BREAK/ZZ2: \tloc = %s\n" (loc_to_string loc); fi;
do_tokens { rest, loc, box, box_left_margin };
fi;
};
esac;
};
do_tokens { rest => [], loc, ... } => loc;
end; # fun do_tokens
end;
fun expand_out_endlit_tokens (tokens: List( typ::b::Phase2_Token )) # Move each typ::b::ENDLIT to immediately after the preceding type::b::LIT and convert it to a typ::c::LIT.
=
migrate_endlits (reverse tokens, [], [])
where
fun migrate_endlits (token ! rest, movedlits, result)
=>
case token
#
typ::b::BLANKS i => migrate_endlits (rest, movedlits, typ::c::BLANKS i ! result );
typ::b::ENDLIT s => migrate_endlits (rest, typ::c::LIT s ! movedlits, result );
typ::b::NEWLINE => migrate_endlits (rest, movedlits, typ::c::NEWLINE ! result );
typ::b::PUSH_TT t => migrate_endlits (rest, movedlits, typ::c::PUSH_TT t ! result );
typ::b::POP_TT => migrate_endlits (rest, movedlits, typ::c::POP_TT ! result );
typ::b::CONTROL control_fn => migrate_endlits (rest, movedlits, typ::c::CONTROL control_fn ! result );
typ::b::LIT s => migrate_endlits (rest, [], typ::c::LIT s ! (movedlits @ result) );
esac;
migrate_endlits ([], movedlits, result)
=>
movedlits @ result;
end;
end;
fun simplify_tokens (tokens: List( typ::c::Phase3_Token )) # A little peephole optimization pass.
=
combine (tokens, [], FALSE)
where
fun combine (typ::c::BLANKS i ! typ::c::BLANKS j ! rest, result, progress) => combine (typ::c::BLANKS (i + j) ! rest, result, TRUE); # Combine adjacent BLANKS tokens.
combine (typ::c::LIT s ! typ::c::LIT t ! rest, result, progress) => combine (typ::c::LIT (s + t) ! rest, result, TRUE); # Combine adjacent LIT tokens.
combine (typ::c::NEWLINE ! typ::c::NEWLINE ! rest, result, progress) => combine (typ::c::NEWLINE ! rest, result, TRUE); # Combine adjacent NEWLINE tokens (== drop extra newlines). This might be a mistake, but extra newlines are a common problem.
combine (typ::c::BLANKS i ! typ::c::NEWLINE ! rest, result, progress) => combine (typ::c::NEWLINE ! rest, result, TRUE); # Drop trailing blanks.
combine (typ::c::PUSH_TT _ ! typ::c::POP_TT ! rest, result, progress) => combine ( rest, result, TRUE); # Drop useless push/pop token pairs.
combine ( typ::c::BLANKS 0 ! rest, result, progress) => combine ( rest, result, TRUE); # Drop useless BLANKS tokens.
combine ( typ::c::LIT "" ! rest, result, progress) => combine ( rest, result, TRUE); # Drop useless LIT tokens.
combine ( token ! rest, result, progress) => combine ( rest, token ! result, progress); # Nothing to see here, move along.
#
combine ([], result, progress)
=>
if progress combine (reverse result, [], FALSE); # Made some progress, which may have created opportunities for additional progress, so do another pass.
else reverse result; # Done! Restore tokens to original order and return.
fi;
end;
end;
fun group_tokens_into_lines (tokens: List( typ::c::Phase3_Token )) # Result has list::length(lines) == number of NEWLINE tokens, plus one. (Terminal NEWLINE produces empty last line.)
= # Result contains no NEWLINE tokens.
group (tokens, [], [])
where
fun group (typ::c::BLANKS i ! rest, line, lines) => group (rest, typ::d::BLANKS i ! line, lines);
group (typ::c::LIT s ! rest, line, lines) => group (rest, typ::d::LIT s ! line, lines);
group (typ::c::NEWLINE ! rest, line, lines) => group (rest, [], (reverse line) ! lines);
#
group (typ::c::PUSH_TT t ! rest, line, lines) => group (rest, typ::d::PUSH_TT t ! line, lines);
group (typ::c::POP_TT ! rest, line, lines) => group (rest, typ::d::POP_TT ! line, lines);
group (typ::c::CONTROL f ! rest, line, lines) => group (rest, typ::d::CONTROL f ! line, lines);
#
group ([], line, lines) => reverse ( [ reverse line ] @ lines);
end;
end;
fun flatten_lines_back_to_tokens (lines: List( List( typ::d::Phase4_Token ) ) ) # Result will have a NEWLINE between every pair of input lines -- one less NEWLINE than there were lines. (Input lines can be empty.)
=
flatten (lines, [])
where
fun convert_line (typ::d::BLANKS i ! rest, result) => convert_line (rest, typ::c::BLANKS i ! result);
convert_line (typ::d::LIT s ! rest, result) => convert_line (rest, typ::c::LIT s ! result);
#
convert_line (typ::d::PUSH_TT t ! rest, result) => convert_line (rest, typ::c::PUSH_TT t ! result);
convert_line (typ::d::POP_TT ! rest, result) => convert_line (rest, typ::c::POP_TT ! result);
convert_line (typ::d::CONTROL f ! rest, result) => convert_line (rest, typ::c::CONTROL f ! result);
#
convert_line ([], result) => result;
end;
fun flatten (line1 ! line2 ! rest, result) => flatten (line2 ! rest, typ::c::NEWLINE ! convert_line (line1, result)); # By adding NEWLINEs here we allow the
flatten ( line ! rest, result) => flatten ( rest, convert_line (line, result)); # final line to lack a newline.
flatten ( [], result) => reverse result;
end;
end;
fun combine_nonoverlapping_lines (lines: List( List( typ::d::Phase4_Token ) ) ) # The idea here is to combine lines like
= # xxxxx
combine (lines, []) # yyyyyyy
where # into xxxxx yyyyyyy
# So we're looking for line_length(line1) < leading_blanks(line2)
# Note that simplify_tokens() will have combined consecutive
# BLANKS and dropped trailing BLANKS, so things are simple:
fun line_length (tokens: List( typ::d::Phase4_Token )) # A little utility fn to sum the lengths of the tokens in a list.
=
len (tokens, 0)
where
fun len (typ::d::BLANKS i ! rest, len_so_far) => len (rest, len_so_far + i );
len (typ::d::LIT s ! rest, len_so_far) => len (rest, len_so_far + string::length_in_bytes(s) );
len (other ! rest, len_so_far) => len (rest, len_so_far );
#
len ([], len_so_far) => len_so_far;
end;
end;
fun combine (line1 ! line2 ! rest, result)
=>
case line2
#
(typ::d::BLANKS leading_blanks_on_line2 ! rest_of_line2)
=>
{ length_of_line1 = line_length line1;
#
if (line_length(line1) < leading_blanks_on_line2)
# # Paydirt!
shortened_line2 = typ::d::BLANKS (leading_blanks_on_line2 - length_of_line1) ! rest_of_line2; # YYYYYYY stripped of some leading blanks.
combined_line = line1 @ shortened_line2; # XXXXXXX YYYYYYY.
combine (combined_line ! rest, result); # Push combined line back on input to allow it to possibly be further combined.
else
combine (line2 ! rest, line1 ! result);
fi;
};
_ => combine (line2 ! rest, line1 ! result);
esac;
combine (line ! rest, result)
=> combine ( rest, line ! result);
combine ([], result) => reverse result;
end;
end;
fun write_tokens_to_output_stream (tokens: List( typ::c::Phase3_Token ))
=
apply write_token tokens
where
fun write_token token
=
case token
#
typ::c::BLANKS i => out::put_string (pp.output_stream, nblanks i);
typ::c::LIT s => out::put_string (pp.output_stream, s);
typ::c::NEWLINE => out::put_string (pp.output_stream, "\n");
#
typ::c::PUSH_TT t => out::push_texttraits (pp.output_stream, t);
typ::c::POP_TT => out::pop_texttraits pp.output_stream;
typ::c::CONTROL f => f pp.output_stream;
esac;
end;
end; # fun prettyprint_box
fun add_token (pp:Pp as { box => REF box, ... }, token) # Add a token to the reversed_contents of currently-open box.
=
box.reversed_contents := token ! *box.reversed_contents;
fun add_lit (pp:Pp, string)
=
add_token (pp, typ::LIT string);
fun add_endlit (pp:Pp, string)
=
add_token (pp, typ::ENDLIT string);
fun open_box (pp:Pp, left_margin_is, wrap_policy, target_width) # This is a main entrypoint for
src/lib/prettyprint/big/src/standard-prettyprinter-g.pkg =
{ id = *pp.next_box_id;
#
pp.next_box_id := id + 1;
new_box # Set up empty record for new box.
=
{ wrap_policy,
left_margin_is,
target_width,
id,
rulename => REF "", #
is_multiline => REF FALSE,
actual_width => REF 0,
contents => REF [],
reversed_contents => REF []
};
# Add new child box to reversed_contents
# of previously open box:
{ (*pp.box) -> { reversed_contents, ... };
#
reversed_contents := typ::BOX new_box ! *reversed_contents;
};
pp.nested_boxes := *pp.box ! *pp.nested_boxes; # Push currently open box on stack.
pp.box_nesting := *pp.box_nesting + 1; # Remember new stack depth.
pp.box := new_box; # Establish new (empty) currently-open box.
if (*pp.box_nesting > max_box_nesting) # Catch prettyprint infinite loops.
raise exception DIE "max box nesting depth exceeded -- core-prettyprinter-g.pkg";
fi;
};
fun finalize_and_pop_current_box (pp:Pp as { box => REF box, nested_boxes as REF (topbox ! rest), ... })
=>
{ box.contents := reverse *box.reversed_contents; # We've accumulated the box contents in reverse order; this produces the contents in their proper order.
#
pp.box := topbox; # These three pop the box stack.
nested_boxes := rest;
pp.box_nesting := *pp.box_nesting - 1;
};
finalize_and_pop_current_box (pp:Pp as { nested_boxes as REF [], ... } )
=>
{ /*raise exception DIE*/ print "User error: Attempted to close nonexistent box!";
();
};
end;
fun prettyprint_break (pp:Pp as { box => REF { reversed_contents, ... }, ... }, { blanks, indent_on_wrap } )
=
reversed_contents := (typ::BREAK { wrap => REF FALSE,
ifnotwrap => { blanks, tab_to => 9, tabstops_are_every => 0 },
ifwrap => { blanks => indent_on_wrap, tab_to => 9, tabstops_are_every => 4 }
}
)
!
*reversed_contents;
fun indent (pp:Pp as { box => REF { reversed_contents, ... }, ... }, i)
=
reversed_contents := (typ::INDENT i) ! *reversed_contents;
fun break' (pp:Pp as { box => REF { reversed_contents, ... }, ... }, { ifnotwrap, ifwrap } )
=
reversed_contents := (typ::BREAK { wrap => REF FALSE, ifnotwrap, ifwrap }) ! *reversed_contents;
fun prettyprint_flush (pp:Pp as { box, nested_boxes, output_stream, next_box_id, ... })
=
{ end_boxes ()
where
fun end_boxes ()
=
case *nested_boxes
#
[] => # NB: To avoid special cases, we always leave one box on the stack.
{
(*box) -> { contents, reversed_contents, actual_width, is_multiline, ... };
contents := reverse *reversed_contents; # Box contents accumulate in reverse order.
# Normally we correct for this by reversing
# the contents list when we close a box, but
# the root box never gets closed, so we have
# to reverse the contents here, right before
# prettyprinting them.
if debug_prints
printf "\nStart of call to prettyprint_box on outermost box -- prettyprint_flush in prettyprinter-g.pkg\n";
printf "Printing state of prettyprinter -- prettyprint_flush in prettyprinter-g.pkg\n";
dbg::prettyprint_prettyprinter (fil::stdout, pp);
printf "Calling prettyprint_box on outermost box -- prettyprint_flush in prettyprinter-g.pkg\n";
fi;
prettyprint_box (pp, *box); # Prettyprint outermost box.
# This is where all the actual formatting work gets done.
if debug_prints
printf "Back from calling prettyprint_box on outermost box -- prettyprint_flush in prettyprinter-g.pkg\n";
fi;
contents := []; # Clear out the prettyprint stuff so
reversed_contents := []; # we don't wind up printing it again.
actual_width := 0; #
is_multiline := FALSE;
next_box_id := 1;
};
topbox ! rest
=>
{ finalize_and_pop_current_box pp;
end_boxes ();
};
esac;
end;
out::flush output_stream;
};
# *** EXPORTED FUNCTIONS ***
stipulate
default__tabstops_are_every = 4; # This can be overridden via TABSTOPS_ARE_EVERY.
default__target_box_width = 100; # This can be overridden via DEFAULT_TARGET_BOX_WIDTH.
default__wrap_policy = normal; # This can be overridden via DEFAULT_WRAP_POLICY
default__left_margin_is # This can be overridden via DEFAULT_LEFT_MARGIN_IS
=
typ::BOX_RELATIVE {
blanks => 0,
tab_to => 0,
tabstops_are_every => default__tabstops_are_every
};
herein
fun process_mill_options options
=
{ # Set up default values of all optional parameters:
#
my_default_target_box_width = REF default__target_box_width;
my_default_wrap_policy = REF default__wrap_policy;
my_default_left_margin_is = REF default__left_margin_is;
my_tabstops_are_every = REF default__tabstops_are_every;
# Let any supplied optional arguments override the above defaults:
#
apply note_optional_arg options
where
fun note_optional_arg (typ::DEFAULT_WRAP_POLICY p) => my_default_wrap_policy := p;
note_optional_arg (typ::DEFAULT_LEFT_MARGIN_IS m) => my_default_left_margin_is := m;
note_optional_arg (typ::DEFAULT_TARGET_BOX_WIDTH i) => my_default_target_box_width := i;
note_optional_arg (typ::TABSTOPS_ARE_EVERY i) => my_tabstops_are_every := i;
end;
end;
{ default_target_box_width => *my_default_target_box_width,
default_wrap_policy => *my_default_wrap_policy,
default_left_margin_is => *my_default_left_margin_is,
tabstops_are_every => *my_tabstops_are_every
};
};
end;
fun make_prettyprinter prettyprint_output_stream options
=
{
(process_mill_options options)
->
{ default_target_box_width,
default_wrap_policy,
default_left_margin_is,
tabstops_are_every
};
# Construct actual prettyprinter record to return:
#
{ output_stream => prettyprint_output_stream,
output_stream_is_closed => REF FALSE,
#
texttraits_stack => REF [],
box_nesting => REF 0,
next_box_id => REF 1,
nested_boxes => REF [],
box => REF { left_margin_is => default_left_margin_is,
target_width => default_target_box_width,
id => 0,
rulename => REF "/",
actual_width => REF 0,
is_multiline => REF FALSE,
contents => REF [],
reversed_contents => REF [],
wrap_policy => default_wrap_policy
}
};
};
fun flush_prettyprinter prettyprinter
=
prettyprint_flush prettyprinter;
fun close_prettyprinter (pp:Pp)
=
{ flush_prettyprinter pp;
#
pp.output_stream_is_closed := TRUE;
};
fun get_prettyprint_output_stream (pp:Pp)
=
pp.output_stream;
fun traitful_text (pp:Pp) traitful_text
=
{ traitful_text_texttraits = tt::texttraits traitful_text;
#
if (out::same_texttraits (current_texttraits pp, traitful_text_texttraits))
#
add_lit (pp, tt::string traitful_text);
else
add_token (pp, typ::PUSH_TT traitful_text_texttraits);
add_lit (pp, tt::string traitful_text);
add_token (pp, typ::POP_TT);
fi;
};
fun push_texttraits (pp:Pp, texttraits)
=
{ if (not (out::same_texttraits (current_texttraits pp, texttraits)))
#
add_token (pp, typ::PUSH_TT texttraits);
fi;
pp.texttraits_stack := texttraits ! *pp.texttraits_stack;
};
fun pop_texttraits (pp:Pp)
=
case *pp.texttraits_stack
#
[] => { /*raise exception DIE*/ print "User error: pp: unmatched pop_texttraits\n";
};
(sty ! rest)
=>
{ pp.texttraits_stack := rest;
#
if (not (out::same_texttraits (current_texttraits pp, sty)))
#
add_token (pp, typ::POP_TT);
fi;
};
esac;
fun set_rulename_for_current_box ({ box => REF box, ... }:Pp, name: String)
=
box.rulename := name;
};
end;