Working insertion of hash values; added a few labels in spec

Hash values are now computed for each "paragraph" starting with \LMHash
(which includes subsequent grammar, dartCode, itemize blocks, but stops
at \section-like commands).  Now addlatexhash.dart expects three arguments
(first the source latex file, then the destination simplified and
hash-value-annotated latex source file, and finally a file name used to
create the list of hash values emitted).  Adjusted testing accordingly.
Added a test for robustness of the hash value generation: It is checked
that lots of different "unimportant" changes make no difference for the
generated hash values (e.g., we can add/remove comments, change white
space, add \commentary{..} etc. without changing the hash values).

In order to ensure that all "structure" commands in the spec have a label,
I added an \LMLabel{..} in a handful of places, following the style which is
used throughout the spec.

In dart.sty, the \renewcommand that made \LMHash{} produce a fixed
hash value has been removed such that the actual hash values are now
inserted into the generated spec PDF/DVI file.  Tests have been adjusted
to handle this difference between the spec with and without hash values
when comparing the two.

R=gbracha@google.com, lrn@google.com, ricow@google.com

Review URL: https://codereview.chromium.org//652993005

git-svn-id: https://dart.googlecode.com/svn/branches/bleeding_edge/dart@41658 260f80e4-7a28-3924-810f-c04153c831b5
This commit is contained in:
eernst@google.com 2014-11-11 10:08:12 +00:00
parent 8edab4fba5
commit 9570a582eb
6 changed files with 1388 additions and 86 deletions

42
docs/language/Makefile Normal file
View file

@ -0,0 +1,42 @@
# Builds the language specification from its LaTeX source, optionally
# via addlatexhash.dart, which writes a hash-annotated copy of the source
# ($(HASH)) and a list of the hash values ($(LIST)).
NAME=dartLangSpec
SPEC=$(NAME).tex
HASH=$(NAME)-hash.tex
LIST=$(NAME)-list.txt
HASHER=../../tools/addlatexhash.dart

# None of these goals creates a file of the same name, so declare them
# phony; otherwise a stray file named e.g. `pdf` or `clean` in this
# directory would make the goal look up to date and skip its recipe.
.PHONY: pdf pdfhash dvi dvihash hash_and_list help cleanish clean

# [pdf]latex is run three times in a row, presumably so that the table of
# contents and cross references stabilize.
pdf:
	pdflatex $(SPEC)
	pdflatex $(SPEC)
	pdflatex $(SPEC)

pdfhash: hash_and_list
	pdflatex $(HASH)
	pdflatex $(HASH)
	pdflatex $(HASH)

dvi:
	latex $(SPEC)
	latex $(SPEC)
	latex $(SPEC)

dvihash: hash_and_list
	latex $(HASH)
	latex $(HASH)
	latex $(HASH)

# Arguments to the hasher: source file, annotated output file, list file.
hash_and_list:
	dart $(HASHER) $(SPEC) $(HASH) $(LIST)

help:
	@echo "Goals:"
	@echo " pdf, dvi: generate the pdf/dvi file containing the spec"
	@echo " pdfhash, dvihash: ditto, with location markers filled in"
	@echo " cleanish: remove [pdf]latex generated intermediate files"
	@echo " clean: remove all generated files"

cleanish:
	rm -f *.aux *.log *.toc *.out

clean: cleanish
	rm -f *.dvi *.pdf $(HASH) $(LIST)

View file

@ -118,7 +118,7 @@
% ----------------------------------------------------------------------
% Support for hash valued Location Markers
% very small font, to enable 33 char hash values in the margin
% very small font, to enable 40 char hash values in the margin
\makeatletter
\ifcase \@ptsize \relax% 10pt
\newcommand{\miniscule}{\@setfontsize\miniscule{2}{3}}% \tiny: 5/6
@ -141,11 +141,7 @@
% define a label, and show the associated logical location marker
\newcommand{\LMLabel}[1]{%
\vspace{-\baselineskip}\hspace{0pt}\OriginalLMHash{\raisebox{10ex}{sec:#1}}%
\vspace{-\baselineskip}\hspace{0pt}\OriginalLMHash{\raisebox{10ex}{#1}}%
\label{#1}}
% dummy version of LMHash, always shows the same arbitrary hash value
\renewcommand{\LMHash}[1]{\OriginalLMHash{ba01b04d58c8c4e259764498f823cc65}}
% ----------------------------------------------------------------------

View file

@ -249,6 +249,7 @@ Thus, if the name of a library begins with an underscore, it has no effect on th
Privacy is indicated by the name of a declaration - hence privacy and naming are not orthogonal. This has the advantage that both humans and machines can recognize access to private declarations at the point of use without knowledge of the context from which the declaration is derived.}
\subsection{Concurrency}
\LMLabel{concurrency}
Dart code is always single threaded. There is no shared-state concurrency in Dart. Concurrency is supported via actor-like entities called {\em isolates}.
@ -636,6 +637,7 @@ A {\em required formal parameter} may be specified in one of three ways:
\end{grammar}
%\subsubsection{Rest Formals}
%\LMLabel{restFormals}
%A rest formal $R$ must be the last parameter in a formal parameter list. If a type $T$ is specified for $R$, it signifies that the type of $R$ is $T[]$.
@ -1127,6 +1129,7 @@ A generative constructor may be {\em redirecting}, in which case its only action
%\Q{We now have generative constructors with no bodies as well.}
\paragraph{Initializer Lists}
\LMLabel{initializerLists}
An initializer list begins with a colon, and consists of a comma-separated list of individual {\em initializers}. There are two kinds of initializers.
\begin{itemize}
@ -3275,6 +3278,7 @@ the static type of $i$ is the declared return type of $F$.
%\end{itemize}
\subsection{ Lookup}
\LMLabel{lookup}
\subsubsection{Method Lookup}
\label{methodLookup}
@ -6374,6 +6378,7 @@ The scope of a documentation comment immediately preceding the declaration of a
%\subsection{Grammar}
%\LMLabel{grammar}
\subsection{Operator Precedence}
\label{operatorPrecedence}
@ -6419,10 +6424,12 @@ Assignment & =, *=, /=, +=, -= ,\&=, \^{}= etc. & Right & 1\\
\end{tabular}
}
%\subsection{Glossary}
%\LMLabel{glossary}
%\bibliographystyle{alpha}
%\bibliography{/users/gilad/research/bibs/master}
\section*{Appendix: Naming Conventions}
\LMLabel{namingConventions}
\commentary{
The following naming conventions are customary in Dart programs.

78
tests/standalone/io/addlatexhash_test.dart Normal file → Executable file
View file

@ -1,3 +1,4 @@
#!/usr/bin/env dart
// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
@ -54,11 +55,68 @@ testSisp() {
oneTestSisp(sispIsDartEnd, "End", "whatever else ..", false);
}
// Check that the hash values of paragraphs in the specially prepared
// LaTeX source 'addlatexhash_test_src.tex' are identical in groups
// of eight (so we get 8 identical hash values, then another hash
// value 8 times, etc.)
//
// Runs addlatexhash.dart on a copy of that source in a fresh temporary
// directory, reads the emitted list file, and throws a String if any run
// of identical hash lines (including the last one) has a length that is
// not a multiple of 8.  The temporary directory is removed afterwards,
// whether or not the check succeeded.
testSameHash() {
  // set up temporary directory to hold output
  final tmpDir = Directory.systemTemp.createTempSync("addlatexhash_test");
  try {
    final tmpDirPath = tmpDir.path;

    // file names/paths for file containing groups of 8 variants of a paragraph
    const par8timesName = "addlatexhash_test_src";
    const par8timesFileName = "$par8timesName.tex";
    final par8timesDirPath =
        path.join(dartRootDir, "tests", "standalone", "io");
    final par8timesPath = path.join(par8timesDirPath, par8timesFileName);
    final tmpPar8timesPath = path.join(tmpDirPath, par8timesFileName);

    // file names/paths for output
    final hashName = par8timesName + "-hash";
    final hashFileName = "$hashName.tex";
    final hashPath = path.join(tmpDirPath, hashFileName);
    final listName = par8timesName + "-list";
    final listFileName = "$listName.txt";
    final listPath = path.join(tmpDirPath, listFileName);

    // actions to take
    runAddHash() =>
        Process.runSync("dart",
                        [path.join(dartRootPath, "tools", "addlatexhash.dart"),
                         tmpPar8timesPath,
                         hashPath,
                         listPath]);

    // perform test
    new File(par8timesPath).copySync(tmpPar8timesPath);
    checkAction(runAddHash(), "addlatexhash.dart failed");
    var listFile = new File(listPath);
    var listLines = listFile.readAsLinesSync();
    var latestLine = null;
    var sameCount = 0;
    for (var line in listLines) {
      if (!line.startsWith(" ")) continue; // section marker
      if (line.startsWith(" %")) continue; // transformed text "comment"
      if (line != latestLine) {
        // new hash, check for number of equal hashes, then reset
        if (sameCount % 8 == 0) {
          // saw zero or more blocks of 8 identical hash values: OK
          latestLine = line;
          sameCount = 1;
        } else {
          throw "normalization failed to produce same result";
        }
      } else {
        sameCount++;
      }
    }
    // The loop only validates a group when the next, different hash shows
    // up, so the last group in the file must be checked explicitly.
    if (sameCount % 8 != 0) {
      throw "normalization failed to produce same result";
    }
  } finally {
    // do not leave the copied source and generated files behind
    tmpDir.deleteSync(recursive: true);
  }
}
// Check that the LaTeX source transformation done by addlatexhash.dart
// does not affect the generated output, as seen via dvi2tty and diff.
// NB: Not part of normal testing (only local): latex and dvi2tty are
// not installed in the standard test environment.
testNoChange() {
testSameDVI() {
// set up /tmp directory to hold output
final tmpDir = Directory.systemTemp.createTempSync("addlatexhash_test");
final tmpDirPath = tmpDir.path;
@ -83,6 +141,10 @@ testNoChange() {
final hashPath = path.join(tmpDirPath, hashFileName);
final hashDviPath = path.join(tmpDirPath, "$hashName.dvi");
final listName = "$specName-list";
final listFileName = "$listName.txt";
final listPath = path.join(tmpDirPath, listFileName);
// actions to take
runLatex(fileName,workingDirectory) =>
Process.runSync("latex", [fileName], workingDirectory: workingDirectory);
@ -91,7 +153,8 @@ testNoChange() {
Process.runSync("dart",
[path.join(dartRootPath, "tools", "addlatexhash.dart"),
tmpSpecPath,
hashPath]);
hashPath,
listPath]);
runDvi2tty(dviFile) =>
Process.runSync("dvi2tty", [dviFile], workingDirectory: tmpDir.path);
@ -100,12 +163,16 @@ testNoChange() {
checkAction(runDvi2tty(file), "dvitty on $subject failed");
// perform test
new File(styPath).copySync(tmpStyPath);
var renewLMHashCmd = r"\renewcommand{\LMHash}[1]{\OriginalLMHash{xxxx}}";
new File(styPath)
.copySync(tmpStyPath)
.writeAsStringSync(renewLMHashCmd, mode: FileMode.APPEND);
new File(specPath).copySync(tmpSpecPath);
checkAction(runAddHash(),"addlatexhash.dart failed");
for (var i = 0; i < 5; i++) {
checkAction(runLatex(specName, tmpDirPath), "LaTeX on spec failed");
}
checkAction(runAddHash(),"addlatexhash.dart failed");
for (var i = 0; i < 5; i++) {
checkAction(runLatex(hashFileName, tmpDirPath), "LaTeX on output failed");
}
@ -117,6 +184,7 @@ testNoChange() {
main([args]) {
testCutMatch();
testSisp();
testSameHash();
// latex and dvi2tty are not installed in the standard test environment
if (args.length > 0 && args[0] == "local") testNoChange();
if (args.length > 0 && args[0] == "local") testSameDVI();
}

View file

@ -0,0 +1,825 @@
\documentclass{article}
\usepackage{epsfig}
\usepackage{color}
\usepackage{dart}
\usepackage{bnf}
\usepackage{hyperref}
\usepackage{lmodern}
\newcommand{\code}[1]{{\sf #1}}
\title{Test File used by addlatexhash}
\begin{document}
\maketitle
\tableofcontents
\newpage
\pagestyle{myheadings}
\markright{Test file used by addlatexhash}
% begin Ecma boilerplate
\section{Scope}
\LMLabel{ecmaScope}
% Selected certain paragraphs from the spec, created exactly eight
% copies of each of them, modifying the copies in ways that should
% not affect the hash values; to verify correct behavior, grep for
% '^ *\\LMHash' in the output LaTeX source or take every 2nd line
% from the list file, and check that the sequence of hashes consists
% of subsequences of identical values, all with length eight.
% Test a "normal" paragraph -------------------------------------------
%0 original
\LMHash{}
This Ecma standard specifies the syntax and semantics of the Dart programming language. It does not specify the APIs of the Dart libraries except where those library elements are essential to the correct functioning of the language itself (e.g., the existence of class \cd{Object} with methods such as \cd{noSuchMethod}, \cd{runtimeType}).
%1 enlarge white space; NB: cannot add new white space, just enlarge
\LMHash{}
This Ecma standard specifies the syntax and semantics of the Dart programming language. It does not specify the APIs of the Dart libraries except where those library elements are essential to the correct functioning of the language itself (e.g., the existence of class \cd{Object} with methods such as \cd{noSuchMethod}, \cd{runtimeType}).
%2 insert extra newlines
\LMHash{}
This Ecma standard specifies the syntax and semantics of the Dart
programming language. It does not specify the APIs of the Dart
libraries except where those library elements are essential to the
correct functioning of the language itself (e.g., the existence of
class \cd{Object} with methods such as \cd{noSuchMethod},
\cd{runtimeType}).
%3 insert comments
\LMHash{}
This Ecma standard specifies the syntax and semantics of the Dart % blah
programming language. It does not specify the APIs of the Dart%blah
libraries except where those library elements are essential to the% blah
correct functioning of the language itself (e.g., the existence of
class \cd{Object} with methods such as \cd{noSuchMethod},
\cd{runtimeType}). % blah blah
%4 insert commentary
\LMHash{}
This Ecma standard specifies the syntax and semantics of the Dart programming language. It does not specify the APIs of the Dart libraries except where those library elements are \commentary{whatever} essential to the correct functioning of the language itself (e.g., the existence of class \cd{Object} with methods such as \cd{noSuchMethod}, \cd{runtimeType}).
%5 insert rationale
\LMHash{}
This Ecma standard specifies the syntax and semantics of the Dart programming language. It does not specify the APIs of the \rationale{whatever} Dart libraries except where those library elements are essential to the correct functioning of the language itself (e.g., the existence of class \cd{Object} with methods such as \cd{noSuchMethod}, \cd{runtimeType}).
%6 insert nested rationale/commentary
\LMHash{}
This Ecma standard specifies \rationale{whatever \commentary{whatever}} the syntax and semantics of the Dart programming language. It does not specify the APIs of the Dart libraries except where those library elements are essential to the correct functioning of the language itself (e.g., the existence of class \cd{Object} with methods such as \cd{noSuchMethod}, \cd{runtimeType}).
%7 insert all; note that this block ends with more blank lines, too
\LMHash{}
This Ecma standard specifies the syntax \rationale{whatever \commentary{whatever}} and semantics of the Dart programming language. It does not specify the APIs of the
Dart libraries except where those library elements are
essential to the correct functioning of
the language itself (e.g., the existence of class
\cd{Object} with methods such as \cd{noSuchMethod}, \cd{runtimeType}).
\section{Conformance}
\LMLabel{ecmaConformance}
% Test a paragraph including math mode --------------------------------
%0 original
\LMHash{}
A list $x_1, \ldots, x_n$ denotes any list of $n$ elements of the form $x_i, 1 \le i \le n$. Note that $n$ may be zero, in which case the list is empty. We use such lists extensively throughout this specification.
%1 enlarge white space in math mode; NB: cannot add new ws, even in math mode
\LMHash{}
A list $x_1, \ldots, x_n$ denotes any list of $n$ elements of the form $x_i, 1 \le i \le n$. Note that $n$ may be zero, in which case the list is empty. We use such lists extensively throughout this specification.
%2 enlarge other white space
\LMHash{}
A list $x_1, \ldots, x_n$ denotes any list of $n$ elements
of the form $x_i, 1 \le i \le n$.
Note that $n$ may be zero, in
which case the list is empty. We use such lists
extensively throughout this
specification.
%3 add comments, also in math mode
\LMHash{}
A list $x_1, \ldots, x_n$ denotes any list of $n$ elements% blah % blah
of the form $x_i, 1% blah
\le i % blah
\le n$. Note that $n$ may be zero, in which
% blah blah %
case the list is empty. We use such lists extensively throughout this
specification.
%4 even multiple newlines do not count when before '$'
\LMHash{}
A list $x_1, \ldots, x_n$ denotes any list of $n$ elements of the form
$x_i, 1 \le i \le n$. Note that $n$ may be zero, in which case the list is empty. We use such lists extensively throughout this specification.
%5 multiple new lines and leading ws before '$', and comments
\LMHash{}
A list $x_1, \ldots, x_n$ denotes any list of $n$ elements of the form $x_i, 1 \le i \le n$. Note that
% blah blah blah blah blah blah blah blah
% blah blah blah blah blah blah blah blah
% blah blah blah blah blah blah blah blah
% blah blah blah blah blah blah blah blah
$n$ may be zero, in which case the list is empty. We use such lists extensively throughout this specification.
%6 precede paragraph with comments
\LMHash{}
% blah
% blah
%
A list $x_1, \ldots, x_n$ denotes any list of $n$ elements of the form $x_i, 1 \le i \le n$. Note that $n$ may be zero, in which case the list is empty. We use such lists extensively throughout this specification.
%7 insert comment lines in the middle of the paragraph
\LMHash{}
A list $x_1, \ldots, x_n$ denotes any list of $n$ elements of the form $x_i, 1 \le i \le n$. Note that $n$ may be zero,
%
% blah
%
in which case the list is empty. We use such lists extensively throughout this specification.
% Test paragraph plus dartCode, with commentary -----------------------
%0 original
\LMHash{}
If a declaration $d$ named $n$ is in the namespace induced by a scope $S$, then $d$ {\em hides} any declaration named $n$ that is available in the lexically enclosing scope of $S$.
\commentary {
A consequence of these rules is that it is possible to hide a type with a method or variable.
Naming conventions usually prevent such abuses. Nevertheless,the following program is legal:
}
\begin{dartCode}
\CLASS{} HighlyStrung \{
String() $=>$ "?";
\}
\end{dartCode}
%1 delete/insert in the commentary, altering number of paragraphs in there
\LMHash{}
If a declaration $d$ named $n$ is in the namespace induced by a scope $S$, then $d$ {\em hides} any declaration named $n$ that is available in the lexically enclosing scope of $S$.
\commentary {% NB: this space before brace begin is ignored by LaTeX
New paragraph.
New paragraph new paragraph new paragraph new paragraph new paragraph new paragraph new paragraph new paragraph new paragraph new paragraph new paragraph new paragraph new paragraph new paragraph new paragraph new paragraph new paragraph.
New paragraph,
new paragraph, and
new paragraph. % blah \commentary{ with unbalanced '{'
\rationale{nested rationale}
Naming conventions usually prevent such abuses. Nevertheless,the following program is legal:
}
\begin{dartCode}
\CLASS{} HighlyStrung \{
String() $=>$ "?";
\}
\end{dartCode}
%2 remove commentary entirely, including newlines
\LMHash{}
If a declaration $d$ named $n$ is in the namespace induced by a scope $S$, then $d$ {\em hides} any declaration named $n$ that is available in the lexically enclosing scope of $S$.
\begin{dartCode}
\CLASS{} HighlyStrung \{
String() $=>$ "?";
\}
\end{dartCode}
%3 change the amount of indentation in dartCode
\LMHash{}
If a declaration $d$ named $n$ is in the namespace induced by a scope $S$, then $d$ {\em hides} any declaration named $n$ that is available in the lexically enclosing scope of $S$.
\commentary {
A consequence of these rules is that it is possible to hide a type with a method or variable.
Naming conventions usually prevent such abuses. Nevertheless,the following program is legal:
}
\begin{dartCode}
\CLASS{} HighlyStrung \{
String() $=>$ "?";
\}
\end{dartCode}
%4 change other white space in dartCode
\LMHash{}
If a declaration $d$ named $n$ is in the namespace induced by a scope $S$, then $d$ {\em hides} any declaration named $n$ that is available in the lexically enclosing scope of $S$.
\commentary {
A consequence of these rules is that it is possible to hide a type with a method or variable.
Naming conventions usually prevent such abuses. Nevertheless,the following program is legal:
}
\begin{dartCode}
\CLASS{} HighlyStrung \{
String() $=>$ "?";
\}
\end{dartCode}
%5 add comments in dartCode
\LMHash{}
If a declaration $d$ named $n$ is in the namespace induced by a scope $S$, then $d$ {\em hides} any declaration named $n$ that is available in the lexically enclosing scope of $S$.
\commentary {
A consequence of these rules is that it is possible to hide a type with a method or variable.
Naming conventions usually prevent such abuses. Nevertheless,the following program is legal:
}
\begin{dartCode}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\CLASS{} HighlyStrung \{
String() $=>$ "?";% blah % blah
\} % blah
%blah
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{dartCode}
%6 remove commentary
\LMHash{}
If a declaration $d$ named $n$ is in the namespace induced by a scope $S$, then $d$ {\em hides} any declaration named $n$ that is available in the lexically enclosing scope of $S$.
\begin{dartCode}
\CLASS{} HighlyStrung \{
String() $=>$ "?";
\}
\end{dartCode}
%7 add comment lines after the block
\LMHash{}
If a declaration $d$ named $n$ is in the namespace induced by a scope $S$, then $d$ {\em hides} any declaration named $n$ that is available in the lexically enclosing scope of $S$.
\commentary {
A consequence of these rules is that it is possible to hide a type with a method or variable.
Naming conventions usually prevent such abuses. Nevertheless,the following program is legal:
}
\begin{dartCode}
\CLASS{} HighlyStrung \{
String() $=>$ "?";
\}
\end{dartCode}
% blah blah blah blah blah blah blah blah blah
% blah blah blah blah blah blah blah blah blah
% blah blah blah blah blah blah blah blah blah
% no blank lines before \section this time
\section{Variables}
\LMLabel{variables}
% Test paragraph followed by grammar ----------------------------------
%0 original
\LMHash{}
Variables are storage locations in memory.
\begin{grammar}
{\bf variableDeclaration:}
declaredIdentifier (`,' identifier)*
.
{\bf declaredIdentifier:}
metadata finalConstVarOrType identifier
.
{\bf finalConstVarOrType:}\FINAL{} type?;
\CONST{} type?;
varOrType
.
{\bf varOrType:}\VAR{};
type
.
{\bf initializedVariableDeclaration:}
declaredIdentifier (`=' expression)? (`,' initializedIdentifier)* % could do top level here
.
{\bf initializedIdentifier:}
identifier (`=' expression)? % could do top-level here
.
{\bf initializedIdentifierList:}
initializedIdentifier (`,' initializedIdentifier)*
.
\end{grammar}
%1 collecting grammar rules on single lines
\LMHash{}
Variables are storage locations in memory.
\begin{grammar}
{\bf variableDeclaration:} declaredIdentifier (`,' identifier)* .
{\bf declaredIdentifier:} metadata finalConstVarOrType identifier .
{\bf finalConstVarOrType:}\FINAL{} type?; \CONST{} type?; varOrType .
{\bf varOrType:}\VAR{}; type .
{\bf initializedVariableDeclaration:} declaredIdentifier (`=' expression)? (`,' initializedIdentifier)* .
{\bf initializedIdentifier:} identifier (`=' expression)? .
{\bf initializedIdentifierList:} initializedIdentifier (`,' initializedIdentifier)* .
\end{grammar}
%2 adding comments to grammar
\LMHash{}
Variables are storage locations in memory.
\begin{grammar}% blah
%blah
{\bf variableDeclaration:}%blah
declaredIdentifier (`,' identifier)*%blah
.%blah
%blah
{\bf declaredIdentifier:}
metadata finalConstVarOrType identifier
.
{\bf finalConstVarOrType:}\FINAL{} type?;
\CONST{} type?;
varOrType
.
{\bf varOrType:}\VAR{};
type
.
{\bf initializedVariableDeclaration:}
declaredIdentifier (`=' expression)? (`,' initializedIdentifier)* % could do top level here
.
{\bf initializedIdentifier:}
identifier (`=' expression)? % could do top-level here
.
{\bf initializedIdentifierList:}
initializedIdentifier (`,' initializedIdentifier)*
.% blah
%blah
\end{grammar}%blah
%3 removing empty lines from grammar
\LMHash{}
Variables are storage locations in memory.
\begin{grammar}
{\bf variableDeclaration:}
declaredIdentifier (`,' identifier)*
.
{\bf declaredIdentifier:}
metadata finalConstVarOrType identifier
.
{\bf finalConstVarOrType:}\FINAL{} type?;
\CONST{} type?;
varOrType
.
{\bf varOrType:}\VAR{};
type
.
{\bf initializedVariableDeclaration:}
declaredIdentifier (`=' expression)? (`,' initializedIdentifier)* % could do top level here
.
{\bf initializedIdentifier:}
identifier (`=' expression)? % could do top-level here
.
{\bf initializedIdentifierList:}
initializedIdentifier (`,' initializedIdentifier)*
.
\end{grammar}
%4 inserting comment block in grammar
\LMHash{}
Variables are storage locations in memory.
\begin{grammar}
{\bf variableDeclaration:}
declaredIdentifier (`,' identifier)*
.
{\bf declaredIdentifier:}
metadata finalConstVarOrType identifier
.
{\bf finalConstVarOrType:}\FINAL{} type?;
\CONST{} type?;
varOrType
.
{\bf varOrType:}\VAR{};
type
.
%% {\bf finalConstVarOrType:}\FINAL{} type?;
%% \CONST{} type?;
%% varOrType
%% .
%% {\bf varOrType:}\VAR{};
%% type
%% .
{\bf initializedVariableDeclaration:}
declaredIdentifier (`=' expression)? (`,' initializedIdentifier)* % could do top level here
.
{\bf initializedIdentifier:}
identifier (`=' expression)? % could do top-level here
.
{\bf initializedIdentifierList:}
initializedIdentifier (`,' initializedIdentifier)*
.
\end{grammar}
%5 adding commentary/rationale in grammar (may not happen, is OK anyway)
\LMHash{}
Variables are storage locations in memory.
\begin{grammar}
{\bf variableDeclaration:}
declaredIdentifier (`,' identifier)*
.
{\bf declaredIdentifier:}
metadata finalConstVarOrType identifier
.
\rationale{blah \commentary{blah}}
{\bf finalConstVarOrType:}\FINAL{} type?;
\CONST{} type?;
varOrType
.
{\bf varOrType:}\VAR{};
type
.
{\bf initializedVariableDeclaration:}
declaredIdentifier (`=' expression)? (`,' initializedIdentifier)* % could do top level here
.
{\bf initializedIdentifier:}
identifier (`=' expression)? % could do top-level here
.
{\bf initializedIdentifierList:}
initializedIdentifier (`,' initializedIdentifier)*
.
\end{grammar}
%6 inserting newlines in grammar (not new paragraph, only at existing ws)
\LMHash{}
Variables are storage locations in memory.
\begin{grammar}
{\bf variableDeclaration:}
declaredIdentifier
(`,'
identifier)*
.
{\bf declaredIdentifier:}
metadata finalConstVarOrType
identifier
.
{\bf
finalConstVarOrType:}\FINAL{}
type?;
\CONST{} type?;
varOrType
.
{\bf varOrType:}\VAR{};
type
.
{\bf initializedVariableDeclaration:}
declaredIdentifier (`=' expression)?
(`,' initializedIdentifier)*
% could do top level here
.
{\bf initializedIdentifier:}
identifier (`=' expression)? % could do top-level here
.
{\bf initializedIdentifierList:}
initializedIdentifier (`,' initializedIdentifier)*
.
\end{grammar}
%7
\LMHash{}
Variables are storage locations in memory.
\begin{grammar}
{\bf variableDeclaration:}
declaredIdentifier (`,' identifier)*
.
{\bf declaredIdentifier:}
metadata finalConstVarOrType identifier
.
{\bf finalConstVarOrType:}\FINAL{} type?;
\CONST{} type?;
varOrType
.
{\bf varOrType:}\VAR{};
type
.
{\bf initializedVariableDeclaration:}
declaredIdentifier (`=' expression)? (`,' initializedIdentifier)* % could do top level here
.
{\bf initializedIdentifier:}
identifier (`=' expression)? % could do top-level here
.
{\bf initializedIdentifierList:}
initializedIdentifier (`,' initializedIdentifier)*
.
\end{grammar}
\subsection{Evaluation of Implicit Variable Getters}
\LMLabel{evaluationOfImplicitVariableGetters}
% Test itemized list, right after paragraph ---------------------------
%0 original
\LMHash{}
Let $d$ be the declaration of a static or instance variable $v$. If $d$ is an instance variable, then the invocation of the implicit getter of $v$ evaluates to the value stored in $v$.
If $d$ is a static or library variable then the implicit getter method of $v$ executes as follows:
\begin{itemize}
\item {\bf Non-constant variable declaration with initializer}. If $d$ is of one of the forms \code{\VAR{} $v$ = $e$;} , \code{$T$ $v$ = $e$;} , \code{\FINAL{} $v$ = $e$;} , \code{\FINAL{} $T$ $v$ = $e$;}, \code{\STATIC{} $v$ = $e$; }, \code{\STATIC{} $T$ $v$ = $e$; }, \code{\STATIC{} \FINAL{} $v$ = $e$; } or \code{\STATIC{} \FINAL{} $T$ $v$ = $e$;} and no value has yet been stored into $v$ then the initializer expression $e$ is evaluated. If, during the evaluation of $e$, the getter for $v$ is invoked, a \code{CyclicInitializationError} is thrown. If the evaluation succeeded yielding an object $o$, let $r = o$, otherwise let $r = \NULL{}$. In any case, $r$ is stored into $v$. The result of executing the getter is $r$.
\item {\bf Constant variable declaration}. If $d$ is of one of the forms \code{\CONST{} $v$ = $e$; } , \code{\CONST{} $T$ $v$ = $e$; }, \code{\STATIC{} \CONST{} $v$ = $e$; } or \code{\STATIC{} \CONST{} $T$ $v$ = $e$;} the result of the getter is the value of the compile time constant $e$. \commentary{Note that a compile time constant cannot depend on itself, so no cyclic references can occur.}
Otherwise
\item {\bf Variable declaration without initializer}. The result of executing the getter method is the value stored in $v$.
\end{itemize}
%1 insert blank lines and comments between paragraph and list ---------
\LMHash{}
Let $d$ be the declaration of a static or instance variable $v$. If $d$ is an instance variable, then the invocation of the implicit getter of $v$ evaluates to the value stored in $v$.
If $d$ is a static or library variable then the implicit getter method of $v$ executes as follows:
%blah
%blah
\begin{itemize}
\item {\bf Non-constant variable declaration with initializer}. If $d$ is of one of the forms \code{\VAR{} $v$ = $e$;} , \code{$T$ $v$ = $e$;} , \code{\FINAL{} $v$ = $e$;} , \code{\FINAL{} $T$ $v$ = $e$;}, \code{\STATIC{} $v$ = $e$; }, \code{\STATIC{} $T$ $v$ = $e$; }, \code{\STATIC{} \FINAL{} $v$ = $e$; } or \code{\STATIC{} \FINAL{} $T$ $v$ = $e$;} and no value has yet been stored into $v$ then the initializer expression $e$ is evaluated. If, during the evaluation of $e$, the getter for $v$ is invoked, a \code{CyclicInitializationError} is thrown. If the evaluation succeeded yielding an object $o$, let $r = o$, otherwise let $r = \NULL{}$. In any case, $r$ is stored into $v$. The result of executing the getter is $r$.
\item {\bf Constant variable declaration}. If $d$ is of one of the forms \code{\CONST{} $v$ = $e$; } , \code{\CONST{} $T$ $v$ = $e$; }, \code{\STATIC{} \CONST{} $v$ = $e$; } or \code{\STATIC{} \CONST{} $T$ $v$ = $e$;} the result of the getter is the value of the compile time constant $e$. \commentary{Note that a compile time constant cannot depend on itself, so no cyclic references can occur.}
Otherwise
\item {\bf Variable declaration without initializer}. The result of executing the getter method is the value stored in $v$.
\end{itemize}
%2 insert line break before each item
\LMHash{}
Let $d$ be the declaration of a static or instance variable $v$. If $d$ is an instance variable, then the invocation of the implicit getter of $v$ evaluates to the value stored in $v$.
If $d$ is a static or library variable then the implicit getter method of $v$ executes as follows:
\begin{itemize}
\item
{\bf Non-constant variable declaration with initializer}. If $d$ is of one of the forms \code{\VAR{} $v$ = $e$;} , \code{$T$ $v$ = $e$;} , \code{\FINAL{} $v$ = $e$;} , \code{\FINAL{} $T$ $v$ = $e$;}, \code{\STATIC{} $v$ = $e$; }, \code{\STATIC{} $T$ $v$ = $e$; }, \code{\STATIC{} \FINAL{} $v$ = $e$; } or \code{\STATIC{} \FINAL{} $T$ $v$ = $e$;} and no value has yet been stored into $v$ then the initializer expression $e$ is evaluated. If, during the evaluation of $e$, the getter for $v$ is invoked, a \code{CyclicInitializationError} is thrown. If the evaluation succeeded yielding an object $o$, let $r = o$, otherwise let $r = \NULL{}$. In any case, $r$ is stored into $v$. The result of executing the getter is $r$.
\item
{\bf Constant variable declaration}. If $d$ is of one of the forms \code{\CONST{} $v$ = $e$; } , \code{\CONST{} $T$ $v$ = $e$; }, \code{\STATIC{} \CONST{} $v$ = $e$; } or \code{\STATIC{} \CONST{} $T$ $v$ = $e$;} the result of the getter is the value of the compile time constant $e$. \commentary{Note that a compile time constant cannot depend on itself, so no cyclic references can occur.}
Otherwise
\item
{\bf Variable declaration without initializer}. The result of executing the getter method is the value stored in $v$.
\end{itemize}
%3 insert blank/comment lines between,before,after items, and on begin/end
\LMHash{}
Let $d$ be the declaration of a static or instance variable $v$. If $d$ is an instance variable, then the invocation of the implicit getter of $v$ evaluates to the value stored in $v$.
If $d$ is a static or library variable then the implicit getter method of $v$ executes as follows:
\begin{itemize}%blah
% blah
\item {\bf Non-constant variable declaration with initializer}. If $d$ is of one of the forms \code{\VAR{} $v$ = $e$;} , \code{$T$ $v$ = $e$;} , \code{\FINAL{} $v$ = $e$;} , \code{\FINAL{} $T$ $v$ = $e$;}, \code{\STATIC{} $v$ = $e$; }, \code{\STATIC{} $T$ $v$ = $e$; }, \code{\STATIC{} \FINAL{} $v$ = $e$; } or \code{\STATIC{} \FINAL{} $T$ $v$ = $e$;} and no value has yet been stored into $v$ then the initializer expression $e$ is evaluated. If, during the evaluation of $e$, the getter for $v$ is invoked, a \code{CyclicInitializationError} is thrown. If the evaluation succeeded yielding an object $o$, let $r = o$, otherwise let $r = \NULL{}$. In any case, $r$ is stored into $v$. The result of executing the getter is $r$.
\item {\bf Constant variable declaration}. If $d$ is of one of the forms \code{\CONST{} $v$ = $e$; } , \code{\CONST{} $T$ $v$ = $e$; }, \code{\STATIC{} \CONST{} $v$ = $e$; } or \code{\STATIC{} \CONST{} $T$ $v$ = $e$;} the result of the getter is the value of the compile time constant $e$. \commentary{Note that a compile time constant cannot depend on itself, so no cyclic references can occur.}
Otherwise
%blah
%blah
%blah
\item {\bf Variable declaration without initializer}. The result of executing the getter method is the value stored in $v$.
% blah
\end{itemize}%blah
%4 insert commentary/rationale inside itemized list
\LMHash{}
Let $d$ be the declaration of a static or instance variable $v$. If $d$ is an instance variable, then the invocation of the implicit getter of $v$ evaluates to the value stored in $v$.
If $d$ is a static or library variable then the implicit getter method of $v$ executes as follows:
\begin{itemize}
\commentary{maybe this will not happen, but it is ok}
\item {\bf Non-constant variable declaration with initializer}. If $d$ is of one of the forms \code{\VAR{} $v$ = $e$;} , \code{$T$ $v$ = $e$;} , \code{\FINAL{} $v$ = $e$;} , \code{\FINAL{} $T$ $v$ = $e$;}, \code{\STATIC{} $v$ = $e$; }, \code{\STATIC{} $T$ $v$ = $e$; }, \code{\STATIC{} \FINAL{} $v$ = $e$; } or \code{\STATIC{} \FINAL{} $T$ $v$ = $e$;} and no value has yet been stored into $v$ then the initializer expression $e$ is evaluated. If, during the evaluation of $e$, the getter for $v$ is invoked, a \code{CyclicInitializationError} is thrown. If the evaluation succeeded yielding an object $o$, let $r = o$, otherwise let $r = \NULL{}$. In any case, $r$ is stored into $v$. The result of executing the getter is $r$.
\rationale{but rationale at the end of an item seems to make sense}
\item {\bf Constant variable declaration}. If $d$ is of one of the forms \code{\CONST{} $v$ = $e$; } , \code{\CONST{} $T$ $v$ = $e$; }, \code{\STATIC{} \CONST{} $v$ = $e$; } or \code{\STATIC{} \CONST{} $T$ $v$ = $e$;} the result of the getter is the value of the compile time constant $e$. \commentary{Note that a compile time constant cannot depend on itself, so no cyclic references can occur.}
Otherwise
\item {\bf Variable declaration without initializer}. The result of executing the getter method is the value stored in $v$.
\end{itemize}
\rationale{and we can of course have it immediately after the list}
%5 add line breaks in items, with/without indentation
\LMHash{}
Let $d$ be the declaration of a static or instance variable $v$. If $d$ is an instance variable, then the invocation of the implicit getter of $v$ evaluates to the value stored in $v$.
If $d$ is a static or library variable then the implicit getter method of $v$ executes as follows:
\begin{itemize}
\item {\bf Non-constant variable declaration with initializer}. If $d$
is of one of the forms \code{\VAR{} $v$ = $e$;} , \code{$T$ $v$ = $e$;} ,
\code{\FINAL{} $v$ = $e$;} , \code{\FINAL{} $T$ $v$ = $e$;},
\code{\STATIC{} $v$ = $e$; }, \code{\STATIC{} $T$ $v$ = $e$; },
\code{\STATIC{} \FINAL{} $v$ = $e$; } or
\code{\STATIC{} \FINAL{} $T$ $v$ = $e$;} and no value has yet been stored
into $v$ then the initializer expression $e$ is evaluated. If, during
the evaluation of $e$, the getter for $v$ is invoked, a
\code{CyclicInitializationError} is thrown. If the evaluation succeeded
yielding an object $o$, let $r = o$, otherwise let $r = \NULL{}$. In
any case, $r$ is stored into $v$. The result of executing the getter
is $r$.
\item {\bf Constant variable declaration}. If $d$ is of one of the
forms \code{\CONST{} $v$ = $e$; } , \code{\CONST{} $T$ $v$ = $e$; },
\code{\STATIC{} \CONST{} $v$ = $e$; } or
\code{\STATIC{} \CONST{} $T$ $v$ = $e$;} the result of the getter is the
value of the compile time constant $e$.
\commentary{Note that a compile time constant cannot depend on
itself, so no cyclic references can occur.}
Otherwise
\item {\bf Variable declaration without initializer}. The result of
executing the getter method is the value stored in $v$.
\end{itemize}
%6 add line breaks, then "eliminate" them with comments
\LMHash{}
Let $d$ be the declaration of a static or instance%
variable $v$. If $d$ is an instance variable, then the %
invocation of the implicit getter of $v$ evaluates to%
the value stored in $v$.
If $d$ is a static or library variable then the implicit %
getter method of $v$ executes as follows: %
\begin{itemize}
\item {\bf Non-constant variable declaration with initializer}. If $d$ is of one of the forms \code{\VAR{} $v$ = $e$;} , \code{$T$ $v$ = $e$;} , \code{\FINAL{} $v$ = $e$;} , \code{\FINAL{} $T$ $v$ = $e$;}, \code{\STATIC{} $v$ = $e$; }, \code{\STATIC{} $T$ $v$ = $e$; }, \code{\STATIC{} \FINAL{} $v$ = $e$; } or \code{\STATIC{} \FINAL{} $T$ $v$ = $e$;} and no value has yet been stored into $v$ then the initializer expression $e$ is evaluated. If, during the evaluation of $e$, the getter for $v$ is invoked, a \code{CyclicInitializationError} is thrown. If the evaluation succeeded yielding an object $o$, let $r = o$, otherwise let $r = \NULL{}$. In any case, $r$ is stored into $v$. The result of executing the getter is $r$. %
\item {\bf Constant variable declaration}. If $d$ is of one of the%
forms \code{\CONST{} $v$ = $e$; } , \code{\CONST{} $T$ $v$ = $e$; }, %
\code{\STATIC{} \CONST{} $v$ = $e$; } or%
\code{\STATIC{} \CONST{} $T$ $v$ = $e$;} the result of the getter is the value of the compile time constant $e$. \commentary{Note that a compile time constant cannot depend on itself, so no cyclic references can occur.}
Otherwise
\item {\bf Variable declaration without initializer}. The result of executing the getter method is the value stored in $v$.
\end{itemize}
%7 eliminate line break before environment and before \item
\LMHash{}
Let $d$ be the declaration of a static or instance variable $v$. If $d$ is an instance variable, then the invocation of the implicit getter of $v$ evaluates to the value stored in $v$.
If $d$ is a static or library variable then the implicit getter method of $v$ executes as follows: \begin{itemize}
\item {\bf Non-constant variable declaration with initializer}. If $d$ is of one of the forms \code{\VAR{} $v$ = $e$;} , \code{$T$ $v$ = $e$;} , \code{\FINAL{} $v$ = $e$;} , \code{\FINAL{} $T$ $v$ = $e$;}, \code{\STATIC{} $v$ = $e$; }, \code{\STATIC{} $T$ $v$ = $e$; }, \code{\STATIC{} \FINAL{} $v$ = $e$; } or \code{\STATIC{} \FINAL{} $T$ $v$ = $e$;} and no value has yet been stored into $v$ then the initializer expression $e$ is evaluated. If, during the evaluation of $e$, the getter for $v$ is invoked, a \code{CyclicInitializationError} is thrown. If the evaluation succeeded yielding an object $o$, let $r = o$, otherwise let $r = \NULL{}$. In any case, $r$ is stored into $v$. The result of executing the getter is $r$. \item {\bf Constant variable declaration}. If $d$ is of one of the forms \code{\CONST{} $v$ = $e$; } , \code{\CONST{} $T$ $v$ = $e$; }, \code{\STATIC{} \CONST{} $v$ = $e$; } or \code{\STATIC{} \CONST{} $T$ $v$ = $e$;} the result of the getter is the value of the compile time constant $e$. \commentary{Note that a compile time constant cannot depend on itself, so no cyclic references can occur.}
Otherwise%
\item {\bf Variable declaration without initializer}. The result of executing the getter method is the value stored in $v$.
\end{itemize}
% Test multiple commentary/rationale blocks in succession -------------
%0 original
\LMHash{}
The run time type of a function object always implements the class \cd{Function}.
\commentary{
One cannot assume, based on the above, that given a function \cd{f}, \cd{f.runtimeType} will actually be \cd{Function}, or that any two distinct function objects necessarily have the same runtime type.
}
\rationale{
It is up to the implementation to choose an appropriate representation for functions.
For example, consider that a closure produced via property extraction treats equality different from ordinary closures, and is therefore likely a different class. Implementations may also use different classes for functions based on arity and or type. Arity may be implicitly affected by whether a function is an instance method (with an implicit receiver parameter) or not. The variations are manifold, and so this specification only guarantees that function objects are instances of some class that is considered to implement \cd{Function}.
}
%1 remove commentary/first
\LMHash{}
The run time type of a function object always implements the class \cd{Function}.
\rationale{
It is up to the implementation to choose an appropriate representation for functions.
For example, consider that a closure produced via property extraction treats equality different from ordinary closures, and is therefore likely a different class. Implementations may also use different classes for functions based on arity and or type. Arity may be implicitly affected by whether a function is an instance method (with an implicit receiver parameter) or not. The variations are manifold, and so this specification only guarantees that function objects are instances of some class that is considered to implement \cd{Function}.
}
%2 remove rationale second
\LMHash{}
The run time type of a function object always implements the class \cd{Function}.
\commentary{
One cannot assume, based on the above, that given a function \cd{f}, \cd{f.runtimeType} will actually be \cd{Function}, or that any two distinct function objects necessarily have the same runtime type.
}
%3 remove both
\LMHash{}
The run time type of a function object always implements the class \cd{Function}.
%4 make first in paragraph, even with \par (double newline)
\LMHash{}
\commentary{
One cannot assume, based on the above, that given a function \cd{f}, \cd{f.runtimeType} will actually be \cd{Function}, or that any two distinct function objects necessarily have the same runtime type.
}
\rationale{
It is up to the implementation to choose an appropriate representation for functions.
For example, consider that a closure produced via property extraction treats equality different from ordinary closures, and is therefore likely a different class. Implementations may also use different classes for functions based on arity and or type. Arity may be implicitly affected by whether a function is an instance method (with an implicit receiver parameter) or not. The variations are manifold, and so this specification only guarantees that function objects are instances of some class that is considered to implement \cd{Function}.
}
The run time type of a function object always implements the class \cd{Function}.
%5 insert misleading 'dartCode' comments
\LMHash{}
The run time type of a function object always implements the class \cd{Function}.
\commentary{
One cannot assume, based on the above, that given a function \cd{f}, \cd{f.runt
imeType} will actually be \cd{Function}, or that any two distinct function objec
%\begin{dartCode}
ts necessarily have the same runtime type.
}
\rationale{
It is up to the implementation to choose an appropriate representation for functions.
%\end{dartCode}
For example, consider that a closure produced via property extraction treats equality different from ordinary closures, and is therefore likely a different class. Implementations may also use different classes for functions based on arity and or type. Arity may be implicitly affected by whether a function is an instance method (with an implicit receiver parameter) or not. The variations are manifold, and so this specification only guarantees that function objects are instances of some class that is considered to implement \cd{Function}.
}
%6 remove empty lines between normative and non-normative text
\LMHash{}
The run time type of a function object always implements the class \cd{Function}.
\commentary{
One cannot assume, based on the above, that given a function \cd{f}, \cd{f.runtimeType} will actually be \cd{Function}, or that any two distinct function objects necessarily have the same runtime type.
}
\rationale{
It is up to the implementation to choose an appropriate representation for functions.
For example, consider that a closure produced via property extraction treats equality different from ordinary closures, and is therefore likely a different class. Implementations may also use different classes for functions based on arity and or type. Arity may be implicitly affected by whether a function is an instance method (with an implicit receiver parameter) or not. The variations are manifold, and so this specification only guarantees that function objects are instances of some class that is considered to implement \cd{Function}.
}
%7 remove white space between normative and non-normative text
\LMHash{}
The run time type of a function object always implements the class \cd{Function}.\commentary{
One cannot assume, based on the above, that given a function \cd{f}, \cd{f.runtimeType} will actually be \cd{Function}, or that any two distinct function objects necessarily have the same runtime type.
}\rationale{
It is up to the implementation to choose an appropriate representation for functions.
For example, consider that a closure produced via property extraction treats equality different from ordinary closures, and is therefore likely a different class. Implementations may also use different classes for functions based on arity and or type. Arity may be implicitly affected by whether a function is an instance method (with an implicit receiver parameter) or not. The variations are manifold, and so this specification only guarantees that function objects are instances of some class that is considered to implement \cd{Function}.}
% Test structure command (\section) with leading white space ----------
\subsection{ Equality}
\LMLabel{equality}
The subsection should end the hashing block, so these words should
not affect the previous hash value.
% ---------------------------------------------------------------------
\end{document}

514
tools/addlatexhash.dart Normal file → Executable file
View file

@ -1,3 +1,4 @@
#!/usr/bin/env dart
// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
@ -6,35 +7,38 @@
// This is a very specialized tool which was created in order to support
// adding hash values used as location markers in the LaTeX source of the
// language specification. It is intended to take its input file as the
// first argument and the output file name as the second argument. From
// docs/language a typical usage would be as follows:
// first argument, an output file name as the second argument, and a
// hash listing file name as the third argument. From docs/language a
// typical usage would be as follows:
//
// dart ../../tools/addlatexhash.dart dartLangSpec.tex tmp.tex
// dart ../../tools/addlatexhash.dart dartLangSpec.tex out.tex hash.txt
//
// This will yield a normalized variant tmp.tex of the language
// specification with hash values filled in. For more details, please
// check the language specification source itself.
// This will produce a normalized variant out.tex of the language
// specification with hash values filled in, and a listing hash.txt of
// all the hash values along with the label of their textual context
// (section, subsection, subsubsection, paragraph) . For more details,
// please check the language specification source itself.
//
// NB: This utility assumes UN*X style line endings, \n, in the LaTeX
// source file received as input; it will not work with other styles.
//
// TODO: The current version does not fill in hash values, it only
// standardizes the LaTeX source by removing comments and normalizing
// white space.
import 'dart:io';
import 'dart:convert';
import '../pkg/utf/lib/utf.dart';
import '../pkg/crypto/lib/crypto.dart';
// Normalization of the text, i.e., removal or normalization
// of elements that do not affect the output from latex
// ----------------------------------------------------------------------
// Normalization of the text: removal or normalization of parts that
// do not affect the output from latex, such as white space.
final commentRE = new RegExp(r"[^\\]%.*"); // NB: . does not match \n
final commentRE = new RegExp(r"[^\\]%.*"); // NB: . does not match \n.
final whitespaceAllRE = new RegExp(r"^\s+$");
final whitespaceRE = new RegExp(r"[ \t]{2,}");
// normalization steps
final whitespaceRE = new RegExp(r"(?:(?=\s).){2,}"); // \s except end-of-line
/// Removes [match]ing part of [line], adjusting that part with the
/// given [startOffset] and [endOffset], bounded to be valid indices
/// into the string if needed, then inserts [glue] where text was
/// removed. If there is no match then [line] is returned.
cutMatch(line, match, {startOffset: 0, endOffset: 0, glue: ""}) {
if (match == null) return line;
var start = match.start + startOffset;
@ -52,6 +56,9 @@ cutRegexp(line, re, {startOffset: 0, endOffset: 0, glue: ""}) {
glue: glue);
}
/// Removes the rest of [line] starting from the beginning of the
/// given [match], and adjusting with the given [offset]. If there
/// is no match then [line] is returned.
cutFromMatch(line, match, {offset: 0, glue: ""}) {
if (match == null) return line;
return line.substring(0, match.start + offset) + glue;
@ -61,127 +68,479 @@ cutFromRegexp(line, re, {offset: 0, glue: ""}) {
return cutFromMatch(line, re.firstMatch(line), offset: offset, glue: glue);
}
isWsOnly(line) => whitespaceAllRE.firstMatch(line) != null;
isWsOnly(line) => line.contains(whitespaceAllRE);
isCommentOnly(line) => line.startsWith("%");
/// Yields "\n" when [line] is terminated by a newline character, and
/// the empty string when it is not.
justEol(line) => line.endsWith("\n") ? "\n" : "";
/// Removes the contents of the comment at the end of [line],
/// leaving the "%" in place. If no comment is present,
/// return [line].
///
/// NB: it is tempting to remove everything from the '%' and out,
/// including the final newline, if any, but this does not work.
/// The problem is that TeX will do exactly this, but then it will
/// add back a character that depends on its state (S, M, or N),
/// and it is tricky to maintain a similar state that matches the
/// state of TeX faithfully. Hence, we remove the content of
/// comments but do not remove the comments themselves, we just
/// leave the '%' at the end of the line and let TeX manage its
/// states in a way that does not differ from the file from before
/// stripComment.
stripComment(line) {
// NB: it is tempting to remove everything from the '%' and out,
// including the final newline, if any, but this does not work.
// The problem is that TeX will do exactly this, but then it will
// add back a character that depends on its state (S, M, or N),
// and it is tricky to maintain a similar state that matches the
// state of TeX faithfully. Hence, we remove the content of
// comments but do not remove the comments themselves, we just
// leave the '%' at the end of the line and let TeX manage its
// states in a way that does not differ from the file from before
// stripComment
if (isCommentOnly(line)) return "%\n";
return cutRegexp(line, commentRE, startOffset: 2);
}
// Reduce a wsOnly line to its eol, remove leading ws
// entirely, and reduce multiple ws chars to one
/// Normalizes the white space in [line]: a line that is empty after
/// removing leading white space collapses to its end-of-line character
/// (or to the empty string if it has none); otherwise the leading white
/// space is dropped and every run matched by [whitespaceRE] is replaced
/// by a single space.
normalizeWhitespace(line) {
  var withoutIndent = line.trimLeft();
  return withoutIndent.isEmpty
      ? justEol(line)
      : withoutIndent.replaceAll(whitespaceRE, " ");
}
// Reduce sequences of >1 wsOnly lines to 1, and sequences of >1
// commentOnly lines to 1; moreover, treat commentOnly lines as
// wsOnly when occurring in wsOnly line blocks
/// Reduces sequences of >1 white-space-only lines in [lines] to 1,
/// and sequences of >1 comment-only lines to 1. Treats comment-only
/// lines as white-space-only when they occur in white-space-only
/// line blocks.
multilineNormalize(lines) {
var afterBlankLines = false; // does 'line' succeed >0 empty lines?
var afterCommentLines = false; // .. succeed >0 commentOnly lines?
var afterBlankLines = false; // Does [line] succeed >0 empty lines?
var afterCommentLines = false; // Does [line] succeed >0 commentOnly lines?
var newLines = new List();
for (var line in lines) {
if (afterBlankLines && afterCommentLines) {
// can never happen
// Previous line was both blank and a comment: not possible.
throw "Bug, please report to eernst@";
} else if (afterBlankLines && !afterCommentLines) {
// at least one line before 'line' is wsOnly
// At least one line before [line] is wsOnly.
if (!isWsOnly(line)) {
// blank line block ended
// Blank line block ended.
afterCommentLines = isCommentOnly(line);
// special case: it seems to be safe to remove commentOnly lines
// Special case: It seems to be safe to remove commentOnly lines
// after wsOnly lines, so the TeX state must be predictably right;
// next line will then be afterCommentLines and be dropped, so
// we drop the entire comment block---which is very useful; we can
// we drop the entire comment block---which is very useful. We can
// also consider this comment line to be an empty line, such that
// subsequent empty lines can be considered to be in a block of
// empty lines; note that almost all variants of this will break..
// empty lines. Note that almost all variants of this breaks.
if (afterCommentLines) {
// _current_ 'line' a commentOnly here
// _Current_ 'line' is a commentOnly here.
afterBlankLines = true;
afterCommentLines = false;
// and do not add 'line'
// Omit addition of [line].
} else {
// after blanks, but current 'line' is neither blank nor comment
// After blanks, but current 'line' is neither blank nor comment.
afterBlankLines = false;
newLines.add(line);
}
} else {
// blank line block continues, do not add 'line'
// Blank line block continues, omit addition of [line].
}
} else if (!afterBlankLines && afterCommentLines) {
// at least one line before 'line' is commentOnly
// At least one line before [line] is commentOnly.
if (!isCommentOnly(line)) {
// comment line block ended
// Comment block ended.
afterBlankLines = isWsOnly(line);
afterCommentLines = false;
newLines.add(line);
} else {
// comment line block continues, do not add 'line'
// Comment block continues, do not add [line].
}
} else {
assert(!afterBlankLines && !afterCommentLines);
// no wsOnly or commentOnly lines preceed 'line'
// No wsOnly or commentOnly lines precede [line].
afterBlankLines = isWsOnly(line);
afterCommentLines = isCommentOnly(line);
if (!afterCommentLines) newLines.add(line);
// else skipping commentOnly line after nonWs, nonComment text
if (!afterCommentLines) {
newLines.add(line);
} else {
// skip commentOnly line after nonWs/nonComment text.
}
}
}
return newLines;
}
// Selecting the elements in the pipeline
/// Selects the elements in the normalization pipeline.
normalize(line) => normalizeWhitespace(stripComment(line));
/// Selects the elements in the significant-spacing block
/// normalization pipeline.
sispNormalize(line) => stripComment(line);
// Managing fragments with significant spacing
// Managing fragments with significant spacing.
final dartCodeBeginRE = new RegExp(r"^\s*\\begin\{dartCode\}");
final dartCodeEndRE = new RegExp (r"^\s*\\end\{dartCode\}");
final dartCodeBeginRE = new RegExp(r"^\s*\\begin\s*\{dartCode\}");
final dartCodeEndRE = new RegExp (r"^\s*\\end\s*\{dartCode\}");
sispIs(line, targetRE) {
return targetRE.firstMatch(line) != null;
/// Recognizes beginning of dartCode block.
sispIsDartBegin(line) => line.contains(dartCodeBeginRE);
/// Recognizes end of dartCode block.
sispIsDartEnd(line) => line.contains(dartCodeEndRE);
// ----------------------------------------------------------------------
// Analyzing the input to point out "interesting" lines
/// Feeds every element of [lines], in order, to [analyzer.analyze] and
/// collects the non-null results. A null result means that the line
/// gave rise to no event ("uninteresting" line); any other result is
/// the event object describing that line. Returns the list of
/// collected events.
findEvents(lines, analyzer) {
  var collected = new List();
  lines.forEach((sourceLine) {
    var outcome = analyzer.analyze(sourceLine);
    if (outcome != null) collected.add(outcome);
  });
  return collected;
}
sispIsDartBegin(line) => sispIs(line, dartCodeBeginRE);
sispIsDartEnd(line) => sispIs(line, dartCodeEndRE);
/// Builds a RegExp that recognizes a LaTeX command occupying a line of
/// its own: optional white space, a backslash, the command name as
/// described by the RegExp source text [cmdNameRE], optional white
/// space, a '{..}' argument, and optional trailing white space.
lineCommandRE(cmdNameRE) {
  var beforeName = r"^\s*\\";
  var afterName = r"\s*\{.*\}\s*$";
  return new RegExp(beforeName + cmdNameRE + afterName);
}
// Transform input file into output file
final hashLabelStartRE = new RegExp(r"^\s*\\LMLabel\s*\{");
final hashLabelEndRE = new RegExp(r"\}\s*$");
main ([args]) {
if (args.length != 2) {
print("Usage: addlatexhash.dart <input-file> <output-file>");
throw "Received ${args.length} arguments, expected two";
final hashMarkRE = lineCommandRE("LMHash");
final hashLabelRE = lineCommandRE("LMLabel");
final sectioningRE = lineCommandRE("((|sub(|sub))section|paragraph)");
final sectionRE = lineCommandRE("section");
final subsectionRE = lineCommandRE("subsection");
final subsubsectionRE = lineCommandRE("subsubsection");
final paragraphRE = lineCommandRE("paragraph");
/// Returns true iff [line] begins a block of lines that gets a hash value,
/// i.e., iff it matches [hashMarkRE].
isHashMarker(line) => line.contains(hashMarkRE);
/// Returns true iff [line] defines a sectioning label, i.e., iff it
/// matches [hashLabelRE].
isHashLabel(line) => line.contains(hashLabelRE);
/// Returns true iff [line] is a sectioning command resp. one of its
/// more specific forms; note that it is assumed that sectioning commands
/// do not contain a newline between the command name and the '{'.
isSectioningCommand(line) => line.contains(sectioningRE);
/// Returns true iff [line] matches [sectionRE].
isSectionCommand(line) => line.contains(sectionRE);
/// Returns true iff [line] matches [subsectionRE].
isSubsectionCommand(line) => line.contains(subsectionRE);
/// Returns true iff [line] matches [subsubsectionRE].
isSubsubsectionCommand(line) => line.contains(subsubsectionRE);
/// Returns true iff [line] matches [paragraphRE].
isParagraphCommand(line) => line.contains(paragraphRE);
/// Returns true iff [line] does not end a block of lines that gets
/// a hash value, i.e., iff it is not a sectioning command.
isntHashBlockTerminator(line) => !isSectioningCommand(line);
/// Extracts the label text from [line]: the text between the end of
/// the [hashLabelStartRE] match and the start of the [hashLabelEndRE]
/// match. Callers are expected to have established isHashLabel(line);
/// here this is only checked by an assert.
extractHashLabel(line) {
  var opening = hashLabelStartRE.firstMatch(line);
  var closing = hashLabelEndRE.firstMatch(line);
  assert(opening != null && closing != null);
  var labelBegin = opening.end;
  var labelEnd = closing.start;
  return line.substring(labelBegin, labelEnd);
}
// Event classes: Keep track of relevant information about the LaTeX
// source code lines, such as where \LMHash and \LMLabel commands are
// used, and how they are embedded in the sectioning structure.
/// Common supertype of all events. It supplies default implementations
/// so that [setEndLineNumber] and [getStartLineNumber] can be invoked
/// on every event, whatever its kind.
abstract class HashEvent {
  /// Sets the end line number for events that have one; this default
  /// implementation ignores [n]. The end line number marks the end of
  /// the block of lines associated with an event, for the kinds of
  /// event that concern a block of lines rather than a single line.
  setEndLineNumber(n) {}

  /// Returns the start line number for \LMHash{} events and null for
  /// all other events; this default implementation returns null. The
  /// value serves as a boundary: the preceding \LMHash{} block must
  /// stop before the line of this \LMHash{} command. Hash blocks may
  /// stop earlier, because they cannot contain sectioning commands.
  getStartLineNumber() {
    return null;
  }
}
/// Event for a \LMHash{} command; it delimits the block of lines that
/// gets hashed.
class HashMarkerEvent extends HashEvent {
  // Number of the first line in the block that gets hashed.
  var startLineNumber;

  // Upper bound on the number of the first line after the block that
  // gets hashed (the line where the next \LMHash{} occurs). It is not
  // known when the event is created, because that line has not been
  // reached yet, so it is filled in by a separate scan through
  // [setEndLineNumber]. The block may end earlier than this, because
  // a block ends before any sectioning command.
  var endLineNumber;

  HashMarkerEvent(firstLine) : startLineNumber = firstLine;

  setEndLineNumber(n) {
    endLineNumber = n;
  }

  getStartLineNumber() {
    return startLineNumber;
  }
}
/// Event for a \LMLabel{..} command; it carries the text of the label.
class HashLabelEvent extends HashEvent {
  // The label text stored by the constructor.
  var labelText;

  HashLabelEvent(text) : labelText = text;
}
/// Line-by-line analyzer that emits an event for each \LMHash{} and
/// \LMLabel{..} line, and keeps track of the most recently seen
/// sectioning command in order to prefix labels accordingly.
class HashAnalyzer {
  // List of kinds of pending (= most recently seen) sectioning command.
  // When updating this list, also update sectioningPrefix below.
  // The values must be pairwise distinct, because they are used as the
  // case constants of the switch in sectioningPrefix.
  static const PENDING_IS_NONE = 0;
  static const PENDING_IS_SECTION = 1;
  static const PENDING_IS_SUBSECTION = 2;
  static const PENDING_IS_SUBSUBSECTION = 3;
  static const PENDING_IS_PARAGRAPH = 4;

  // Number of lines passed to [analyze] so far; the first line
  // analyzed gets the number 0.
  var lineNumber = 0;

  // Kind of the most recently seen sectioning command (PENDING_IS_..).
  var pendingSectioning = PENDING_IS_NONE;

  HashAnalyzer();

  setPendingToSection() {
    pendingSectioning = PENDING_IS_SECTION;
  }

  setPendingToSubsection() {
    pendingSectioning = PENDING_IS_SUBSECTION;
  }

  setPendingToSubsubsection() {
    pendingSectioning = PENDING_IS_SUBSUBSECTION;
  }

  setPendingToParagraph() {
    pendingSectioning = PENDING_IS_PARAGRAPH;
  }

  clearPending() {
    pendingSectioning = PENDING_IS_NONE;
  }

  /// Returns the label prefix ("sec:", "subsec:", "subsubsec:", "par:")
  /// that corresponds to the pending sectioning command. Throws if no
  /// sectioning command is pending.
  sectioningPrefix() {
    switch (pendingSectioning) {
      case PENDING_IS_SECTION: return "sec:";
      case PENDING_IS_SUBSECTION: return "subsec:";
      case PENDING_IS_SUBSUBSECTION: return "subsubsec:";
      case PENDING_IS_PARAGRAPH: return "par:";
      case PENDING_IS_NONE:
        throw
            "\\LMHash{..} should only be used after a sectioning command " +
            "(\\section, \\subsection, \\subsubsection, \\paragraph)";
      default:
        // set of PENDING_IS_.. was extended, but updates here omitted
        throw "Bug, please report to eernst@";
    }
  }

  /// Analyzes [line], which is taken to be the next line of the input.
  /// Returns a [HashMarkerEvent] holding the number of this line if it
  /// is a hash marker, a [HashLabelEvent] holding the prefixed label
  /// text if it is a label, and null otherwise. For a line that is
  /// neither, the pending sectioning kind is updated when the line is
  /// a sectioning command.
  analyze(line) {
    var currentLineNumber = lineNumber++;
    if (isHashMarker(line)) {
      return new HashMarkerEvent(currentLineNumber);
    } else if (isHashLabel(line)) {
      var labelText = sectioningPrefix() + extractHashLabel(line);
      return new HashLabelEvent(labelText);
    } else {
      // No events to emit, but we may need to note state changes
      if (isSectionCommand(line)) {
        setPendingToSection();
      } else if (isSubsectionCommand(line)) {
        setPendingToSubsection();
      } else if (isSubsubsectionCommand(line)) {
        setPendingToSubsubsection();
      } else if (isParagraphCommand(line)) {
        setPendingToParagraph();
      } else {
        // No state changes.
      }
      return null;
    }
  }
}
/// Returns the list of hash events for [lines], with end line numbers
/// filled in. The events are first found with a fresh [HashAnalyzer];
/// then they are visited from last to first, and each one is given
/// the current boundary as its end line number. The boundary starts
/// out as the number of lines and moves to the start line number of
/// each event that has one.
findHashEvents(lines) {
  var found = findEvents(lines, new HashAnalyzer());
  var boundary = lines.length;
  for (var i = found.length - 1; i >= 0; i--) {
    var current = found[i];
    current.setEndLineNumber(boundary);
    var start = current.getStartLineNumber();
    if (start != null) boundary = start;
  }
  return found;
}
// ----------------------------------------------------------------------
// Removal of non-normative elements of the text (rationale, commentary).
/// Returns [line] without the command [cmdName] (based on a match
/// on "\\cmdName\s*{..}") starting at [startIndex], which must be the
/// index of the backslash of the command; note that it is assumed but
/// not checked that [line] contains "\\cmdName\s*{..", and note that
/// the end of the {..} block is found via brace matching (i.e., nested
/// {..} blocks are handled, and the escaped braces '\{' and '\}' are
/// not counted), but it may break if '{' is made an active character
/// etc.etc.
///
/// Throws "Bug, please report to eernst@" if there is no '{' after the
/// command name, and "Unmatched braces" if the block is not closed
/// within [line].
removeCommand(line, cmdName, startIndex) {
  const BACKSLASH = 92; // char code for '\\'.
  const BRACE_BEGIN = 123; // char code for '{'.
  const BRACE_END = 125; // char code for '}'.

  // Skip the backslash and the command name, then search for the '{'.
  var blockStartIndex = startIndex + cmdName.length + 1;
  while (blockStartIndex < line.length &&
         line.codeUnitAt(blockStartIndex) != BRACE_BEGIN) {
    blockStartIndex++;
  }
  blockStartIndex++;
  if (blockStartIndex > line.length) {
    throw "Bug, please report to eernst@";
  }
  // [blockStartIndex] has index just after '{'.

  var afterEscape = false; // Is true iff [index] is just after an
                           // unescaped '\\'.
  var braceLevel = 1; // Have seen so many '{'s minus so many '}'s.

  for (var index = blockStartIndex; index < line.length; index++) {
    switch (line.codeUnitAt(index)) {
      case BRACE_BEGIN:
        if (afterEscape) {
          afterEscape = false;
        } else {
          braceLevel++;
        }
        break;
      case BRACE_END:
        if (afterEscape) {
          afterEscape = false;
        } else {
          braceLevel--;
        }
        break;
      case BACKSLASH:
        // A backslash after a backslash is itself escaped ('\\\\' is a
        // complete command), so it must not escape the next character.
        afterEscape = !afterEscape;
        break;
      default:
        afterEscape = false;
    }
    if (braceLevel == 0) {
      return line.substring(0, startIndex) + line.substring(index + 1);
    }
  }
  // Removal failed; we consider this to mean that the input is ill-formed.
  throw "Unmatched braces";
}
/// Matches the start of a '\commentary{' command, allowing white space
/// between the command name and the opening brace.
final RegExp commentaryRE = new RegExp(r"\\commentary\s*\{");

/// Matches the start of a '\rationale{' command, allowing white space
/// between the command name and the opening brace.
final RegExp rationaleRE = new RegExp(r"\\rationale\s*\{");
/// Removes every {}-balanced '\commentary{..}' command from [line] and
/// returns the result; [line] is returned unchanged when it contains none.
removeCommentary(line) {
  var text = line;
  // Strip the first remaining occurrence until no occurrence is left.
  while (true) {
    var hit = commentaryRE.firstMatch(text);
    if (hit == null) return text;
    text = removeCommand(text, r"commentary", hit.start);
  }
}
/// Removes every {}-balanced '\rationale{..}' command from [line] and
/// returns the result; [line] is returned unchanged when it contains none.
removeRationale(line) {
  var text = line;
  // Strip the first remaining occurrence until no occurrence is left.
  while (true) {
    var hit = rationaleRE.firstMatch(text);
    if (hit == null) return text;
    text = removeCommand(text, r"rationale", hit.start);
  }
}
/// Returns [line] with all {}-balanced '\commentary{..}' commands removed
/// first, then all '\rationale{..}' commands removed, and finally its
/// white-space normalized.
simplifyLine(line) =>
    normalizeWhitespace(removeRationale(removeCommentary(line)));
// ----------------------------------------------------------------------
// Recognition of line blocks, insertion of block hash into \LMHash{}.
/// Matches a brace-delimited argument; the match is greedy, so it runs
/// from the first '{' to the last '}' on the line.
final RegExp latexArgumentRE = new RegExp(r"\{.*\}");
/// Returns [line] with the match of [commentRE] cut out (using a start
/// offset of 1) and trailing white space removed.
cleanupLine(line) {
  var withoutComment = cutRegexp(line, commentRE, startOffset: 1);
  return withoutComment.trimRight();
}
/// Joins the lines of one hash block into a single string.
///
/// Lines are taken from [startIndex] in [lines] up to, but not including,
/// [nextIndex]; gathering stops early at the first line that is a hash
/// block terminator. Each gathered line is cleaned up using [cleanupLine],
/// and the cleaned lines are separated by " ".
gatherLines(lines, startIndex, nextIndex) {
  var gathered = [];
  for (var candidate in lines.getRange(startIndex, nextIndex)) {
    // The terminator itself is not part of the block.
    if (!isntHashBlockTerminator(candidate)) break;
    gathered.add(cleanupLine(candidate));
  }
  return gathered.join(" ");
}
/// Computes the SHA1 hash value for the line block starting at
/// [startIndex] in [lines] and stopping just before [nextIndex].
///
/// The block is gathered with [gatherLines] and simplified with
/// [simplifyLine]; the hash is taken over the UTF-8 encoding of the
/// simplified text. SIDE EFFECT: writes the simplified text to [listSink]
/// as a ' % <text>' line.
computeHashValue(lines, startIndex, nextIndex, listSink) {
  final text = simplifyLine(gatherLines(lines, startIndex, nextIndex));
  listSink.write(" % $text\n");
  return (new SHA1()..add(encodeUtf8(text))).close();
}
/// Returns the hash value of the line block from [startIndex] up to
/// [nextIndex] in [lines] as a hexadecimal string. The hash is computed by
/// [computeHashValue], which also writes the simplified block text to
/// [listSink].
computeHashString(lines, startIndex, nextIndex, listSink) {
  var digest = computeHashValue(lines, startIndex, nextIndex, listSink);
  return CryptoUtils.bytesToHex(digest);
}
/// Inserts hash values into the \LMHash{} lines of [lines] and writes the
/// hash value list to [listSink].
///
/// For each [HashMarkerEvent] in [hashEvents], the block following the
/// event's start line (up to its end line) is hashed, the {..} argument on
/// the start line is replaced by the hash value, and the hash value is
/// written to [listSink]; hashing also writes the simplified block text
/// as a ' % <text>' line (one long line, for easy grepping etc.). For each
/// [HashLabelEvent], its label text is written to [listSink] as a
/// sectioning marker. Other events are ignored.
addHashMarks(lines, hashEvents, listSink) {
  for (var event in hashEvents) {
    if (event is HashMarkerEvent) {
      var markerLine = event.startLineNumber;
      var blockEnd = event.endLineNumber;
      // The hashed block starts on the line after the \LMHash{} line.
      final digestHex =
          computeHashString(lines, markerLine + 1, blockEnd, listSink);
      lines[markerLine] =
          lines[markerLine].replaceAll(latexArgumentRE, "{$digestHex}");
      listSink.write(" $digestHex\n");
      continue;
    }
    if (event is HashLabelEvent) {
      listSink.write("${event.labelText}\n");
    }
  }
}
/// Transforms LaTeX input to LaTeX output plus hash value list file.
///
/// Expects exactly three entries in [args]: the input file path, the
/// output file path, and the list file path (in that order). Otherwise a
/// usage message is printed and a string is thrown.
main ([args]) {
if (args.length != 3) {
print("Usage: addlatexhash.dart <input-file> <output-file> <list-file>");
throw "Received ${args.length} arguments, expected three";
}
// Get LaTeX source.
var inputFile = new File(args[0]);
// The existence check is an assert, so it only runs when assertions are
// enabled.
assert(inputFile.existsSync());
var lines = inputFile.readAsLinesSync();
// single-line normalization
// Will hold LaTeX source with normalized spacing etc., plus hash values.
var outputFile = new File(args[1]);
// Will hold hierarchical list of hash values.
var listFile = new File(args[2]);
var listSink = listFile.openWrite();
// Perform single-line normalization.
var inDartCode = false;
// NOTE(review): both `newLines` and `normalizedLines` variants appear
// below; presumably these are the old and new sides of the same diff --
// confirm which set of lines should remain.
var newLines = new List();
var normalizedLines = new List();
for (var line in lines) {
if (sispIsDartBegin(line)) {
// NOTE(review): the next line looks like a diff hunk header rather than
// Dart code, and the statements between the `if` above and the
// assignment below appear to be elided -- confirm against the real file.
@ -190,15 +549,20 @@ main ([args]) {
inDartCode = false;
}
// Lines inside Dart code are normalized with sispNormalize, all other
// lines with normalize; a newline is appended to each line first.
if (inDartCode) {
newLines.add(sispNormalize(line + "\n"));
normalizedLines.add(sispNormalize(line + "\n"));
} else {
newLines.add(normalize(line + "\n"));
normalizedLines.add(normalize(line + "\n"));
}
}
// multi-line normalization
newLines = multilineNormalize(newLines);
// Perform multi-line normalization.
normalizedLines = multilineNormalize(normalizedLines);
// output result
outputFile.writeAsStringSync(newLines.join());
// Insert hash values.
// addHashMarks modifies normalizedLines in place and writes to listSink.
var hashEvents = findHashEvents(normalizedLines);
addHashMarks(normalizedLines, hashEvents, listSink);
// Produce/finalize output.
outputFile.writeAsStringSync(normalizedLines.join());
listSink.close();
}