mirror of
https://github.com/zsh-users/zsh
synced 2024-07-08 19:55:44 +00:00
18982: comments/rant for paramsubst(), accidentally uncommited
This commit is contained in:
parent
c9c5f9da93
commit
bab0f3b920
572
Src/subst.c
572
Src/subst.c
|
@ -300,6 +300,15 @@ multsub(char **s, char ***a, int *isarr, char *sep)
|
|||
while (nonempty(&foo))
|
||||
*p++ = (char *)ugetnode(&foo);
|
||||
*p = NULL;
|
||||
/*
|
||||
* This is the most obscure way of deciding whether a value is
|
||||
* an array it would be possible to imagine. It seems to result
|
||||
* partly because we don't pass down the qt and ssub flags from
|
||||
* paramsubst() through prefork() properly, partly because we
|
||||
* don't tidy up to get back the return type from multsub we
|
||||
* need properly. The crux of neatening this up is to get rid
|
||||
* of the following test.
|
||||
*/
|
||||
if (a && mult_isarr) {
|
||||
*a = r;
|
||||
*isarr = SCANPM_MATCHMANY;
|
||||
|
@ -817,6 +826,23 @@ subst_parse_str(char **sp, int single, int err)
|
|||
#define isstring(c) ((c) == '$' || (char)(c) == String || (char)(c) == Qstring)
|
||||
#define isbrack(c) ((c) == '[' || (char)(c) == Inbrack)
|
||||
|
||||
/*
|
||||
* Given a linked list l with node n, perform parameter substitution
|
||||
* starting from *str. Return the node with the substitutuion performed
|
||||
* or NULL if it failed.
|
||||
*
|
||||
* If qt is true, the `$' was quoted. TODO: why can't we just look
|
||||
* to see if the first character was String or Qstring?
|
||||
*
|
||||
* If ssub is true, we are being called via singsubst(), which means
|
||||
* the result will be a single word. TODO: can we generate the
|
||||
* single word at the end? TODO: if not, or maybe in any case,
|
||||
* can we pass down the ssub flag from prefork with the other flags
|
||||
* instead of pushing it into different arguments? (How exactly
|
||||
* to qt and ssub differ? Are both necessary, if so is there some
|
||||
* better way of separating the two?)
|
||||
*/
|
||||
|
||||
/**/
|
||||
LinkNode
|
||||
paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
||||
|
@ -824,43 +850,207 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
char *aptr = *str, c, cc;
|
||||
char *s = aptr, *fstr, *idbeg, *idend, *ostr = (char *) getdata(n);
|
||||
int colf; /* != 0 means we found a colon after the name */
|
||||
/*
|
||||
* There are far too many flags. They need to be grouped
|
||||
* together into some structure which ties them to where they
|
||||
* came from.
|
||||
*
|
||||
* Some flags have a an obscure relationship to their effect which
|
||||
* depends on incrementing them to particular values in particular
|
||||
* ways.
|
||||
*/
|
||||
/*
|
||||
* Whether the value is an array (in aval) or not (in val). There's
|
||||
* a movement from storing the value in the stuff read from the
|
||||
* parameter (the value v) to storing them in val and aval.
|
||||
* However, sometimes you find v reappearing temporarily.
|
||||
*
|
||||
* The values -1 and 2 are special to isarr. It looks like 2 is
|
||||
* some kind of an internal flag to do with whether the array's been
|
||||
* copied, in which case I don't know why we don't use the copied
|
||||
* flag, but they do both occur close together so they presumably
|
||||
* have different effects. The value -1 is isued to force us to
|
||||
* keep an empty array. It's tested in the YUK chunk (I mean the
|
||||
* one explicitly marked as such).
|
||||
*/
|
||||
int isarr = 0;
|
||||
/*
|
||||
* This is just the setting of the option except we need to
|
||||
* take account of ^ and ^^.
|
||||
*/
|
||||
int plan9 = isset(RCEXPANDPARAM);
|
||||
/*
|
||||
* Likwise, but with ~ and ~~. Also, we turn it off later
|
||||
* on if qt is passed down.
|
||||
*/
|
||||
int globsubst = isset(GLOBSUBST);
|
||||
/*
|
||||
* Indicates ${#pm}, massaged by whichlen which is set by
|
||||
* the (c), (w), and (W) flags to indicate how we take the length.
|
||||
*/
|
||||
int getlen = 0;
|
||||
int whichlen = 0;
|
||||
/*
|
||||
* Indicates ${+pm}: a simple boolean for once.
|
||||
*/
|
||||
int chkset = 0;
|
||||
/*
|
||||
* Indicates we have tried to get a value in v but that was
|
||||
* unset. I don't quite understand why (v == NULL) isn't
|
||||
* good enough, but there are places where we seem to need
|
||||
* to second guess whether a value is a real value or not.
|
||||
*/
|
||||
int vunset = 0;
|
||||
/*
|
||||
* Indicates (t) flag, i.e. print out types. The code for
|
||||
* this actually isn't too horrifically inbred compared with
|
||||
* that for (P).
|
||||
*/
|
||||
int wantt = 0;
|
||||
/*
|
||||
* Indicates spliting a string into an array. There aren't
|
||||
* actually that many special cases for this --- which may
|
||||
* be why it doesn't work properly; we split in some cases
|
||||
* where we shouldn't, in particular on the multsubs for
|
||||
* handling embedded values for ${...=...} and the like.
|
||||
*/
|
||||
int spbreak = isset(SHWORDSPLIT) && !ssub && !qt;
|
||||
/* Scalar and array value, see isarr above */
|
||||
char *val = NULL, **aval = NULL;
|
||||
/*
|
||||
* Padding based on setting in parameter rather than substitution
|
||||
* flags. This is only used locally.
|
||||
*/
|
||||
unsigned int fwidth = 0;
|
||||
/*
|
||||
* vbuf and v are both used to retrieve parameter values; this
|
||||
* is a kludge, we pass down vbuf and it may or may not return v.
|
||||
*/
|
||||
struct value vbuf;
|
||||
Value v = NULL;
|
||||
/*
|
||||
* This expressive name refers to the set of flags which
|
||||
* is applied to matching for #, %, / and their doubled variants:
|
||||
* (M), (R), (B), (E), (N), (S).
|
||||
*/
|
||||
int flags = 0;
|
||||
/* Value from (I) flag, used for ditto. */
|
||||
int flnum = 0;
|
||||
/*
|
||||
* sortit is an obscure combination of the settings for (o), (O),
|
||||
* (i) and (n). casind is (i) and numord is (n); these are
|
||||
* separate so we can have fun doing the obscure combinatorics later.
|
||||
* indord is the (a) flag, which for consistency doesn't get
|
||||
* combined into sortit.
|
||||
*/
|
||||
int sortit = 0, casind = 0, numord = 0, indord = 0;
|
||||
/* (u): straightforward. */
|
||||
int unique = 0;
|
||||
/* combination of (L), (U) and (C) flags. */
|
||||
int casmod = 0;
|
||||
/*
|
||||
* quotemod says we are doing either (q) (positive), (Q) (negative)
|
||||
* or not (0). quotetype counts the q's for the first case.
|
||||
* quoterr is simply (X) but gets passed around a lot because the
|
||||
* combination (eX) needs it.
|
||||
*/
|
||||
int quotemod = 0, quotetype = 0, quoteerr = 0;
|
||||
/*
|
||||
* (V) flag: fairly straightforward, except that as with so
|
||||
* many flags it's not easy to decide where to put it in the order.
|
||||
*/
|
||||
int visiblemod = 0;
|
||||
/*
|
||||
* The (z) flag, nothing to do with SH_WORD_SPLIT which is tied
|
||||
* spbreak, see above; fairly straighforward in use but c.f.
|
||||
* the comment for visiblemod.
|
||||
*/
|
||||
int shsplit = 0;
|
||||
/*
|
||||
* The separator from (j) and (s) respectively, or (F) and (f)
|
||||
* respectively (hardwired to "\n" in that case). Slightly
|
||||
* confusingly also used for ${#pm}, thought that's at least
|
||||
* documented in the manual
|
||||
*/
|
||||
char *sep = NULL, *spsep = NULL;
|
||||
/*
|
||||
* Padding strings. The left and right padding strings which
|
||||
* are repeated, then the ones which only occur once, for
|
||||
* the (l) and (r) flags.
|
||||
*/
|
||||
char *premul = NULL, *postmul = NULL, *preone = NULL, *postone = NULL;
|
||||
char *replstr = NULL; /* replacement string for /orig/repl */
|
||||
/* Replacement string for /orig/repl and //orig/repl */
|
||||
char *replstr = NULL;
|
||||
/* The numbers for (l) and (r) */
|
||||
zlong prenum = 0, postnum = 0;
|
||||
/*
|
||||
* Whether the value has been copied. Optimisation: if we
|
||||
* are modifying an expression, we only need to copy it the
|
||||
* first time, and if we don't modify it we can just use the
|
||||
* value from the parameter or input.
|
||||
*/
|
||||
int copied = 0;
|
||||
/*
|
||||
* The (A) flag for array assignment, with consequences for
|
||||
* splitting and joining; (AA) gives arrasg == 2 for associative
|
||||
* arrays.
|
||||
*/
|
||||
int arrasg = 0;
|
||||
/*
|
||||
* The (e) flag. As we need to do extra work not quite
|
||||
* at the end, the effect of this is kludged in in several places.
|
||||
*/
|
||||
int eval = 0;
|
||||
/*
|
||||
* The (P) flag. This interacts a bit obscurely with whether
|
||||
* or not we are dealing with a sub expression (subexp).
|
||||
*/
|
||||
int aspar = 0;
|
||||
/*
|
||||
* The (%) flag, c.f. visiblemod again.
|
||||
*/
|
||||
int presc = 0;
|
||||
/*
|
||||
* The (@) flag; interacts obscurely with qt and isarr.
|
||||
* This is one of the things that decides whether multsub
|
||||
* will produce an array, but in an extremely indirect fashion.
|
||||
*/
|
||||
int nojoin = 0;
|
||||
char inbrace = 0; /* != 0 means ${...}, otherwise $... */
|
||||
/*
|
||||
* != 0 means ${...}, otherwise $... What works without braces
|
||||
* is largely a historical artefact (everything works with braces,
|
||||
* I sincerely hope).
|
||||
*/
|
||||
char inbrace = 0;
|
||||
/*
|
||||
* Use for the (k) flag. Goes down into the parameter code,
|
||||
* sometimes.
|
||||
*/
|
||||
char hkeys = 0;
|
||||
/*
|
||||
* Used for the (v) flag, ditto. Not quite sure why they're
|
||||
* separate, but the tradition seems to be that things only
|
||||
* get combined when that makes the result more obscure rather
|
||||
* than less.
|
||||
*/
|
||||
char hvals = 0;
|
||||
/*
|
||||
* Whether we had to evaluate a subexpression, i.e. an
|
||||
* internal ${...} or $(...) or plain $pm. We almost don't
|
||||
* need to remember this (which would be neater), but the (P)
|
||||
* flag means the subexp and !subexp code is obscurely combined,
|
||||
* and the argument passing to fetchvalue has another kludge.
|
||||
*/
|
||||
int subexp;
|
||||
|
||||
*s++ = '\0';
|
||||
/*
|
||||
* Nothing to do unless the character following the $ is
|
||||
* something we recognise.
|
||||
*
|
||||
* Shouldn't this be a table or something? We test for all
|
||||
* these later on, too.
|
||||
*/
|
||||
if (!ialnum(c = *s) && c != '#' && c != Pound && c != '-' &&
|
||||
c != '!' && c != '$' && c != String && c != Qstring &&
|
||||
c != '?' && c != Quest && c != '_' &&
|
||||
|
@ -872,9 +1062,21 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
return n;
|
||||
}
|
||||
DPUTS(c == '{', "BUG: inbrace == '{' in paramsubst()");
|
||||
/*
|
||||
* Extra processing if there is an opening brace: mostly
|
||||
* flags in parentheses, but also one ksh hack.
|
||||
*/
|
||||
if (c == Inbrace) {
|
||||
inbrace = 1;
|
||||
s++;
|
||||
/*
|
||||
* In ksh emulation a leading `!' is a special flag working
|
||||
* sort of like our (k).
|
||||
* TODO: this is one of very few cases tied directly to
|
||||
* the emulation mode rather than an option. Since ksh
|
||||
* doesn't have parameter flags it might be neater to
|
||||
* handle this with the ^, =, ~ stuff, below.
|
||||
*/
|
||||
if ((c = *s) == '!' && s[1] != Outbrace && emulation == EMULATE_KSH) {
|
||||
hkeys = SCANPM_WANTKEYS;
|
||||
s++;
|
||||
|
@ -882,6 +1084,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
char *t, sav;
|
||||
int tt = 0;
|
||||
zlong num;
|
||||
/*
|
||||
* The (p) flag is (uniquely) only remembered within
|
||||
* this block. It says we do print-style handling
|
||||
* on the values for flags, but only on those.
|
||||
* This explains the ghastly macro, but why can't it
|
||||
* be a function? UNTOK_AND_ESCAPE is defined
|
||||
* so that the argument must be an lvalue.
|
||||
*/
|
||||
int escapes = 0;
|
||||
int klen;
|
||||
#define UNTOK(C) (itok(C) ? ztokens[(C) - Pound] : (C))
|
||||
|
@ -1089,22 +1299,40 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
s++;
|
||||
}
|
||||
}
|
||||
/* Sort is done by indexing on sortit-1:
|
||||
* bit 1: ascending (o)/descending (O)
|
||||
* bit 2: case sensitive/independent (i)
|
||||
* bit 3: strict order/numeric (n)
|
||||
* unless indord (a) is set set, in which case only test for
|
||||
* descending by assuming only (O) is possible (not verified).
|
||||
*/
|
||||
if (sortit)
|
||||
sortit += (casind << 1) + (numord << 2);
|
||||
|
||||
/*
|
||||
* premul, postmul specify the padding character to be used
|
||||
* multiple times with the (l) and (r) flags respectively.
|
||||
*/
|
||||
if (!premul)
|
||||
premul = " ";
|
||||
if (!postmul)
|
||||
postmul = " ";
|
||||
|
||||
/*
|
||||
* Look for special unparenthesised flags.
|
||||
* TODO: could make these able to appear inside parentheses, too,
|
||||
* i.e. ${(^)...} etc.
|
||||
*/
|
||||
for (;;) {
|
||||
if ((c = *s) == '^' || c == Hat) {
|
||||
/* RC_EXPAND_PARAM on or off (doubled )*/
|
||||
if ((c = *++s) == '^' || c == Hat) {
|
||||
plan9 = 0;
|
||||
s++;
|
||||
} else
|
||||
plan9 = 1;
|
||||
} else if ((c = *s) == '=' || c == Equals) {
|
||||
/* SH_WORD_SPLIT on or off (doubled). spbreak = 2 means force */
|
||||
if ((c = *++s) == '=' || c == Equals) {
|
||||
spbreak = 0;
|
||||
s++;
|
||||
|
@ -1114,19 +1342,33 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
(iident(cc = s[1])
|
||||
|| cc == '*' || cc == Star || cc == '@'
|
||||
|| cc == '-' || (cc == ':' && s[2] == '-')
|
||||
|| (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar))))
|
||||
|| (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) {
|
||||
getlen = 1 + whichlen, s++;
|
||||
else if (c == '~' || c == Tilde) {
|
||||
/*
|
||||
* Return the length of the parameter.
|
||||
* getlen can be more than 1 to indicate characters (2),
|
||||
* words ignoring multiple delimiters (3), words taking
|
||||
* account of multiple delimiters. delimiter is in
|
||||
* spsep, NULL means $IFS.
|
||||
*/
|
||||
} else if (c == '~' || c == Tilde) {
|
||||
/* GLOB_SUBST on or off (doubled) */
|
||||
if ((c = *++s) == '~' || c == Tilde) {
|
||||
globsubst = 0;
|
||||
s++;
|
||||
} else
|
||||
globsubst = 1;
|
||||
} else if (c == '+') {
|
||||
/*
|
||||
* Return whether indicated parameter is set.
|
||||
* Try to handle this when parameter is named
|
||||
* by (P) (second part of test).
|
||||
*/
|
||||
if (iident(s[1]) || (aspar && isstring(s[1]) &&
|
||||
(s[2] == Inbrace || s[2] == Inpar)))
|
||||
chkset = 1, s++;
|
||||
else if (!inbrace) {
|
||||
/* Special case for `$+' on its own --- leave unmodified */
|
||||
*aptr = '$';
|
||||
*str = aptr + 1;
|
||||
return n;
|
||||
|
@ -1134,13 +1376,31 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
zerr("bad substitution", NULL, 0);
|
||||
return NULL;
|
||||
}
|
||||
} else if (inbrace && INULL(*s))
|
||||
} else if (inbrace && INULL(*s)) {
|
||||
/*
|
||||
* Handles things like ${(f)"$(<file)"} by skipping
|
||||
* the double quotes. We don't need to know what was
|
||||
* actually there; the presence of a String or Qstring
|
||||
* is good enough.
|
||||
*/
|
||||
s++;
|
||||
else
|
||||
} else
|
||||
break;
|
||||
}
|
||||
/* Don't activate special pattern characters if inside quotes */
|
||||
globsubst = globsubst && !qt;
|
||||
|
||||
/*
|
||||
* At this point, we usually expect a parameter name.
|
||||
* However, there may be a nested ${...} or $(...).
|
||||
* These say that the parameter itself is somewhere inside,
|
||||
* or that there isn't a parameter and we will get the values
|
||||
* from a command substitution itself. In either case,
|
||||
* the current instance of paramsubst() doesn't fetch a value,
|
||||
* it just operates on what gets passed up.
|
||||
* (The first ought to have been {...}, reserving ${...}
|
||||
* for substituting a value at that point, but it's too late now.)
|
||||
*/
|
||||
idbeg = s;
|
||||
if ((subexp = (inbrace && s[-1] && isstring(*s) &&
|
||||
(s[1] == Inbrace || s[1] == Inpar)))) {
|
||||
|
@ -1151,26 +1411,78 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
skipparens(*s, *s == Inpar ? Outpar : Outbrace, &s);
|
||||
sav = *s;
|
||||
*s = 0;
|
||||
/*
|
||||
* This handles arrays. TODO: this is not the most obscure call to
|
||||
* multsub() (see below) but even so it would be nicer to pass down
|
||||
* and back the arrayness more rationally. In that case, we should
|
||||
* remove the aspar test and extract a value from an array, if
|
||||
* necessary, when we handle (P) lower down.
|
||||
*/
|
||||
if (multsub(&val, (aspar ? NULL : &aval), &isarr, NULL) && quoted) {
|
||||
/* Empty quoted string --- treat as null string, not elided */
|
||||
isarr = -1;
|
||||
aval = (char **) hcalloc(sizeof(char *));
|
||||
aspar = 0;
|
||||
} else if (aspar)
|
||||
idbeg = val;
|
||||
*s = sav;
|
||||
/*
|
||||
* This tests for the second double quote in an expression
|
||||
* like ${(f)"$(<file)"}, compare above.
|
||||
*/
|
||||
while (INULL(*s))
|
||||
s++;
|
||||
v = (Value) NULL;
|
||||
} else if (aspar) {
|
||||
/*
|
||||
* No subexpression, but in any case the value is going
|
||||
* to give us the name of a parameter on which we do
|
||||
* our remaining processing. In other words, this
|
||||
* makes ${(P)param} work like ${(P)${param}}. (Probably
|
||||
* better looked at, this is the basic code for ${(P)param}
|
||||
* and it's been kludged into the subexp code because no
|
||||
* opportunity for a kludge has been neglected.)
|
||||
*/
|
||||
if ((v = fetchvalue(&vbuf, &s, 1, (qt ? SCANPM_DQUOTED : 0)))) {
|
||||
val = idbeg = getstrvalue(v);
|
||||
subexp = 1;
|
||||
} else
|
||||
vunset = 1;
|
||||
}
|
||||
/*
|
||||
* We need to retrieve a value either if we haven't already
|
||||
* got it from a subexpression, or if the processing so
|
||||
* far has just yielded us a parameter name to be processed
|
||||
* with (P).
|
||||
*/
|
||||
if (!subexp || aspar) {
|
||||
char *ov = val;
|
||||
|
||||
/*
|
||||
* Second argument: decide whether to use the subexpression or
|
||||
* the string next on the line as the parameter name.
|
||||
* Third argument: decide how processing for brackets
|
||||
* 1 means full processing
|
||||
* -1 appears to mean something along the lines of
|
||||
* only handle single digits and don't handle brackets.
|
||||
* I *think* (but it's really only a guess) that this
|
||||
* is used by the test below the wantt handling, so
|
||||
* that in certain cases we handle brackets there.
|
||||
* 0 would apparently mean something like we know we
|
||||
* should have the name of a scalar and we get cross
|
||||
* if there's anything present which disagrees with that
|
||||
* but you will search fetchvalue() in vain for comments on this.
|
||||
* Fourth argument gives flags to do with keys, values, quoting,
|
||||
* assigning depending on context and parameter flags.
|
||||
*
|
||||
* This is the last mention of subexp, so presumably this
|
||||
* is what the code which makes sure subexp is set if aspar (the
|
||||
* (P) flag) is set. I *think* what's going on here is the
|
||||
* second argument is for both input and output: with
|
||||
* subexp, we only want the input effect, whereas normally
|
||||
* we let fetchvalue set the main string pointer s to
|
||||
* the end of the bit it's fetched.
|
||||
*/
|
||||
if (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s),
|
||||
(wantt ? -1 :
|
||||
((unset(KSHARRAYS) || inbrace) ? 1 : -1)),
|
||||
|
@ -1181,6 +1493,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
vunset = 1;
|
||||
|
||||
if (wantt) {
|
||||
/*
|
||||
* Handle the (t) flag: value now becomes the type
|
||||
* information for the parameter.
|
||||
*/
|
||||
if (v && v->pm && !(v->pm->flags & PM_UNSET)) {
|
||||
int f = v->pm->flags;
|
||||
|
||||
|
@ -1227,8 +1543,24 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
isarr = 0;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* We get in here two ways; either we need to convert v into
|
||||
* the local value system, or we need to get rid of brackets
|
||||
* even if there isn't a v.
|
||||
*/
|
||||
while (v || ((inbrace || (unset(KSHARRAYS) && vunset)) && isbrack(*s))) {
|
||||
if (!v) {
|
||||
/*
|
||||
* Index applied to non-existent parameter; we may or may
|
||||
* not have a value to index, however. Create a temporary
|
||||
* empty parameter as a trick, and index on that. This
|
||||
* usually happens the second time around the loop when
|
||||
* we've used up the original parameter value and want to
|
||||
* apply a subscript to what's left. However, it's also
|
||||
* possible it's got something to do with some of that murky
|
||||
* passing of -1's as the third argument to fetchvalue() to
|
||||
* inhibit bracket parsing at that stage.
|
||||
*/
|
||||
Param pm;
|
||||
char *os = s;
|
||||
|
||||
|
@ -1251,6 +1583,21 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
if (getindex(&s, v, qt) || s == os)
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* This is where we extract a value (we know now we have
|
||||
* one) into the local parameters for a scalar (val) or
|
||||
* array (aval) value. TODO: move val and aval into
|
||||
* a structure with a discriminator. Hope we can make
|
||||
* more things array values at this point and dearrayify later.
|
||||
* v->isarr tells us whether the stuff form down below looks
|
||||
* like an array. Unlike multsub() this is probably clean
|
||||
* enough to keep, although possibly the parameter passing
|
||||
* needs reorganising.
|
||||
*
|
||||
* I think we get to discard the existing value of isarr
|
||||
* here because it's already been taken account of, either
|
||||
* in the subexp stuff or immediately above.
|
||||
*/
|
||||
if ((isarr = v->isarr)) {
|
||||
/* No way to get here with v->inv != 0, so getvaluearr() *
|
||||
* is called by getarrvalue(); needn't test PM_HASHED. */
|
||||
|
@ -1260,7 +1607,18 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
} else
|
||||
aval = getarrvalue(v);
|
||||
} else {
|
||||
/* Value retrieved from parameter/subexpression is scalar */
|
||||
if (v->pm->flags & PM_ARRAY) {
|
||||
/*
|
||||
* Although the value is a scalar, the parameter
|
||||
* itself is an array. Presumably this is due to
|
||||
* being quoted, or doing single substitution or something,
|
||||
* TODO: we're about to do some definitely stringy
|
||||
* stuff, so something like this bit is probably
|
||||
* necessary. However, I'd like to leave any
|
||||
* necessary joining of arrays until this point
|
||||
* to avoid the multsub() horror.
|
||||
*/
|
||||
int tmplen = arrlen(v->pm->gets.afn(v->pm));
|
||||
|
||||
if (v->start < 0)
|
||||
|
@ -1269,6 +1627,15 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
vunset = 1;
|
||||
}
|
||||
if (!vunset) {
|
||||
/*
|
||||
* There really is a value. Apply any necessary
|
||||
* padding or case transformation. Note these
|
||||
* are the per-parameter transformations specified
|
||||
* with typeset, not the per-substitution ones set
|
||||
* by flags. TODO: maybe therefore this would
|
||||
* be more consistent if moved into getstrvalue()?
|
||||
* Bet that's easier said than done.
|
||||
*/
|
||||
val = getstrvalue(v);
|
||||
fwidth = v->pm->ct ? v->pm->ct : strlen(val);
|
||||
switch (v->pm->flags & (PM_LEFT | PM_RIGHT_B | PM_RIGHT_Z)) {
|
||||
|
@ -1335,10 +1702,25 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Finished with the original parameter and its indices;
|
||||
* carry on looping to see if we need to do more indexing.
|
||||
* This means we final get rid of v in favour of val and
|
||||
* aval. We could do with somehow encapsulating the bit
|
||||
* where we need v.
|
||||
*/
|
||||
v = NULL;
|
||||
if (!inbrace)
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* We're now past the name or subexpression; the only things
|
||||
* which can happen now are a closing brace, one of the standard
|
||||
* parameter postmodifiers, or a history-style colon-modifier.
|
||||
*
|
||||
* Again, this duplicates tests for characters we're about to
|
||||
* examine properly later on.
|
||||
*/
|
||||
if (inbrace &&
|
||||
(c = *s) != '-' && c != '+' && c != ':' && c != '%' && c != '/' &&
|
||||
c != '=' && c != Equals &&
|
||||
|
@ -1348,6 +1730,30 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
zerr("bad substitution", NULL, 0);
|
||||
return NULL;
|
||||
}
|
||||
/*
|
||||
* Join arrays up if we're in quotes and there isn't some
|
||||
* override such as (@).
|
||||
* TODO: hmm, if we're called as part of some recursive
|
||||
* substitution do we want to delay this until we get back to
|
||||
* the top level? Or is if there's a qt (i.e. this parameter
|
||||
* substitution is in quotes) always good enough? Potentially
|
||||
* we may be OK by now --- all potential `@'s and subexpressions
|
||||
* have been handled, including any [@] index which comes up
|
||||
* by virture of v->isarr being set to SCANPM_ISVAR_AT which
|
||||
* is now in isarr.
|
||||
*
|
||||
* However, if we are replacing multsub() with something that
|
||||
* doesn't mangle arrays, we may need to delay this step until after
|
||||
* the foo:- or foo:= or whatever that causes that. Note the value
|
||||
* (string or array) at this point is irrelevant if we are going to
|
||||
* be doing that. This would mean // and stuff get applied
|
||||
* arraywise even if quoted. That's probably wrong, so maybe
|
||||
* this just stays.
|
||||
*
|
||||
* We do a separate stage of dearrayification in the YUK chunk,
|
||||
* I think mostly because of the way we make array or scalar
|
||||
* values appear to the caller.
|
||||
*/
|
||||
if (isarr) {
|
||||
if (nojoin)
|
||||
isarr = -1;
|
||||
|
@ -1358,9 +1764,20 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
}
|
||||
|
||||
idend = s;
|
||||
if (inbrace)
|
||||
if (inbrace) {
|
||||
/*
|
||||
* This is to match a closing double quote in case
|
||||
* we didn't have a subexpression, e.g. ${"foo"}.
|
||||
* This form is pointless, but logically it ought to work.
|
||||
*/
|
||||
while (INULL(*s))
|
||||
s++;
|
||||
}
|
||||
/*
|
||||
* We don't yet know whether a `:' introduces a history-style
|
||||
* colon modifier or qualifies something like ${...:=...}.
|
||||
* But if we remember the colon here it's easy to check later.
|
||||
*/
|
||||
if ((colf = *s == ':'))
|
||||
s++;
|
||||
|
||||
|
@ -1391,13 +1808,18 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
|
||||
if (inbrace && ((c = *s) == '-' ||
|
||||
c == '+' ||
|
||||
c == ':' ||
|
||||
c == ':' || /* i.e. a doubled colon */
|
||||
c == '=' || c == Equals ||
|
||||
c == '%' ||
|
||||
c == '#' || c == Pound ||
|
||||
c == '?' || c == Quest ||
|
||||
c == '/')) {
|
||||
|
||||
/*
|
||||
* Default index is 1 if no (I) or (I) gave zero. But
|
||||
* why don't we set the default explicitly at the start
|
||||
* and massage any passed index where we set flnum anyway?
|
||||
*/
|
||||
if (!flnum)
|
||||
flnum++;
|
||||
if (c == '%')
|
||||
|
@ -1455,6 +1877,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
*ptr = '\0';
|
||||
}
|
||||
|
||||
/* See if this was ${...:-...}, ${...:=...}, etc. */
|
||||
if (colf)
|
||||
flags |= SUB_ALL;
|
||||
/*
|
||||
|
@ -1487,12 +1910,23 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
* (except according to $@ rules); but this leaves the
|
||||
* unquoted substrings unsplit, and other code below
|
||||
* for spbreak splits even within the quoted substrings.
|
||||
*
|
||||
* TODO: I think multsub needs to be told enough to
|
||||
* decide about splitting with spbreak at this point
|
||||
* (and equally in the `=' handler below). Then
|
||||
* we can turn off spbreak to avoid the join & split
|
||||
* nastiness later.
|
||||
*
|
||||
* What we really want to do is make this look as
|
||||
* if it were the result of an assignment from
|
||||
* the same value, taking account of quoting.
|
||||
*/
|
||||
multsub(&val, (aspar ? NULL : &aval), &isarr, NULL);
|
||||
copied = 1;
|
||||
}
|
||||
break;
|
||||
case ':':
|
||||
/* this must be `::=', unconditional assignment */
|
||||
if (*s != '=' && *s != Equals)
|
||||
goto noclosebrace;
|
||||
vunset = 1;
|
||||
|
@ -1507,11 +1941,22 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
*idend = '\0';
|
||||
val = dupstring(s);
|
||||
isarr = 0;
|
||||
/*
|
||||
* TODO: this is one of those places where I don't
|
||||
* think we want to do the joining until later on.
|
||||
* We also need to handle spbreak and spsep at this
|
||||
* point and unset them.
|
||||
*/
|
||||
if (spsep || spbreak || !arrasg)
|
||||
multsub(&val, NULL, NULL, sep);
|
||||
else
|
||||
multsub(&val, &aval, &isarr, NULL);
|
||||
if (arrasg) {
|
||||
/*
|
||||
* This is an array assignment in a context
|
||||
* where we have no syntactic way of finding
|
||||
* out what an array element is. So we just guess.
|
||||
*/
|
||||
char *arr[2], **t, **a, **p;
|
||||
if (spsep || spbreak) {
|
||||
aval = sepsplit(val, spsep, 0, 1);
|
||||
|
@ -1635,6 +2080,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Either loop over an array doing replacements or
|
||||
* do the replacment on a string.
|
||||
*/
|
||||
if (!vunset && isarr) {
|
||||
getmatcharr(&aval, s, flags, flnum, replstr);
|
||||
copied = 1;
|
||||
|
@ -1647,6 +2096,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
break;
|
||||
}
|
||||
} else { /* no ${...=...} or anything, but possible modifiers. */
|
||||
/*
|
||||
* Handler ${+...}. TODO: strange, why do we handle this only
|
||||
* if there isn't a trailing modifier? Why don't we do this
|
||||
* e.g. when we hanlder the ${(t)...} flag?
|
||||
*/
|
||||
if (chkset) {
|
||||
val = dupstring(vunset ? "0" : "1");
|
||||
isarr = 0;
|
||||
|
@ -1659,6 +2113,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
val = dupstring("");
|
||||
}
|
||||
if (colf) {
|
||||
/*
|
||||
* History style colon modifiers. May need to apply
|
||||
* on multiple elements of an array.
|
||||
*/
|
||||
s--;
|
||||
if (unset(KSHARRAYS) || inbrace) {
|
||||
if (!isarr)
|
||||
|
@ -1695,6 +2153,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
}
|
||||
if (errflag)
|
||||
return NULL;
|
||||
/*
|
||||
* This handles taking a length with ${#foo} and variations.
|
||||
* TODO: again. one might naively have thought this had the
|
||||
* same sort of effect as the ${(t)...} flag and the ${+...}
|
||||
* test, although in this case we do need the value rather
|
||||
* the the parameter, so maybe it's a bit different.
|
||||
*/
|
||||
if (getlen) {
|
||||
long len = 0;
|
||||
char buf[14];
|
||||
|
@ -1725,6 +2190,23 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
val = dupstring(buf);
|
||||
isarr = 0;
|
||||
}
|
||||
/*
|
||||
* I think this mult_isarr stuff here is used to pass back
|
||||
* the setting of whether we are an array to multsub, and
|
||||
* thence to the top-level paramsubst(). The way the
|
||||
* setting is passed back is completely obscure, however.
|
||||
* It's presumably at this point because we try to remember
|
||||
* whether the value was `really' an array before massaging
|
||||
* some special cases.
|
||||
*
|
||||
* TODO: YUK. This is not the right place to turn arrays into
|
||||
* scalars; we should pass back as an array, and let the calling
|
||||
* code decide how to deal with it. This is almost certainly
|
||||
* a lot harder than it sounds. Do we really need to handle
|
||||
* one-element arrays as scalars at this point? Couldn't
|
||||
* we just test for it later rather than having a multiple-valued
|
||||
* wave-function for isarr?
|
||||
*/
|
||||
mult_isarr = isarr;
|
||||
if (isarr > 0 && !plan9 && (!aval || !aval[0])) {
|
||||
val = dupstring("");
|
||||
|
@ -1739,6 +2221,12 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
}
|
||||
/* ssub is true when we are called from singsub (via prefork).
|
||||
* It means that we must join arrays and should not split words. */
|
||||
/*
|
||||
* TODO: this is what is screwing up the use of SH_WORD_SPLIT
|
||||
* after `:-' etc. If we fix multsub(), we might get away
|
||||
* with simply unsetting the appropriate flags when they
|
||||
* get handled.
|
||||
*/
|
||||
if (ssub || spbreak || spsep || sep) {
|
||||
if (isarr)
|
||||
val = sepjoin(aval, sep, 1), isarr = 0;
|
||||
|
@ -1753,6 +2241,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
}
|
||||
mult_isarr = isarr;
|
||||
}
|
||||
/*
|
||||
* Perform case modififications.
|
||||
*/
|
||||
if (casmod) {
|
||||
if (isarr) {
|
||||
char **ap;
|
||||
|
@ -1782,6 +2273,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
makecapitals(&val);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Perform prompt-style modifications.
|
||||
*/
|
||||
if (presc) {
|
||||
int ops = opts[PROMPTSUBST], opb = opts[PROMPTBANG];
|
||||
int opp = opts[PROMPTPERCENT], len;
|
||||
|
@ -1790,6 +2284,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
opts[PROMPTPERCENT] = 1;
|
||||
opts[PROMPTSUBST] = opts[PROMPTBANG] = 0;
|
||||
}
|
||||
/*
|
||||
* TODO: It would be really quite nice to abstract the
|
||||
* isarr and !issarr code into a function which gets
|
||||
* passed a pointer to a function with the effect of
|
||||
* the promptexpand bit. Then we could use this for
|
||||
* a lot of stuff and bury val/aval/isarr inside a structure
|
||||
* which gets passed to it.
|
||||
*/
|
||||
if (isarr) {
|
||||
char **ap;
|
||||
|
||||
|
@ -1820,6 +2322,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
opts[PROMPTBANG] = opb;
|
||||
opts[PROMPTPERCENT] = opp;
|
||||
}
|
||||
/*
|
||||
* One of the possible set of quotes to apply, depending on
|
||||
* the repetitions of the (q) flag.
|
||||
*/
|
||||
if (quotemod) {
|
||||
if (--quotetype > 3)
|
||||
quotetype = 3;
|
||||
|
@ -1903,6 +2409,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Transform special characters in the string to make them
|
||||
* printable.
|
||||
*/
|
||||
if (visiblemod) {
|
||||
if (isarr) {
|
||||
char **ap;
|
||||
|
@ -1916,6 +2426,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
val = nicedupstring(val);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Nothing particularly to do with SH_WORD_SPLIT --- this
|
||||
* performs lexical splitting on a string as specified by
|
||||
* the (z) flag.
|
||||
*/
|
||||
if (shsplit) {
|
||||
LinkList list = NULL;
|
||||
|
||||
|
@ -1944,6 +2459,21 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
}
|
||||
copied = 1;
|
||||
}
|
||||
/*
|
||||
* TODO: hmm. At this point we have to be on our toes about
|
||||
* whether we're putting stuff into a line or not, i.e.
|
||||
* we don't want to do this from a recursive call; this is
|
||||
* probably part of the point of the mult_isarr monkey business.
|
||||
* Rather than passing back flags in a non-trivial way, maybe
|
||||
* we could decide on the basis of flags passed down to us.
|
||||
*
|
||||
* This is the ideal place to do any last-minute conversion from
|
||||
* array to strings. However, given all the transformations we've
|
||||
* already done, probably if it's going to be done it will already
|
||||
* have been. (I'd really like to keep everying in aval or
|
||||
* equivalent and only locally decide if we need to treat it
|
||||
* as a scalar.)
|
||||
*/
|
||||
if (isarr) {
|
||||
char *x;
|
||||
char *y;
|
||||
|
@ -1951,6 +2481,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
int i;
|
||||
LinkNode on = n;
|
||||
|
||||
/* Handle the (u) flag; we need this before the next test */
|
||||
if (unique) {
|
||||
if(!copied)
|
||||
aval = arrdup(aval);
|
||||
|
@ -1960,6 +2491,16 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
zhuniqarray(aval);
|
||||
}
|
||||
if ((!aval[0] || !aval[1]) && !plan9) {
|
||||
/*
|
||||
* Empty array or single element. Currently you only
|
||||
* get a single element array at this point from the
|
||||
* unique expansion above. but we can potentially
|
||||
* have other reasons.
|
||||
*
|
||||
* The following test removes the markers
|
||||
* from surrounding double quotes, but I don't know why
|
||||
* that's necessary.
|
||||
*/
|
||||
int vallen;
|
||||
if (aptr > (char *) getdata(n) &&
|
||||
aptr[-1] == Dnull && *fstr == Dnull)
|
||||
|
@ -1977,6 +2518,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
setdata(n, y);
|
||||
return n;
|
||||
}
|
||||
/* Handle (o) and (O) and their variants */
|
||||
if (sortit) {
|
||||
if (!copied)
|
||||
aval = arrdup(aval);
|
||||
|
@ -2004,6 +2546,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
}
|
||||
}
|
||||
if (plan9) {
|
||||
/* Handle RC_EXPAND_PARAM */
|
||||
LinkNode tn;
|
||||
local_list1(tl);
|
||||
|
||||
|
@ -2049,6 +2592,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
return n;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Not RC_EXPAND_PARAM: simply join the first and
|
||||
* last values.
|
||||
* TODO: how about removing the restriction that
|
||||
* aval[1] is non-NULL to promote consistency?, or
|
||||
* simply changing the test so that we drop into
|
||||
* the scalar branch, instead of tricking isarr?
|
||||
*/
|
||||
x = aval[0];
|
||||
if (prenum || postnum)
|
||||
x = dopadding(x, prenum, postnum, preone, postone,
|
||||
|
@ -2095,6 +2646,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
if (eval)
|
||||
n = on;
|
||||
} else {
|
||||
/*
|
||||
* Scalar value. Handle last minute transformations
|
||||
* such as left- or right-padding and the (e) flag to
|
||||
* revaluate the result.
|
||||
*/
|
||||
int xlen;
|
||||
char *x;
|
||||
char *y;
|
||||
|
|
Loading…
Reference in New Issue
Block a user