Patchwork [1/2] layout: Add -i <region>[:<file>] support.

login
register
about
Submitter Carl-Daniel Hailfinger
Date 2013-09-22 00:48:18
Message ID <523E3E52.40901@gmx.net>
Download mbox | patch
Permalink /patch/4054/
State Superseded
Headers show

Comments

Carl-Daniel Hailfinger - 2013-09-22 00:48:18
Am 21.09.2013 18:17 schrieb Stefan Tauner:
> On Fri, 20 Sep 2013 03:48:15 +0200
> Carl-Daniel Hailfinger <c-d.hailfinger.devel.2006@gmx.net> wrote:
>
>> Am 19.09.2013 01:29 schrieb Stefan Tauner:
>>> Add an optional sub-parameter to the -i parameter to allow building the
>>> image to be written from multiple files. This will also allow regions to
>>> be read from flash and written to separate image files in a later patch.
>>> Existing function read_buf_from_file() is refined and reused to read the file.
>>>
>>> Document the whole layout handling including the new features a bit better
>>> and refine wording regarding files, images, layouts and regions as discussed.
>>>
>>> Also, finally introduce helpers.c: a module containing utility functions to be
>>> shared which do not fit elsewhere. The first function to be added is
>>> unquote_string() which is used throughout this patch.
>>>
>>> Abort for non-write operations if a layout file is given.
>>>
>>> based on chromiumos'
>>> d0ea9ed71e7f86bb8e8db2ca7c32a96de25343d8
>>> Signed-off-by: David Hendricks <dhendrix@chromium.org>
>>> Signed-off-by: Stefan Tauner <stefan.tauner@student.tuwien.ac.at>
>> Counter-proposal for helpers.c, addressing all my complaints:
>> - The first token must either be completely enclosed in double quotes or
>> not quoted at all.
>> - There must be end-of-string or delimiter after the closing quote.
>> - The first token shall not have stray quotes anywhere.
>>
>> Function description is yours.
>>
>> Tested with the following inputs (input/output in single quotes):
>>
>> # ./unqote ''
>> Input ''
>> ret=1
>> First token '', remaining text ''
>> # ./unqote '""'
>> Input '""'
>> ret=1
>> First token '', remaining text ''
>> # ./unqote 'foo'
>> Input 'foo'
>> ret=0
>> First token 'foo', remaining text ''
>> # ./unqote ' foo '
>> Input ' foo '
>> ret=0
>> First token 'foo', remaining text ''
>> # ./unqote '"foo"'
>> Input '"foo"'
>> ret=0
>> First token 'foo', remaining text ''
>> # ./unqote ' "foo" '
>> Input ' "foo" '
>> ret=0
>> First token 'foo', remaining text ' '
>> # ./unqote ' " foo " '
>> Input ' " foo " '
>> ret=0
>> First token ' foo ', remaining text ' '
>> # ./unqote 'foo bar'
>> Input 'foo bar'
>> ret=0
>> First token 'foo', remaining text 'bar'
>> # ./unqote ' foo  bar '
>> Input ' foo  bar '
>> ret=0
>> First token 'foo', remaining text ' bar '
>> # ./unqote ' "foo"  bar '
>> Input ' "foo"  bar '
>> ret=0
>> First token 'foo', remaining text '  bar '
>> # ./unqote ' "foo  bar '
>> Input ' "foo  bar '
>> ret=-1
>> Aborting.
>> # ./unqote ' "foo""  bar '
>> Input ' "foo""  bar '
>> ret=-1
>> Aborting.
>> # ./unqote ' "foo"""'
>> Input ' "foo"""'
>> ret=-1
>> Aborting.
>> # ./unqote ' "foo"bar'
>> Input ' "foo"bar'
>> ret=-1
>> Aborting.
>>
>> Signed-off-by: Carl-Daniel Hailfinger <c-d.hailfinger.devel.2006@gmx.net>
>>
>> --- /dev/null	2013-08-19 11:01:03.588011829 +0200
>> +++ helpers.c	2013-09-20 03:31:28.000000000 +0200
>> @@ -0,0 +1,86 @@
>> +/*
>> + * This file is part of the flashrom project.
>> + *
>> + * Copyright (C) 2013 Carl-Daniel Hailfinger
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
>> + */
>> +
>> +#include <string.h>
>> +
>> +#include <string.h>
>> +#include "flash.h"
>> +
>> +/** Parse a \em possibly quoted string.
>> + *
>> + * The function expects \a startp to point to the string to be parsed without a prefix. If the
>> + * string starts with a quotation mark it looks for the second one and removes both in-place, if not then it
>> + * looks for the first character contained in \a delimiters and ends the wanted string there by replacing the
>> + * character with '\0'. If \a delimiters is NULL an empty set is assumed, hence the whole start string equals
>> + * the wanted string.

Let me reword this so it fits the way my variant works.

The behaviour is somewhat similar to a strtok_r variant with quoting
support. A quoted token has to have exactly one doublequote each at the
beginning and the end and has to be followed by end-of-string ('\0') or
a delimiter. \a *startp points to the string which should be parsed. If
the string does not start with a quote, it is terminated at the first
character contained in \a delimiters by replacing it with '\0'. If the
string starts with a quote, it is terminated at the second (terminating)
quote by replacing it with '\0'.


>> + *
>> + * After returning \a startp will point to a string that is either the first quoted part of the
>> + * original string with the quotation marks removed, or the first word of that string before any delimiter.
>> + * If \a endp is not NULL it will be set to point to the first character after the parsed string, or to the
>> + * '\0' at the end of the string pointed to by \a startp if there are no more subsequent characters.

Your variant and my variant don't really conform to this description in
case of delimiters after the closing quote. Will fix mine.


>> + *
>> + * @param start	Points to the input string.
>> + * @param end	Is set to the first char following the input string.
>> + * @return	-1 if a quotation mark is not matched,
>> + *		0 on success,
>> + *		1 if the parsed string is empty.
>> + */
>> +int unquote_string(char **startp, char **endp, const char *delimiters)
>> +{
>> +	size_t len;
>> +
>> +	/* Strip leading delimiters. */
>> +	*startp += strspn(*startp, delimiters);
> Above explodes for delimiters == NULL. It makes it also completely
> useless when the caller needs to track the number of fields (defined by
> the delimiters).

It would be useful as a tokenizer for layout files, though. I can make
this optional or kill it, but the handling of arbitrary amounts of
whitespace between start:end and regionname needs to be done somewhere.


>> +
>> +	/* Check for empty string. */
>> +	if (strlen(*startp) == 0) {
>> +		*endp = *startp;
>> +		return 1;
>> +	}
>> +
>> +	/* Handle unqoted string. */
>> +	if (**startp != '"') {
>> +		len = strcspn(*startp, delimiters);
>> +		/* Check for quotes in the middle. */
>> +		if (strcspn(*startp, "\"") < len)
>> +			return -1;
>> +		/* Check if there is anything after this string. */
>> +		if (strlen(*startp + len) > 0)
>> +			*endp = *startp + len + 1;
> Explodes for endp == NULL

Thanks for catching that.

 
>> +		else
>> +			*endp = *startp + len;
> Likewise.
>
>> +		(*startp)[len] = '\0';
>> +		return 0;
>> +	}
>> +
>> +	/* Handle quoted string. */
>> +	(*startp)++;
>> +	len = strcspn(*startp, "\"");
>> +	/* Catch unclosed quotes and closing quotes not followed immediately by delimiter or end-of-string. */
>> +	if ((*startp)[len] == '\0' || strcspn(*startp + len + 1, delimiters) > 0)
> This will trigger on """ for example

That is intentional. " is not a delimiter, and that means we are not
allowed to break up a string there. As such, """ is as invalid as ""foo.


> (and explode on delimiters == NULL).
>
>> +		return -1;
>> +	/* Overwrite closing quote. */
>> +	(*startp)[len] = '\0';
>> +	*endp = *startp + len + 1;
>> +	/* Check for empty string. */
>> +	if (strlen(*startp) == 0)
>> +		return 1;
>> +	return 0;
>> +}
> All in all I think it does way more than a generic unquoting function
> to be used in parsers should do.

Yes, my variant is a tokenizer with quoting support.
Your variant is a tokenizer which only cares about the supplied
delimiters if there is no quoting and implicitly replaces the specified
delimiters with " if there is quoting.

New unqote as standalone program for easier testing. It now should
handle all the fun stuff you can throw at it.
Suggested example inputs (single quotes are there to protect against the
shell):
' foo '
' "foo" '
'foo '
'"foo" '
'foo'
'"foo"'
'foo bar'
'"foo bar"'
''
'""'
and some inputs which should throw an error:
'"'
'"""'
'"foo'
'foo"'
'"foo"bar'



Regards,
Carl-Daniel

Patch

--- /dev/null	2013-08-19 11:01:03.588011829 +0200
+++ unquote.c	2013-09-22 02:39:31.000000000 +0200
@@ -0,0 +1,83 @@ 
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+
+int unquote_string(char **startp, char **endp, const char *delimiters, bool strip_leading_delimiters)
+{
+	size_t len;
+
+	if (delimiters == NULL)
+		delimiters = "";
+
+	if (strip_leading_delimiters) {
+		/* Strip leading delimiters. */
+		*startp += strspn(*startp, delimiters);
+	}
+
+	/* Check for empty string. */
+	if (strlen(*startp) == 0) {
+		if (endp != NULL)
+			*endp = *startp;
+		return 1;
+	}
+
+	/* Handle unqoted string. */
+	if (**startp != '"') {
+		len = strcspn(*startp, delimiters);
+		/* Check for quotes in the middle. */
+		if (strcspn(*startp, "\"") < len)
+			return -1;
+		if (endp != NULL) {
+			/* Check if there is anything after this string. */
+			if (strlen(*startp + len) > 0)
+				*endp = *startp + len + 1;
+			else
+				*endp = *startp + len;
+		}
+		(*startp)[len] = '\0';
+		return 0;
+	}
+
+	/* Handle quoted string. */
+	(*startp)++;
+	len = strcspn(*startp, "\"");
+	/* Catch unclosed quotes and closing quotes not followed immediately by delimiter or end-of-string. */
+	if ((*startp)[len] == '\0' || strcspn(*startp + len + 1, delimiters) > 0)
+		return -1;
+	/* Overwrite closing quote. */
+	(*startp)[len] = '\0';
+	if (endp != NULL) {
+		*endp = *startp + len + 1;
+		if (strspn(*endp, delimiters) > 0)
+			(*endp)++;
+	}
+	/* Check for empty string. */
+	if (strlen(*startp) == 0)
+		return 1;
+	return 0;
+}
+
+const char *delim = " \t\r\n";
+
+int main(int argc, const char* argv[])
+{
+	int ret;
+	char *oldstartp;
+	char *startp;
+	char *endp;
+
+	if (argc < 2)
+		return 1;
+	oldstartp = startp = strdup(argv[1]);
+	printf ("Input '%s'\n", startp);
+	ret = unquote_string(&startp, &endp, delim, 0);
+	printf("ret=%i\n", ret);
+	if (ret < 0) {
+		printf("Aborting.\n");
+		return 1;
+	}
+		
+	//printf("startp moved %i bytes, endp is %i bytes from oldstartp\n", startp-oldstartp, endp-oldstartp);
+	printf("First token '%s', remaining text '%s'\n", startp, endp);
+	return 0;
+}