[uri] Decode/encode URIs when parsing/unparsing

Currently, handling of URI escapes is ad-hoc; escaped strings are
stored as-is in the URI structure, and it is up to the individual
protocol to unescape as necessary. This is error-prone and expensive
in terms of code size. Modify this behavior by unescaping in
parse_uri() and escaping in unparse_uri() those fields that typically
handle URI escapes (hostname, user, password, path, query, fragment),
and allowing unparse_uri() to accept a subset of fields to print so
it can be easily used to generate e.g. the escaped HTTP path?query
request.

Signed-off-by: Joshua Oreman <oremanj@rwcr.net>
Signed-off-by: Marty Connor <mdc@etherboot.org>
This commit is contained in:
Joshua Oreman
2009-12-29 22:36:04 -05:00
committed by Marty Connor
parent ef9d1a32c6
commit 3d9dd93a14
7 changed files with 153 additions and 91 deletions

View File

@@ -20,6 +20,10 @@ FILE_LICENCE ( GPL2_OR_LATER );
*
* Note that all fields within a URI are optional and may be NULL.
*
* The pointers to the various fields are packed together so they can
* be accessed in array fashion in some places in uri.c where doing so
* saves significant code size.
*
* Some examples are probably helpful:
*
* http://www.etherboot.org/wiki :
@@ -61,8 +65,40 @@ struct uri {
const char *query;
/** Fragment */
const char *fragment;
} __attribute__ (( packed ));
/**
 * URI field indices and field bitmasks
 *
 * Every URI_xxx constant is the index of one field, and the matching
 * URI_xxx_BIT constant is the single-bit mask ( 1 << URI_xxx ) used
 * to select that field in a field bitmask.
 *
 * The indices must be listed in the same order as the fields of the
 * URI structure.
 */
enum {
	/* Field indices */
	URI_SCHEME = 0,
	URI_OPAQUE = 1,
	URI_USER = 2,
	URI_PASSWORD = 3,
	URI_HOST = 4,
	URI_PORT = 5,
	URI_PATH = 6,
	URI_QUERY = 7,
	URI_FRAGMENT = 8,

	/* Single-bit masks, one per field index */
	URI_SCHEME_BIT = ( 1 << URI_SCHEME ),
	URI_OPAQUE_BIT = ( 1 << URI_OPAQUE ),
	URI_USER_BIT = ( 1 << URI_USER ),
	URI_PASSWORD_BIT = ( 1 << URI_PASSWORD ),
	URI_HOST_BIT = ( 1 << URI_HOST ),
	URI_PORT_BIT = ( 1 << URI_PORT ),
	URI_PATH_BIT = ( 1 << URI_PATH ),
	URI_QUERY_BIT = ( 1 << URI_QUERY ),
	URI_FRAGMENT_BIT = ( 1 << URI_FRAGMENT ),

	/* Inclusive bounds for iterating over all field indices */
	URI_FIRST_FIELD = URI_SCHEME,
	URI_LAST_FIELD = URI_FRAGMENT,
};
/**
 * Extract field from URI
 *
 * Indexes the URI structure's field pointers as an array that starts
 * at the "scheme" member, which is why the structure is packed and
 * why the field indices must follow the member order.
 *
 * The expansion is an lvalue, so it can be read or assigned.  Both
 * arguments and the whole expansion are parenthesised so that
 * arbitrary expressions (e.g. "&uri_struct" or "base + 1") can be
 * passed safely.
 *
 * uri   - pointer to the URI structure
 * field - field index (URI_SCHEME through URI_FRAGMENT)
 */
#define uri_get_field( uri, field ) ( ( &(uri)->scheme )[ (field) ] )
/** Field bitmask selecting every URI field */
#define URI_ALL						\
	( URI_SCHEME_BIT	|			\
	  URI_OPAQUE_BIT	|			\
	  URI_USER_BIT		|			\
	  URI_PASSWORD_BIT	|			\
	  URI_HOST_BIT		|			\
	  URI_PORT_BIT		|			\
	  URI_PATH_BIT		|			\
	  URI_QUERY_BIT		|			\
	  URI_FRAGMENT_BIT )
/**
 * Field bitmask of the URI fields that should be decoded on storage
 *
 * Covers user, password, host, path, query and fragment; scheme,
 * opaque and port are not included.
 */
#define URI_ENCODED					\
	( URI_USER_BIT		|			\
	  URI_PASSWORD_BIT	|			\
	  URI_HOST_BIT		|			\
	  URI_PATH_BIT		|			\
	  URI_QUERY_BIT		|			\
	  URI_FRAGMENT_BIT )
/**
* URI is an absolute URI
*
@@ -131,14 +167,16 @@ extern struct uri *cwuri;
/* Parse a URI string into a URI structure.  Fields that typically
 * carry URI escapes (see URI_ENCODED) are stored unescaped.
 */
extern struct uri * parse_uri ( const char *uri_string );

/* Get the port of a URI, given the port to fall back on */
extern unsigned int uri_port ( struct uri *uri, unsigned int default_port );

/* Write the textual form of a URI into buf (at most size bytes).
 * "fields" is a bitmask of URI_xxx_BIT values selecting which fields
 * to print; pass URI_ALL for the complete URI.  Fields that typically
 * carry URI escapes are escaped again on output.
 */
extern int unparse_uri ( char *buf, size_t size, struct uri *uri,
			 unsigned int fields );

/* Duplicate a URI */
extern struct uri * uri_dup ( struct uri *uri );

/* Resolve a relative path against a base path */
extern char * resolve_path ( const char *base_path,
			     const char *relative_path );

/* Resolve a relative URI against a base URI */
extern struct uri * resolve_uri ( struct uri *base_uri,
				  struct uri *relative_uri );

/* Change the current working URI (cwuri) */
extern void churi ( struct uri *uri );

/* URI-encode raw_string into buf (len bytes available).
 * NOTE(review): "field" is presumably a URI_xxx field index selecting
 * the escaping rules for that field - confirm against uri.c.
 */
extern size_t uri_encode ( const char *raw_string, char *buf, ssize_t len,
			   int field );

/* URI-decode encoded_string into buf (len bytes available) */
extern size_t uri_decode ( const char *encoded_string, char *buf, ssize_t len );
#endif /* _GPXE_URI_H */