tools

various tools
git clone git://deadbeef.fr/tools.git
Log | Files | Refs | README | LICENSE

commit f87f5765b28638fef7eef413628a7e0df9776919
parent 1a4c90346a0c34381b4ba95ceaa8919f3e9240db
Author: Morel Bérenger <berengermorel76@gmail.com>
Date:   Sat, 30 May 2020 04:44:55 +0200

implemented cmdline option parsing and --help

Diffstat:
M Makefile | 2 +-
M src/lmerge.cpp | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
A src/log.hpp | 41 +++++++++++++++++++++++++++++++++++++++++
A src/optparser.cpp | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/optparser.hpp | 201 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 425 insertions(+), 16 deletions(-)

diff --git a/Makefile b/Makefile @@ -10,7 +10,7 @@ all: manpages lmerge.1 lmerge %.o: src/%.cpp $(CXX) $(CXXFLAGS) -c $< -o $@ -lmerge: lmerge.o +lmerge: lmerge.o optparser.o $(CXX) -o $@ $^ manpages: lmerge.1 diff --git a/src/lmerge.cpp b/src/lmerge.cpp @@ -11,28 +11,15 @@ #include <ctype.h> #include <algorithm> +#include <iterator> /** - * This program reads stdin and when consecutive lines have specific fields all - * containing the same value, prints them replacing the newline character by - * the 1st character in FIELD_SEP. - * Fields are delimited by the FIELD_SEP environment variable. If not defined, - * " \\t" is used instead (see isblank(3)). - * Fields to use are defined by the environment variable FIELDS, which only - * use unsigned decimal integers separated by commas, other characters makes the - * value invalid. - * If FIELDS is not defined or invalid, exits with an error. - * Empty field indexes ("1,,3") are ignored (will resolve in "1,3"). - * Do not work if input is not in line mode. - * Line separator is defined by ENTRY_SEP, or "\\n" if not defined. 
- * * TODO: * * check that ENTRY_SEP works as expected; * * fix the fact input needs a "\\n" at end of last line for it to be merged; * * UTF-8 support (field separators); * * providing FIELDS variable as command-line option; * * -v/--version option; - * * -h/--help option; * * allow to customize the memory allocation scheme at runtime; * * allow to not print twice merged fields; * * allow to set verbosity on stderr; @@ -44,6 +31,7 @@ **/ #include "vector.hpp" +#include "optparser.hpp" class field_marker; typedef vector<char> line_cache; @@ -83,8 +71,42 @@ bool allocate_markers( field_marker_t& field_cache ); -int main( void ) +void print_help( char const* pgm, FILE* target, opt_desc_t const* start, opt_desc_t const* end ); + +int main( int argc, char const *const * argv ) { + opt_desc_t opts[] = + { + { "help", "shows this message", 'h', 0, nullptr, nullptr, nullptr }, + }; + auto b_opts = std::begin( opts ); + auto e_opts = std::end( opts ); + + char const *arg = argv[1]; + for( int iarg = 1; iarg != argc; ++iarg, ++argv ) + { + auto error = parse_cmd_opt( arg, b_opts, e_opts ); + switch( error ) + { + case MAX_COUNT: + fprintf( stderr, "Warning: in arg \"%s\" => %s(error code: %04x)\n", + arg, parse_error_msgs[error], error ); + [[fallthrough]]; + case NONE: + case IGNORED: + break; + case SET_NO_VAL: + case SET_VAL_IGN: + case SET_FAIL: + case BAD_ARGS: + case BAD_SETTER: + print_help( argv[0], stderr, b_opts, e_opts ); + fprintf( stderr, "Error: in arg \"%s\" => %s(error code: 0x%04x)\n", + arg, parse_error_msgs[error], error ); + return EXIT_FAILURE; + } + } + char const * const DEFAULT_FIELD_SEP = " \t"; char const * const DEFAULT_ENTRY_SEP = "\n"; @@ -294,3 +316,27 @@ bool allocate_markers( field_cache.shrink_to_fit(); return false; } + +void print_help( char const* pgm, FILE* target, opt_desc_t const* start, opt_desc_t const* end ) +{ + fputs( "Usage: ", target ); + fputs( pgm, target ); + fputs( " [OPTIONS]\n" + "Description:\n" + "\tThis program reads 
stdin and when consecutive lines have specific fields all\n" + "\tcontaining the same value, prints them replacing the newline character by\n" + "\tthe 1st character in FIELD_SEP.\n" + "\tFields are delimited by the FIELD_SEP environment variable. If not defined,\n" + "\t\" \\t\" is used instead (see isblank(3)).\n" + "\tFields to use are defined by the environment variable FIELDS, which only\n" + "\tuse unsigned decimal integers separated by commas, other characters makes the\n" + "\tvalue invalid.\n" + "\tIf FIELDS is not defined or invalid, exits with an error.\n" + "\tEmpty field indexes (\"1,,3\") are ignored (will resolve in \"1,3\").\n" + "\tDo not work if input is not in line mode.\n" + "\tLine separator is defined by ENTRY_SEP, or \"\\n\" if not defined.\n" + "Options:\n" + , target ); + print_opts( target, start, end ); +} + diff --git a/src/log.hpp b/src/log.hpp @@ -0,0 +1,41 @@ +#ifndef LOG_HPP +#define LOG_HPP + +#define xstr(s) str(s) +#define str(s) #s +#define HEADER_LOG __FILE__ "[" xstr(__LINE__) "]" + +#ifndef NDEBUG +#define BUG_CHECK( cond, err ) \ + do \ + { \ + if( cond ) \ + { \ + fputs( HEADER_LOG ": " #err " caused by " #cond, stderr ); \ + abort(); \ + } \ + } while( 0 ) +#else +#define BUG_CHECK( cond, err ) if( cond ) return err +#endif + +#define syserr( str ) \ + do { \ + fputs( HEADER_LOG ": ", stderr ); \ + fputs( strerror( errno ), stderr ); \ + fputs( ": " str "\n", stderr ); \ + } while( 0 ) + +//ok, this should not be here, since not related to any logging stuff +//otoh, I think this file should be renamed utils, or alike... +//it's only defines anyway... 
+#define ALLOC( type ) static_cast<type*>( malloc( sizeof( type ) ) ) +#define REDEF( type, dst, src ) type& dst = *static_cast<type*>( src ) + +template <typename T> +inline bool empty_array( T* arr ) +{ + return arr && *arr; +} + +#endif diff --git a/src/optparser.cpp b/src/optparser.cpp @@ -0,0 +1,121 @@ +#include <string.h> +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <errno.h> +#include <limits.h> + +#include "optparser.hpp" +#include "log.hpp" + +// those strings must be defined in same order than the enum +// Trick: use vim's block insertion to check them +char const *parse_error_msgs[] = +{ + /*NONE */"no error", + /*IGNORED */"not an option", + /*MAX_COUNT */"count overflow", + /*SET_NO_VAL */"this option needs a value", + /*SET_VAL_IGN*/"this option does not needs a value", + /*SET_FAIL */"failed to parse the value", + /*BAD_ARGS */"application bug: bad arguments to call", + /*BAD_SETTER */"application bug: bad values in option", +}; + +parse_error_t parse_cmd_opt( char const* arg, opt_desc_t* start, opt_desc_t const* end ) +{ + BUG_CHECK( !( arg && start && end && end > start ), BAD_ARGS ); + + if( arg[0] != '-' || arg[1] == 0 ) + { + return IGNORED; + } + + opt_desc_t* opt = start; + char const* argv = arg + 2; + + //search for option (stores in opt) + if( arg[1] == '-' ) //long + { + char const* name = argv; + size_t name_sz; + argv = strchr( name, '=' ); + if( argv ) + { + assert( argv > name ); + name_sz = static_cast<size_t>( argv - name ); + ++argv; + } + else + { + name_sz = strlen( name ); + } + + for( ; true + && opt != end + && 0 != strncmp( name, opt->option, name_sz ) + && name_sz != strlen( opt->option ); ++opt ) + { + } + } + else //short + { + uint32_t name = static_cast<uint8_t>( arg[1] ); + if( name & 0x80 ) + { + assert( !( name & 0x80 ) && "TODO: utf-8 support" ); + abort(); + } + for( ; opt != end && opt->short_option != name; ++opt ) + { + } + } + + //BUG_CHECK( ( opt->set && 
!opt->value ) || ( !opt->set && opt->value ), BAD_SETTER ); + + if( opt == end ) + { + return IGNORED; + } + + if( opt->value && !empty_array( argv ) ) + { + return SET_NO_VAL; + } + + if( !opt->value && empty_array( argv ) ) + { + return SET_VAL_IGN; + } + + if( opt->set && opt->set( opt->value, argv ) ) + { + return SET_FAIL; + } + + if( opt->count < UINT32_MAX ) + { + ++opt->count; + } + return opt->count == UINT32_MAX ? MAX_COUNT : NONE; +} + +void print_opts( FILE* target, opt_desc_t const* start, opt_desc_t const* end ) +{ + char short_name[] = ",-????"; + for( ; start != end; ++start ) + { + memcpy( short_name + 2, &start->short_option, sizeof( start->short_option ) ); + fprintf( target, "\t* --%s%s: %s", + start->option, + start->short_option ? short_name : "", + start->description + ); + if( start->show && start->show( start->value, target ) ) + { + abort(); + } + fputc( '\n', target ); + } +} diff --git a/src/optparser.hpp b/src/optparser.hpp @@ -0,0 +1,201 @@ +#ifndef OPTPARSER_HPP +#define OPTPARSER_HPP + +// this could probably be optimized, but considering it should not be used that +// often, I think it's better to just keep it as easy to use as possible. +// Some doc: +// * option: long name of the option, what people should use for readable shell +// code, but won't in direct calls, because it's faster to use a +// possible shorter version. +// On cmd line, prefixed with "--" and folowed by a "=" if option is +// not a flag. For example, the "foo" option's value would be set on +// cmd line with "--foo=value". +// Must be a null-byte terminated UTF-8 string. +// Character '=' is reserved and must not be used. +// * short_opt: character used as the short name. Short names are prefixed with +// "-" and directly followed by the value to affect to the option, +// for example: "-fvalue". +// * description: how the program should show describe the option to people that +// don't read the docs (hint: most of us). 
Default values should +// not be repeated in that text. +// Must be a null-byte terminated UTF-8 string. +// * value: a pointer to the variable to set, if applicable. If nullptr, +// occurrences will be counted. +// * set: how to set value given an UTF-8 null-terminated string. +// * show: how to render the current value. Should be used by an "help" option. +// * count: how many times the option was found. Can be used for various things, +// like exiting if --version, -v, --help or -h were found, or if a +// mandatory option is missing. +// If the option appears too often, the count will stop incrementing. +// TODO: implement a way to know which arguments were ignored (no match found) +// TODO: implement the "--" option to mean end of options (and return it's index?) +struct opt_desc_t +{ + char const* option; + char const* description; + uint32_t short_option; + uint32_t count; + void* value; + bool (*set) ( void* val, char const * const arg ); + bool (*show)( void const* val, FILE* target ); +}; + +//see parse_error_msgs for descriptions +enum parse_error_t +{ + NONE, + IGNORED, + MAX_COUNT, + SET_NO_VAL, + SET_VAL_IGN, + SET_FAIL, + BAD_ARGS, + BAD_SETTER, +}; + +extern char const *parse_error_msgs[]; + +// checks if a string is a command-line argument and processes it if yes +// return true if the argument was ignored, false if it was successfully +// processed. +// Range of checked options is: [start,end) +parse_error_t parse_cmd_opt( char const* arg, opt_desc_t* start, opt_desc_t const* end ); + +// prints a message describing options and their current value if applicable. 
+void print_opts( FILE* target, opt_desc_t const* start, opt_desc_t const* end ); + +#include <type_traits> +#include <limits> + +// C strings in statically allocated buffer +template <typename T, size_t SZ> +bool set( + typename std::enable_if <std::is_same<char*,T>::value, void*>::type val, + char const* arg ) +{ + if( !val ) + { + return true; + } + + strncpy( static_cast<char*>( val ), arg, SZ - 1); + static_cast<char*>( val )[SZ-1] = 0; + return false; +} + +template <typename T> +bool show( + typename std::enable_if <std::is_same<char*,T>::value, void const*>::type val, + FILE* target ) +{ + if( !val ) + { + return true; + } + + fprintf( target, "(current value: \"%s\")", static_cast<char const*>( val ) ); + return false; +} + +// signed integrals +template <typename T> +bool set( + typename std::enable_if + < + std::is_integral<T>::value && std::is_signed<T>::value, + void* + >::type val, char const* arg ) +{ + if( !val ) + { + return true; + } + + char* end; + auto v = strtoll( arg, &end, 0 ); + if( false + || errno + || 0 != *end + || std::numeric_limits<T>::min() > v + || std::numeric_limits<T>::max() < v ) + { + return true; + } + *static_cast<T*>( val ) = static_cast<T>( v ); + return false; +} + +template <typename T> +bool show( + typename std::enable_if + < + std::is_integral<T>::value && std::is_signed<T>::value, + void const* + >::type val, FILE* target ) +{ + if( !val ) + { + return true; + } + + fprintf( target, "(current value: %d)", *static_cast<T const*>( val ) ); + return false; +} + +// unsigned integrals +template <typename T> +bool set( + typename std::enable_if + < + std::is_integral<T>::value && std::is_unsigned<T>::value, + void* + >::type val, char const* arg ) +{ + if( !val ) + { + return true; + } + + char* end; + auto v = strtoull( arg, &end, 0 ); + if( false + || errno + || 0 != *end + || std::numeric_limits<T>::min() > v + || std::numeric_limits<T>::max() < v ) + { + return true; + } + *static_cast<T*>( val ) = static_cast<T>( v 
); + return false; +} + +// floating numbers +template <typename T> +bool set( + typename std::enable_if + < + std::is_floating_point<T>::value, + void* + >::type val, char const* arg ) +{ + if( !val ) + { + return true; + } + + char* end; + auto v = strtold( arg, &end ); + if( false + || errno + || 0 != *end + || std::numeric_limits<T>::min() > v + || std::numeric_limits<T>::max() < v ) + { + return true; + } + *static_cast<T*>( val ) = static_cast<T>( v ); + return false; +} + +#endif