commit f87f5765b28638fef7eef413628a7e0df9776919
parent 1a4c90346a0c34381b4ba95ceaa8919f3e9240db
Author: Morel Bérenger <berengermorel76@gmail.com>
Date: Sat, 30 May 2020 04:44:55 +0200
implemented cmdline option parsing and --help
Diffstat:
M | Makefile | | | 2 | +- |
M | src/lmerge.cpp | | | 76 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------- |
A | src/log.hpp | | | 41 | +++++++++++++++++++++++++++++++++++++++++ |
A | src/optparser.cpp | | | 121 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | src/optparser.hpp | | | 201 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
5 files changed, 425 insertions(+), 16 deletions(-)
diff --git a/Makefile b/Makefile
@@ -10,7 +10,7 @@ all: manpages lmerge.1 lmerge
%.o: src/%.cpp
$(CXX) $(CXXFLAGS) -c $< -o $@
-lmerge: lmerge.o
+lmerge: lmerge.o optparser.o
$(CXX) -o $@ $^
manpages: lmerge.1
diff --git a/src/lmerge.cpp b/src/lmerge.cpp
@@ -11,28 +11,15 @@
#include <ctype.h>
#include <algorithm>
+#include <iterator>
/**
- * This program reads stdin and when consecutive lines have specific fields all
- * containing the same value, prints them replacing the newline character by
- * the 1st character in FIELD_SEP.
- * Fields are delimited by the FIELD_SEP environment variable. If not defined,
- * " \\t" is used instead (see isblank(3)).
- * Fields to use are defined by the environment variable FIELDS, which only
- * use unsigned decimal integers separated by commas, other characters makes the
- * value invalid.
- * If FIELDS is not defined or invalid, exits with an error.
- * Empty field indexes ("1,,3") are ignored (will resolve in "1,3").
- * Do not work if input is not in line mode.
- * Line separator is defined by ENTRY_SEP, or "\\n" if not defined.
- *
* TODO:
* * check that ENTRY_SEP works as expected;
* * fix the fact input needs a "\\n" at end of last line for it to be merged;
* * UTF-8 support (field separators);
* * providing FIELDS variable as command-line option;
* * -v/--version option;
- * * -h/--help option;
* * allow to customize the memory allocation scheme at runtime;
* * allow to not print twice merged fields;
* * allow to set verbosity on stderr;
@@ -44,6 +31,7 @@
**/
#include "vector.hpp"
+#include "optparser.hpp"
class field_marker;
typedef vector<char> line_cache;
@@ -83,8 +71,42 @@ bool allocate_markers(
field_marker_t& field_cache
);
-int main( void )
+void print_help( char const* pgm, FILE* target, opt_desc_t const* start, opt_desc_t const* end );
+
+int main( int argc, char const *const * argv )
{
+ opt_desc_t opts[] =
+ {
+ { "help", "shows this message", 'h', 0, nullptr, nullptr, nullptr },
+ };
+ auto b_opts = std::begin( opts );
+ auto e_opts = std::end( opts );
+
+ char const *arg = argv[1];
+ for( int iarg = 1; iarg != argc; ++iarg, ++argv )
+ {
+ auto error = parse_cmd_opt( arg, b_opts, e_opts );
+ switch( error )
+ {
+ case MAX_COUNT:
+ fprintf( stderr, "Warning: in arg \"%s\" => %s(error code: %04x)\n",
+ arg, parse_error_msgs[error], error );
+ [[fallthrough]];
+ case NONE:
+ case IGNORED:
+ break;
+ case SET_NO_VAL:
+ case SET_VAL_IGN:
+ case SET_FAIL:
+ case BAD_ARGS:
+ case BAD_SETTER:
+ print_help( argv[0], stderr, b_opts, e_opts );
+ fprintf( stderr, "Error: in arg \"%s\" => %s(error code: 0x%04x)\n",
+ arg, parse_error_msgs[error], error );
+ return EXIT_FAILURE;
+ }
+ }
+
char const * const DEFAULT_FIELD_SEP = " \t";
char const * const DEFAULT_ENTRY_SEP = "\n";
@@ -294,3 +316,27 @@ bool allocate_markers(
field_cache.shrink_to_fit();
return false;
}
+
+/**
+ * Writes the help message on target: the usage line built from pgm, the
+ * description of the program, then the list of options in [start,end) as
+ * rendered by print_opts().
+ **/
+void print_help( char const* pgm, FILE* target, opt_desc_t const* start, opt_desc_t const* end )
+{
+    // everything that follows the program name, up to the option list
+    static char const after_name[] =
+        " [OPTIONS]\n"
+        "Description:\n"
+        "\tThis program reads stdin and when consecutive lines have specific fields all\n"
+        "\tcontaining the same value, prints them replacing the newline character by\n"
+        "\tthe 1st character in FIELD_SEP.\n"
+        "\tFields are delimited by the FIELD_SEP environment variable. If not defined,\n"
+        "\t\" \\t\" is used instead (see isblank(3)).\n"
+        "\tFields to use are defined by the environment variable FIELDS, which only\n"
+        "\tuse unsigned decimal integers separated by commas, other characters makes the\n"
+        "\tvalue invalid.\n"
+        "\tIf FIELDS is not defined or invalid, exits with an error.\n"
+        "\tEmpty field indexes (\"1,,3\") are ignored (will resolve in \"1,3\").\n"
+        "\tDo not work if input is not in line mode.\n"
+        "\tLine separator is defined by ENTRY_SEP, or \"\\n\" if not defined.\n"
+        "Options:\n";
+
+    fputs( "Usage: ", target );
+    fputs( pgm, target );
+    fputs( after_name, target );
+
+    print_opts( target, start, end );
+}
+
diff --git a/src/log.hpp b/src/log.hpp
@@ -0,0 +1,41 @@
+#ifndef LOG_HPP
+#define LOG_HPP
+
+// Two-level stringification: xstr() lets its argument be macro-expanded
+// before str() turns it into a string literal.
+#define xstr(s) str(s)
+#define str(s) #s
+// "file[line]" prefix, built as one string literal from __FILE__ and __LINE__.
+#define HEADER_LOG __FILE__ "[" xstr(__LINE__) "]"
+
+// BUG_CHECK( cond, err ): guards against a condition that should never be true.
+// * NDEBUG not defined: when cond is true, writes
+//   "file[line]: err caused by cond" on stderr and calls abort();
+// * NDEBUG defined: when cond is true, the enclosing function returns err.
+// Both variants are wrapped in do { } while( 0 ), so that the macro behaves as
+// one statement and can not capture the "else" of a surrounding if/else.
+#ifndef NDEBUG
+#define BUG_CHECK( cond, err ) \
+    do \
+    { \
+        if( cond ) \
+        { \
+            fputs( HEADER_LOG ": " #err " caused by " #cond "\n", stderr ); \
+            abort(); \
+        } \
+    } while( 0 )
+#else
+#define BUG_CHECK( cond, err ) \
+    do \
+    { \
+        if( cond ) \
+        { \
+            return err; \
+        } \
+    } while( 0 )
+#endif
+
+// syserr( msg ): writes "file[line]: <strerror( errno )>: msg" and a newline
+// on stderr. msg must be a string literal: it is pasted between ": " and "\n".
+#define syserr( msg ) \
+    do \
+    { \
+        fputs( HEADER_LOG ": ", stderr ); \
+        fputs( strerror( errno ), stderr ); \
+        fputs( ": " msg "\n", stderr ); \
+    } while( 0 )
+
+// NOTE: the two macros below are not related to logging and should not be
+// here; on the other hand, this file should probably be renamed "utils" or
+// similar. It only contains defines anyway.
+// ALLOC( type ): malloc()s sizeof( type ) bytes and casts the result to type*.
+// The result is not checked and no constructor is run.
+#define ALLOC( type ) static_cast<type*>( malloc( sizeof( type ) ) )
+// REDEF( type, dst, src ): declares dst as a type& bound to what src points to.
+#define REDEF( type, dst, src ) type& dst = *static_cast<type*>( src )
+
+// NOTE: despite its name, this returns true when arr is NOT empty, that is
+// when arr is non-null and its first element is non-zero. A null pointer or
+// an array starting with a zero element gives false.
+template <typename T>
+inline bool empty_array( T* arr )
+{
+    if( !arr )
+    {
+        return false;
+    }
+    return *arr ? true : false;
+}
+
+#endif
diff --git a/src/optparser.cpp b/src/optparser.cpp
@@ -0,0 +1,121 @@
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <errno.h>
+#include <limits.h>
+
+#include "optparser.hpp"
+#include "log.hpp"
+
+// Human-readable message for each parse_error_t value, indexed by the value
+// itself: these strings must be defined in the same order as the enumerators.
+// Trick: use vim's block insertion to check them
+char const *parse_error_msgs[] =
+{
+    /*NONE */"no error",
+    /*IGNORED */"not an option",
+    /*MAX_COUNT */"count overflow",
+    /*SET_NO_VAL */"this option needs a value",
+    /*SET_VAL_IGN*/"this option does not needs a value",
+    /*SET_FAIL */"failed to parse the value",
+    /*BAD_ARGS */"application bug: bad arguments to call",
+    /*BAD_SETTER */"application bug: bad values in option",
+};
+
+// Checks whether arg is one of the options described in [start,end) and, if
+// so, processes it.
+// Accepted forms are "--name", "--name=value", "-c" and "-cvalue".
+// Returns:
+// * IGNORED: arg does not start with '-', is only "-", or matches no option;
+// * SET_NO_VAL: the option has a value pointer but arg carries no value;
+// * SET_VAL_IGN: the option has no value pointer but arg carries a value;
+// * SET_FAIL: the option's set() callback returned true;
+// * MAX_COUNT: the option's counter reached UINT32_MAX;
+// * NONE: the option was processed;
+// * BAD_ARGS: invalid arguments (only when built with NDEBUG, otherwise
+//   BUG_CHECK aborts).
+parse_error_t parse_cmd_opt( char const* arg, opt_desc_t* start, opt_desc_t const* end )
+{
+    BUG_CHECK( !( arg && start && end && end > start ), BAD_ARGS );
+
+    if( arg[0] != '-' || arg[1] == 0 )
+    {
+        return IGNORED;
+    }
+
+    opt_desc_t* opt = start;
+    // text attached to the option; stays null for a long option without '='
+    char const* value = nullptr;
+
+    //search for option (stores in opt)
+    if( arg[1] == '-' ) //long
+    {
+        char const* name = arg + 2;
+        size_t name_sz = 0;
+        char const* equal = strchr( name, '=' );
+        if( equal )
+        {
+            // may be 0 for "--=value": no option has an empty name, so the
+            // search below ends on IGNORED instead of crashing on user input
+            name_sz = static_cast<size_t>( equal - name );
+            value = equal + 1;
+        }
+        else
+        {
+            name_sz = strlen( name );
+        }
+
+        // an option matches only when BOTH the length and the characters are
+        // the same: neither a prefix nor an equal length alone is enough
+        for( ; opt != end; ++opt )
+        {
+            if( name_sz == strlen( opt->option )
+                && 0 == strncmp( name, opt->option, name_sz ) )
+            {
+                break;
+            }
+        }
+    }
+    else //short
+    {
+        value = arg + 2;
+        uint32_t name = static_cast<uint8_t>( arg[1] );
+        if( name & 0x80 )
+        {
+            assert( !( name & 0x80 ) && "TODO: utf-8 support" );
+            abort();
+        }
+        for( ; opt != end && opt->short_option != name; ++opt )
+        {
+        }
+    }
+
+    if( opt == end )
+    {
+        return IGNORED;
+    }
+
+    // disabled check, to be kept after the "opt == end" test if re-enabled:
+    //BUG_CHECK( ( opt->set && !opt->value ) || ( !opt->set && opt->value ), BAD_SETTER );
+
+    // reminder: empty_array() is true when the string is NOT empty
+    if( opt->value && !empty_array( value ) )
+    {
+        return SET_NO_VAL;
+    }
+
+    if( !opt->value && empty_array( value ) )
+    {
+        return SET_VAL_IGN;
+    }
+
+    if( opt->set && opt->set( opt->value, value ) )
+    {
+        return SET_FAIL;
+    }
+
+    // saturating counter
+    if( opt->count < UINT32_MAX )
+    {
+        ++opt->count;
+    }
+    return opt->count == UINT32_MAX ? MAX_COUNT : NONE;
+}
+
+// Prints the options in [start,end) on target, one per line, as
+// "\t* --<option>[,-<short>]: <description>", followed by whatever the show()
+// callback of the option writes, when it has one.
+// Aborts if a show() callback returns true.
+void print_opts( FILE* target, opt_desc_t const* start, opt_desc_t const* end )
+{
+    // ",-" then one slot per byte of short_option, then the terminating 0
+    char short_name[] = ",-????";
+    static_assert( sizeof( short_name ) == 2 + sizeof( start->short_option ) + 1,
+        "short_name must have room for every byte of short_option" );
+
+    for( ; start != end; ++start )
+    {
+        // unpack the short name lowest byte first rather than memcpy()ing the
+        // integer, so that the text does not depend on the host endianness
+        for( size_t i = 0; i != sizeof( start->short_option ); ++i )
+        {
+            short_name[2 + i] =
+                static_cast<char>( ( start->short_option >> ( 8 * i ) ) & 0xFF );
+        }
+
+        fprintf( target, "\t* --%s%s: %s",
+            start->option,
+            start->short_option ? short_name : "",
+            start->description
+        );
+        if( start->show && start->show( start->value, target ) )
+        {
+            abort();
+        }
+        fputc( '\n', target );
+    }
+}
diff --git a/src/optparser.hpp b/src/optparser.hpp
@@ -0,0 +1,201 @@
+#ifndef OPTPARSER_HPP
+#define OPTPARSER_HPP
+
+// Description of one command-line option.
+// This could probably be optimized, but considering it should not be used that
+// often, I think it's better to just keep it as easy to use as possible.
+// Some doc:
+// * option: long name of the option, what people should use for readable shell
+//           code, but won't in direct calls, because it's faster to use a
+//           possible shorter version.
+//           On cmd line, prefixed with "--" and followed by a "=" if option is
+//           not a flag. For example, the "foo" option's value would be set on
+//           cmd line with "--foo=value".
+//           Must be a null-byte terminated UTF-8 string.
+//           Character '=' is reserved and must not be used.
+// * short_option: character used as the short name. Short names are prefixed
+//           with "-" and directly followed by the value to assign to the
+//           option, for example: "-fvalue".
+// * description: how the program should describe the option to people that
+//           don't read the docs (hint: most of us). Default values should
+//           not be repeated in that text.
+//           Must be a null-byte terminated UTF-8 string.
+// * value: a pointer to the variable to set, if applicable. If nullptr, the
+//           option is a flag: it takes no value and only its occurrences are
+//           counted.
+// * set: how to set value given an UTF-8 null-terminated string.
+//           Returns true on failure.
+// * show: how to render the current value. Should be used by a "help" option.
+//           Returns true on failure.
+// * count: how many times the option was found. Can be used for various things,
+//           like exiting if --version, -v, --help or -h were found, or if a
+//           mandatory option is missing.
+//           If the option appears too often, the count will stop incrementing.
+// TODO: implement a way to know which arguments were ignored (no match found)
+// TODO: implement the "--" option to mean end of options (and return its index?)
+struct opt_desc_t
+{
+    char const* option;
+    char const* description;
+    uint32_t short_option;
+    uint32_t count;
+    void* value;
+    bool (*set) ( void* val, char const * const arg );
+    bool (*show)( void const* val, FILE* target );
+};
+
+// Result of parse_cmd_opt(). Values are used as indexes in parse_error_msgs,
+// see that table for the user-visible descriptions.
+enum parse_error_t
+{
+    NONE,        // no error, the option was processed
+    IGNORED,     // the argument is not an option
+    MAX_COUNT,   // the occurrence counter of the option reached its maximum
+    SET_NO_VAL,  // the option needs a value but none was given
+    SET_VAL_IGN, // the option takes no value but one was given
+    SET_FAIL,    // the value could not be parsed
+    BAD_ARGS,    // application bug: bad arguments to call
+    BAD_SETTER,  // application bug: bad values in option
+};
+
+// messages describing each parse_error_t value, indexed by that value
+extern char const *parse_error_msgs[];
+
+// Checks if a string is a command-line option and processes it if yes.
+// Returns IGNORED if the argument is not a known option, NONE if it was
+// successfully processed, or another parse_error_t value describing what went
+// wrong.
+// Range of checked options is: [start,end)
+parse_error_t parse_cmd_opt( char const* arg, opt_desc_t* start, opt_desc_t const* end );
+
+// prints a message describing options and their current value if applicable.
+void print_opts( FILE* target, opt_desc_t const* start, opt_desc_t const* end );
+
+#include <type_traits>
+#include <limits>
+
+// C strings in statically allocated buffer
+// Copies at most SZ - 1 characters of arg in the SZ bytes buffer val points
+// to, and always terminates the result with a null byte (longer values are
+// truncated).
+// Returns true on failure (val or arg is null), false on success.
+template <typename T, size_t SZ>
+bool set(
+    typename std::enable_if <std::is_same<char*,T>::value, void*>::type val,
+    char const* arg )
+{
+    // with SZ == 0, "SZ - 1" would wrap around and write far out of the buffer
+    static_assert( SZ > 0, "the target buffer needs room for the terminating null byte" );
+
+    if( !val || !arg )
+    {
+        return true;
+    }
+
+    char* const buffer = static_cast<char*>( val );
+    strncpy( buffer, arg, SZ - 1 );
+    buffer[SZ - 1] = 0;
+    return false;
+}
+
+// C strings: writes (current value: "<string>") on target.
+// Returns true on failure (val is null), false on success.
+template <typename T>
+bool show(
+    typename std::enable_if <std::is_same<char*,T>::value, void const*>::type val,
+    FILE* target )
+{
+    char const* const text = static_cast<char const*>( val );
+    if( text )
+    {
+        fprintf( target, "(current value: \"%s\")", text );
+        return false;
+    }
+    return true;
+}
+
+// signed integrals
+// Parses arg with strtoll() (base auto-detected: decimal, 0x hexadecimal or
+// 0 octal) and stores the result in the T that val points to.
+// Returns true on failure: val or arg is null, arg holds no number, arg has
+// trailing characters, or the number does not fit in T. *val is left
+// untouched in that case. Returns false on success.
+template <typename T>
+bool set(
+    typename std::enable_if
+    <
+        std::is_integral<T>::value && std::is_signed<T>::value,
+        void*
+    >::type val, char const* arg )
+{
+    if( !val || !arg )
+    {
+        return true;
+    }
+
+    char* end = nullptr;
+    // strtoll() only sets errno on failure: a stale value left by an earlier
+    // call would otherwise make a valid number look like an error
+    errno = 0;
+    long long const v = strtoll( arg, &end, 0 );
+    if( false
+        || errno
+        || end == arg // nothing was converted
+        || 0 != *end
+        || std::numeric_limits<T>::min() > v
+        || std::numeric_limits<T>::max() < v )
+    {
+        return true;
+    }
+    *static_cast<T*>( val ) = static_cast<T>( v );
+    return false;
+}
+
+// signed integrals: writes (current value: <number>) on target.
+// Returns true on failure (val is null), false on success.
+template <typename T>
+bool show(
+    typename std::enable_if
+    <
+        std::is_integral<T>::value && std::is_signed<T>::value,
+        void const*
+    >::type val, FILE* target )
+{
+    if( !val )
+    {
+        return true;
+    }
+
+    // T can be any signed integral type: widen to long long so that the
+    // conversion specifier always matches the argument ("%d" is only valid for
+    // types that promote to int)
+    long long const number = *static_cast<T const*>( val );
+    fprintf( target, "(current value: %lld)", number );
+    return false;
+}
+
+// unsigned integrals
+// Parses arg with strtoull() (base auto-detected: decimal, 0x hexadecimal or
+// 0 octal) and stores the result in the T that val points to.
+// Returns true on failure: val or arg is null, arg holds no number, arg is
+// negative, arg has trailing characters, or the number does not fit in T.
+// *val is left untouched in that case. Returns false on success.
+template <typename T>
+bool set(
+    typename std::enable_if
+    <
+        std::is_integral<T>::value && std::is_unsigned<T>::value,
+        void*
+    >::type val, char const* arg )
+{
+    if( !val || !arg )
+    {
+        return true;
+    }
+
+    char* end = nullptr;
+    // strtoull() only sets errno on failure: a stale value left by an earlier
+    // call would otherwise make a valid number look like an error
+    errno = 0;
+    unsigned long long const v = strtoull( arg, &end, 0 );
+    if( false
+        || errno
+        || end == arg // nothing was converted
+        || 0 != *end
+        || std::numeric_limits<T>::max() < v )
+    {
+        return true;
+    }
+
+    // strtoull() accepts a leading '-' and silently wraps the value around;
+    // the only place a '-' can appear in the converted text is that sign
+    for( char const* c = arg; c != end; ++c )
+    {
+        if( '-' == *c )
+        {
+            return true;
+        }
+    }
+
+    *static_cast<T*>( val ) = static_cast<T>( v );
+    return false;
+}
+
+// floating numbers
+// Parses arg with strtold() and stores the result in the T that val points to.
+// Returns true on failure: val or arg is null, arg holds no number, arg has
+// trailing characters, or the number is out of the range of T. *val is left
+// untouched in that case. Returns false on success.
+template <typename T>
+bool set(
+    typename std::enable_if
+    <
+        std::is_floating_point<T>::value,
+        void*
+    >::type val, char const* arg )
+{
+    if( !val || !arg )
+    {
+        return true;
+    }
+
+    char* end = nullptr;
+    // strtold() only sets errno on failure: a stale value left by an earlier
+    // call would otherwise make a valid number look like an error
+    errno = 0;
+    long double const v = strtold( arg, &end );
+    // for floating point types, numeric_limits<T>::min() is the smallest
+    // POSITIVE value: the lower bound of the range is lowest()
+    if( false
+        || errno
+        || end == arg // nothing was converted
+        || 0 != *end
+        || std::numeric_limits<T>::lowest() > v
+        || std::numeric_limits<T>::max() < v )
+    {
+        return true;
+    }
+    *static_cast<T*>( val ) = static_cast<T>( v );
+    return false;
+}
+
+#endif