tools

various tools
git clone git://deadbeef.fr/tools.git
Log | Files | Refs | README | LICENSE

commit 0f4d26c460c139747d165b6b26fe6764a34f55b1
parent 409b0d2605e1a2ebf26a3b8e298a3116f0087eab
Author: Morel Bérenger <berengermorel76@gmail.com>
Date:   Fri, 26 Jun 2020 15:10:36 +0200

move files into "subprojects"

Diffstat:
D Makefile | 30 ------------------------------
M README | 53 +++--------------------------------------------------
A btl/src/optparser.cpp | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R src/optparser.hpp -> btl/src/optparser.hpp | 0
R src/utils.hpp -> btl/src/utils.hpp | 0
R src/vector.hpp -> btl/src/vector.hpp | 0
A lmerge/Makefile | 33 +++++++++++++++++++++++++++++++++
C README -> lmerge/doc/README | 0
R doc/lmerge.1.md -> lmerge/doc/lmerge.1.md | 0
A lmerge/src/lmerge.cpp | 347 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D src/lmerge.cpp | 347 -------------------------------------------------------------------------------
D src/optparser.cpp | 121 -------------------------------------------------------------------------------
12 files changed, 505 insertions(+), 548 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,30 +0,0 @@ -CC ?= cc -CXX ?= c++ -PREFIX ?= "/usr/local" - -all: manpages lmerge.1 lmerge - -%.1: doc/%.1.md - pandoc -s --to=man $< -o $@ - -%.o: src/%.cpp - $(CXX) $(CXXFLAGS) -c $< -o $@ - -lmerge: lmerge.o optparser.o - $(CXX) -o $@ $^ - -manpages: lmerge.1 - -clean: - rm -f lmerge lmerge.1 *.o - -install: all - mkdir -p ${DESTDIR}${PREFIX}/bin/ ${DESTDIR}${PREFIX}/share/man/man1/ - install -m 755 lmerge ${DESTDIR}${PREFIX}/bin/ - install -m 644 lmerge.1 ${DESTDIR}${PREFIX}/share/man/man1/ - -uninstall: - rm ${DESTDIR}${PREFIX}/bin/lmerge - rm ${DESTDIR}${PREFIX}/share/man/man1/lmerge.1 - -.PHONY: all clean install uninstall diff --git a/README b/README @@ -1,50 +1,3 @@ -This tool merges sequential entries if they have some fields with same values. - -USAGE: - -See lmerge.1.md - -DEPENDENCIES: - -* a C++ compiler (clang and g++ have been tested); -* pandoc to build man-page; - -This is a work in progress tool, but it works for me. - -BUILD OPTIONS: - -There are several '#define' that can affect the build process: - -* WITH_STL: do not use home-made vector implementation. This will drastically - increase the size of the binary, though (~25Kio). -* NO_CMDLINE: do not build support for command-line switches except the '--help' - communator. This will reduce the size of the binary (~9Kio). - -Those options can be set using the CXXFLAGS environment variable, for example: - -CXXFLAGS="$CXXFLAGS -DNO_CMDLINE" make - -COMPACT BUILD: - -It is possible to build a statically linked binary, but the invocation will be -different depending on your OS and requires alternate tools. -Even on Debian, the invocation may differ a lot between major versions, for -example it's more hackish un current stable (Buster) than it was in old-stable -(Stretch), especially with the include paths. 
-On Debian buster, I do this for example (beware, every single change can break -the build): - -clang++ -o lmerge \ - lmerge.cpp /usr/lib/x86_64-linux-musl/crt1.o \ - -Os -nostdlib -static -fno-exceptions -stdlib=libc++ -nobuiltininc -nostdinc++ \ - -L /usr/lib/x86_64-linux-musl \ - -lpthread -lc \ - -I /usr/lib/llvm-7/include/c++/v1/ \ - -I /usr/include/x86_64-linux-musl/ \ - -D LIBCPP_MUSL_STATIC - -The resulting binary is bigger than the output of `make` (on this system, -stripped dynamic linking gives a 19Kio binary, stripped static gives a 38Kio one) -but does not imply to install the dynamic libraries, which are, here, around -2Mio for libstdc++ and 850Kio for libc++. -One of the points of doing so would be to use this tool in a chroot. +This repository contains various softwares and libraries used to build them. +Each of them are contained in their own sub-directory, being source code, +documentation or any related files. diff --git a/btl/src/optparser.cpp b/btl/src/optparser.cpp @@ -0,0 +1,122 @@ +#include <string.h> +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <errno.h> +#include <limits.h> + +#include "optparser.hpp" +#include "utils.hpp" + +// those strings must be defined in same order than the enum +// Trick: use vim's block insertion to check them +char const *parse_error_msgs[] = +{ + /*NONE */"no error", + /*IGNORED */"not an option", + /*MAX_COUNT */"count overflow", + /*SET_NO_VAL */"this option needs a value", + /*SET_VAL_IGN*/"this option does not needs a value", + /*SET_FAIL */"failed to parse the value", + /*BAD_ARGS */"application bug: bad arguments to call", + /*BAD_SETTER */"application bug: bad values in option", +}; + +parse_error_t parse_cmd_opt( char const* arg, opt_desc_t* start, opt_desc_t const* end ) +{ + BUG_CHECK( !( arg && start && end && end > start ), BAD_ARGS ); + + if( arg[0] != '-' || arg[1] == 0 ) + { + return IGNORED; + } + + opt_desc_t* opt = start; + char const* argv = arg 
+ 2; + + //search for option (stores in opt) + if( arg[1] == '-' ) //long + { + char const* name = argv; + size_t name_sz; + argv = strchr( name, '=' ); + if( argv ) + { + assert( argv > name ); + name_sz = static_cast<size_t>( argv - name ); + ++argv; + } + else + { + name_sz = strlen( name ); + } + + for( ; true + && opt != end + && 0 != strncmp( name, opt->option, name_sz ) + && name_sz != strlen( opt->option ); ++opt ) + { + } + } + else //short + { + uint32_t name = static_cast<uint8_t>( arg[1] ); + if( name & 0x80 ) + { + assert( !( name & 0x80 ) && "TODO: utf-8 support" ); + abort(); + } + for( ; opt != end && opt->short_option != name; ++opt ) + { + } + } + + //BUG_CHECK( ( opt->set && !opt->value ) || ( !opt->set && opt->value ), BAD_SETTER ); + + if( opt == end ) + { + return IGNORED; + } + + if( opt->value && !empty_array( argv ) ) + { + return SET_NO_VAL; + } + + if( !opt->value && empty_array( argv ) ) + { + return SET_VAL_IGN; + } + + if( opt->set && opt->set( opt->value, argv ) ) + { + return SET_FAIL; + } + + if( opt->count < UINT32_MAX ) + { + ++opt->count; + } + return opt->count == UINT32_MAX ? MAX_COUNT : NONE; +} + +void print_opts( FILE* target, opt_desc_t const* start, opt_desc_t const* end ) +{ + char short_name[] = ",-????"; + for( ; start != end; ++start ) + { + memcpy( short_name + 2, &start->short_option, sizeof( start->short_option ) ); + fprintf( target, "\t* --%s%s: %s", + start->option, + start->short_option ? 
short_name : "", + start->description + ); + //fail to show current val + if( start->show && start->show( start->value, target ) ) + { + abort(); + } + fputc( '\n', target ); + } +} diff --git a/src/optparser.hpp b/btl/src/optparser.hpp diff --git a/src/utils.hpp b/btl/src/utils.hpp diff --git a/src/vector.hpp b/btl/src/vector.hpp diff --git a/lmerge/Makefile b/lmerge/Makefile @@ -0,0 +1,33 @@ +CC ?= cc +CXX ?= c++ +PREFIX ?= "/usr/local" + +all: manpages lmerge.1 lmerge + +%.1: doc/%.1.md + pandoc -s --to=man $< -o $@ + +lmerge.o: src/lmerge.cpp + $(CXX) -I ../btl/src $(CXXFLAGS) -c $< -o $@ + +%.o: ../btl/src/%.cpp + $(CXX) $(CXXFLAGS) -c $< -o $@ + +lmerge: lmerge.o optparser.o + $(CXX) -o $@ $^ + +manpages: lmerge.1 + +clean: + rm -f lmerge lmerge.1 *.o + +install: all + mkdir -p ${DESTDIR}${PREFIX}/bin/ ${DESTDIR}${PREFIX}/share/man/man1/ + install -m 755 lmerge ${DESTDIR}${PREFIX}/bin/ + install -m 644 lmerge.1 ${DESTDIR}${PREFIX}/share/man/man1/ + +uninstall: + rm ${DESTDIR}${PREFIX}/bin/lmerge + rm ${DESTDIR}${PREFIX}/share/man/man1/lmerge.1 + +.PHONY: all clean install uninstall diff --git a/README b/lmerge/doc/README diff --git a/doc/lmerge.1.md b/lmerge/doc/lmerge.1.md diff --git a/lmerge/src/lmerge.cpp b/lmerge/src/lmerge.cpp @@ -0,0 +1,347 @@ +#ifdef LIBCPP_MUSL_STATIC +#define __GLIBC_PREREQ(x,y) 0 +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <assert.h> +#include <stdint.h> +#include <ctype.h> + +#include <algorithm> +#include <iterator> + +/** + * TODO: + * * check that ENTRY_SEP works as expected; + * * fix the fact input needs a "\\n" at end of last line for it to be merged; + * * UTF-8 support (field separators); + * * -v/--version option; + * * allow to customize the memory allocation scheme at runtime; + * * allow to not print twice merged fields; + * * allow to set verbosity on stderr; + * * remove hard-coded limit of UINT16_MAX - 1 for fields start/stop positions; + * * print as many 
lines as there where duplicates? + * + * Coding rules: + * * const affect what is before it, so it must follow the type; + **/ + +#include <vector.hpp> +#include <optparser.hpp> + +class field_marker; +typedef vector<char> line_cache; +typedef vector<field_marker> field_marker_t; + +class field_marker +{ + uint16_t m_start = UINT16_MAX, m_end = UINT16_MAX; + +public: + bool ignore( void ) const + { + return m_start == m_end && m_start == UINT16_MAX; + } + + void define( uint16_t start, uint16_t end ) + { + assert( end >= start && start != UINT8_MAX && end != UINT8_MAX ); + m_start = start; m_end = end; + } + + uint16_t start( void ) const + { + assert( !ignore() ); + return m_start; + } + + uint16_t end( void ) const + { + assert( !ignore() ); + return m_end; + } +}; + +bool allocate_markers( + char const * const FIELDS, + field_marker_t& field_cache +); + +void print_help( char const* pgm, FILE* target, opt_desc_t const* start, opt_desc_t const* end ); + +int main( int argc, char const *const *argv ) +{ + char const * SEP_START = getenv( "FIELD_SEP" ); + char const * SEP_ENTRY = getenv( "ENTRY_SEP" ); + char const * FIELDS = getenv( "FIELDS" ); + if( !SEP_START ) + { + SEP_START = " \t"; + } + if( !SEP_ENTRY ) + { + SEP_ENTRY = "\n"; + } + + opt_desc_t opts[] = + { + { "help", "shows this message", 'h', 0, nullptr, nullptr, nullptr }, +#ifndef NO_CMDLINE + { "field_sep", "field separator", 't' , 0, &SEP_START, set<char const**>, show<char*> }, + { "entry_sep", "entry separator", 'l' , 0, &SEP_ENTRY, set<char const**>, show<char*> }, + { "fields" , "fields to compare", 'f', 0, &FIELDS , set<char const**>, show<char*> }, +#endif + }; + auto b_opts = std::begin( opts ); + auto e_opts = std::end( opts ); + + char const *arg = argv[1]; assert( argc > 0 ); + for( int iarg = 1; iarg != argc; ++iarg, ++argv ) + { + auto error = parse_cmd_opt( arg, b_opts, e_opts ); + switch( error ) + { + case MAX_COUNT: + arg_warning( arg, error ); + break; + case NONE: + case IGNORED: + 
break; + case SET_NO_VAL: + case SET_VAL_IGN: + case SET_FAIL: + case BAD_ARGS: + case BAD_SETTER: + print_help( argv[0], stderr, b_opts, e_opts ); + arg_error( arg, error ); + return EXIT_FAILURE; + } + } + + if( opts[0].count ) + { + print_help( argv[0], stdout, b_opts, e_opts ); + return EXIT_SUCCESS; + } + + if( !FIELDS ) + { + fputs( "ERROR: FIELDS is not defined\n", stderr ); + return EXIT_FAILURE; + } + + if( strlen( FIELDS ) == 0 ) + { + fputs( "ERROR: FIELDS is empty\n", stderr ); + return EXIT_FAILURE; + } + + field_marker_t field_cache; + if( allocate_markers( FIELDS, field_cache ) ) + { + return EXIT_FAILURE; + } + + size_t buf_sz = 2048; + char* buf = nullptr; + + // allocating a cache of at least 16 bytes. + // Note: I don't see how merging lines smaller than 16 bytes can be useful + // also, not even enough mem for that would indicate bigger problems... + while( !buf && buf_sz >= 32 ) + { + buf_sz /= 2; + char* nbuf = static_cast<char*>( realloc( buf, buf_sz ) ); + if( !nbuf ) + { + free( buf ); + return EXIT_FAILURE; + } + buf = nbuf; + } + + if( !buf ) + { + fprintf( stderr, "ERROR: malloc %s(%d)\n", strerror( errno ), errno ); + free( buf ); + return EXIT_FAILURE; + } + + bool fetch = true; + line_cache last_line; + char const * const SEP_END = SEP_START + strlen( SEP_START ); + while( !feof( stdin ) ) + { + if( !fgets( buf, static_cast<int>( buf_sz ), stdin ) ) + { + free( buf ); + buf = nullptr; + if( !feof( stdin ) ) + { + fprintf( stderr, "ERROR: fgets %s(%d)\n", strerror( errno ), errno ); + free( buf ); + return EXIT_FAILURE; + } + break; + } + + size_t str_sz = strlen( buf ); + if( str_sz == buf_sz - 1 && buf[str_sz] != '\n' && !feof( stdin ) ) + { + fprintf( stderr, "ERROR: buffer too small for some lines\n" ); + free( buf ); + return EXIT_FAILURE; + } + + if( !fetch ) + { + char const* dst_ptr = buf; + for( size_t i = 0; i < field_cache.size(); ++i ) + { + field_marker const& src = field_cache[i]; + if( src.ignore() ) + { + char const 
*sep = SEP_END; + while( sep == SEP_END ) + { + ++dst_ptr; + sep = SEP_START; + for( ; sep != SEP_END && *dst_ptr && *dst_ptr != *sep; ++sep ){} + } + ++dst_ptr; + continue; + } + char const * src_ptr = last_line.data() + src.start(); + size_t len = src.end() - src.start(); + if( len > buf_sz - static_cast<size_t>( dst_ptr - buf ) ) + { + fetch = true; + break; + } + + if( 0 != memcmp( dst_ptr, src_ptr, len ) ) + { + fetch = true; + break; + } + + char last = dst_ptr[len]; + char const * sep_ = SEP_START; + assert( sep_ != nullptr ); + while( 0 != *sep_ && *sep_ != last && *SEP_ENTRY != last ) + { + ++sep_; + } + if( 0 == *sep_ ) + { + fetch = true; + break; + } + dst_ptr += len; + assert( dst_ptr >= buf ); + } + + fputc( fetch ? *SEP_ENTRY : * SEP_START, stdout ); + } + + last_line.assign( buf, buf + str_sz ); + if( last_line.back() == *SEP_ENTRY ) + { + last_line.back() = 0; + } + + if( fetch ) + { + line_cache::iterator start = last_line.begin(); + line_cache::iterator cache_end = last_line.end(); + size_t field_index = 0; + while( start != cache_end && field_index < field_cache.size() ) + { + auto end = last_line.end(); + if( last_line.back() == 0 ) + { + --end; + } + line_cache::iterator it = std::find_first_of + ( + start, end, + SEP_START, SEP_END + ); + if( !field_cache[field_index].ignore() ) + { + field_cache[field_index].define( + static_cast<uint16_t>( start - last_line.begin() ), + static_cast<uint16_t>( it - last_line.begin() ) + ); + } + start = it + 1; + ++field_index; + } + fetch = false; + } + fputs( last_line.data(), stdout ); + } + fputc( *SEP_ENTRY, stdout ); + free( buf ); + return EXIT_SUCCESS; +} + +bool allocate_markers( + char const * const FIELDS, + field_marker_t& field_cache +) +{ + field_cache.reserve( UINT8_MAX ); //255 fields should fit most cases + size_t last_field = 0; + + char const * fields = FIELDS - 1; + do + { + ++fields; + if( isdigit( *fields ) ) + { + last_field = last_field * 10 + static_cast<size_t>( *fields - '0' ); + } 
+ else if( *fields == ',' || *fields == 0 ) + { + size_t max = std::max( field_cache.size(), last_field ); + field_cache.resize( max ); + field_cache[last_field - 1].define( 0, 0 ); + last_field = 0; + } + else + { + fputs( "ERROR: FIELDS contains illegal characters\n", stderr ); + return true; + } + }while( *fields ); + field_cache.shrink_to_fit(); + return false; +} + +void print_help( char const* pgm, FILE* target, opt_desc_t const* start, opt_desc_t const* end ) +{ + fputs( "Usage: ", target ); + fputs( pgm, target ); + fputs( " [OPTIONS]\n" + "Description:\n" + "\tThis program reads stdin and when consecutive lines have specific fields all\n" + "\tcontaining the same value, prints them replacing the newline character by\n" + "\tthe 1st character in FIELD_SEP.\n" + "\tFields are delimited by the FIELD_SEP environment variable. If not defined,\n" + "\t\" \\t\" is used instead (see isblank(3)).\n" + "\tFields to use are defined by the environment variable FIELDS, which only\n" + "\tuse unsigned decimal integers separated by commas, other characters makes the\n" + "\tvalue invalid.\n" + "\tIf FIELDS is not defined or invalid, exits with an error.\n" + "\tEmpty field indexes (\"1,,3\") are ignored (will resolve in \"1,3\").\n" + "\tDo not work if input is not in line mode.\n" + "\tLine separator is defined by ENTRY_SEP, or \"\\n\" if not defined.\n" + "Options:\n" + , target ); + print_opts( target, start, end ); +} + diff --git a/src/lmerge.cpp b/src/lmerge.cpp @@ -1,347 +0,0 @@ -#ifdef LIBCPP_MUSL_STATIC -#define __GLIBC_PREREQ(x,y) 0 -#endif - -#include <stdlib.h> -#include <stdio.h> -#include <errno.h> -#include <string.h> -#include <assert.h> -#include <stdint.h> -#include <ctype.h> - -#include <algorithm> -#include <iterator> - -/** - * TODO: - * * check that ENTRY_SEP works as expected; - * * fix the fact input needs a "\\n" at end of last line for it to be merged; - * * UTF-8 support (field separators); - * * -v/--version option; - * * allow to customize 
the memory allocation scheme at runtime; - * * allow to not print twice merged fields; - * * allow to set verbosity on stderr; - * * remove hard-coded limit of UINT16_MAX - 1 for fields start/stop positions; - * * print as many lines as there where duplicates? - * - * Coding rules: - * * const affect what is before it, so it must follow the type; - **/ - -#include "vector.hpp" -#include "optparser.hpp" - -class field_marker; -typedef vector<char> line_cache; -typedef vector<field_marker> field_marker_t; - -class field_marker -{ - uint16_t m_start = UINT16_MAX, m_end = UINT16_MAX; - -public: - bool ignore( void ) const - { - return m_start == m_end && m_start == UINT16_MAX; - } - - void define( uint16_t start, uint16_t end ) - { - assert( end >= start && start != UINT8_MAX && end != UINT8_MAX ); - m_start = start; m_end = end; - } - - uint16_t start( void ) const - { - assert( !ignore() ); - return m_start; - } - - uint16_t end( void ) const - { - assert( !ignore() ); - return m_end; - } -}; - -bool allocate_markers( - char const * const FIELDS, - field_marker_t& field_cache -); - -void print_help( char const* pgm, FILE* target, opt_desc_t const* start, opt_desc_t const* end ); - -int main( int argc, char const *const *argv ) -{ - char const * SEP_START = getenv( "FIELD_SEP" ); - char const * SEP_ENTRY = getenv( "ENTRY_SEP" ); - char const * FIELDS = getenv( "FIELDS" ); - if( !SEP_START ) - { - SEP_START = " \t"; - } - if( !SEP_ENTRY ) - { - SEP_ENTRY = "\n"; - } - - opt_desc_t opts[] = - { - { "help", "shows this message", 'h', 0, nullptr, nullptr, nullptr }, -#ifndef NO_CMDLINE - { "field_sep", "field separator", 't' , 0, &SEP_START, set<char const**>, show<char*> }, - { "entry_sep", "entry separator", 'l' , 0, &SEP_ENTRY, set<char const**>, show<char*> }, - { "fields" , "fields to compare", 'f', 0, &FIELDS , set<char const**>, show<char*> }, -#endif - }; - auto b_opts = std::begin( opts ); - auto e_opts = std::end( opts ); - - char const *arg = argv[1]; assert( 
argc > 0 ); - for( int iarg = 1; iarg != argc; ++iarg, ++argv ) - { - auto error = parse_cmd_opt( arg, b_opts, e_opts ); - switch( error ) - { - case MAX_COUNT: - arg_warning( arg, error ); - break; - case NONE: - case IGNORED: - break; - case SET_NO_VAL: - case SET_VAL_IGN: - case SET_FAIL: - case BAD_ARGS: - case BAD_SETTER: - print_help( argv[0], stderr, b_opts, e_opts ); - arg_error( arg, error ); - return EXIT_FAILURE; - } - } - - if( opts[0].count ) - { - print_help( argv[0], stdout, b_opts, e_opts ); - return EXIT_SUCCESS; - } - - if( !FIELDS ) - { - fputs( "ERROR: FIELDS is not defined\n", stderr ); - return EXIT_FAILURE; - } - - if( strlen( FIELDS ) == 0 ) - { - fputs( "ERROR: FIELDS is empty\n", stderr ); - return EXIT_FAILURE; - } - - field_marker_t field_cache; - if( allocate_markers( FIELDS, field_cache ) ) - { - return EXIT_FAILURE; - } - - size_t buf_sz = 2048; - char* buf = nullptr; - - // allocating a cache of at least 16 bytes. - // Note: I don't see how merging lines smaller than 16 bytes can be useful - // also, not even enough mem for that would indicate bigger problems... 
- while( !buf && buf_sz >= 32 ) - { - buf_sz /= 2; - char* nbuf = static_cast<char*>( realloc( buf, buf_sz ) ); - if( !nbuf ) - { - free( buf ); - return EXIT_FAILURE; - } - buf = nbuf; - } - - if( !buf ) - { - fprintf( stderr, "ERROR: malloc %s(%d)\n", strerror( errno ), errno ); - free( buf ); - return EXIT_FAILURE; - } - - bool fetch = true; - line_cache last_line; - char const * const SEP_END = SEP_START + strlen( SEP_START ); - while( !feof( stdin ) ) - { - if( !fgets( buf, static_cast<int>( buf_sz ), stdin ) ) - { - free( buf ); - buf = nullptr; - if( !feof( stdin ) ) - { - fprintf( stderr, "ERROR: fgets %s(%d)\n", strerror( errno ), errno ); - free( buf ); - return EXIT_FAILURE; - } - break; - } - - size_t str_sz = strlen( buf ); - if( str_sz == buf_sz - 1 && buf[str_sz] != '\n' && !feof( stdin ) ) - { - fprintf( stderr, "ERROR: buffer too small for some lines\n" ); - free( buf ); - return EXIT_FAILURE; - } - - if( !fetch ) - { - char const* dst_ptr = buf; - for( size_t i = 0; i < field_cache.size(); ++i ) - { - field_marker const& src = field_cache[i]; - if( src.ignore() ) - { - char const *sep = SEP_END; - while( sep == SEP_END ) - { - ++dst_ptr; - sep = SEP_START; - for( ; sep != SEP_END && *dst_ptr && *dst_ptr != *sep; ++sep ){} - } - ++dst_ptr; - continue; - } - char const * src_ptr = last_line.data() + src.start(); - size_t len = src.end() - src.start(); - if( len > buf_sz - static_cast<size_t>( dst_ptr - buf ) ) - { - fetch = true; - break; - } - - if( 0 != memcmp( dst_ptr, src_ptr, len ) ) - { - fetch = true; - break; - } - - char last = dst_ptr[len]; - char const * sep_ = SEP_START; - assert( sep_ != nullptr ); - while( 0 != *sep_ && *sep_ != last && *SEP_ENTRY != last ) - { - ++sep_; - } - if( 0 == *sep_ ) - { - fetch = true; - break; - } - dst_ptr += len; - assert( dst_ptr >= buf ); - } - - fputc( fetch ? 
*SEP_ENTRY : * SEP_START, stdout ); - } - - last_line.assign( buf, buf + str_sz ); - if( last_line.back() == *SEP_ENTRY ) - { - last_line.back() = 0; - } - - if( fetch ) - { - line_cache::iterator start = last_line.begin(); - line_cache::iterator cache_end = last_line.end(); - size_t field_index = 0; - while( start != cache_end && field_index < field_cache.size() ) - { - auto end = last_line.end(); - if( last_line.back() == 0 ) - { - --end; - } - line_cache::iterator it = std::find_first_of - ( - start, end, - SEP_START, SEP_END - ); - if( !field_cache[field_index].ignore() ) - { - field_cache[field_index].define( - static_cast<uint16_t>( start - last_line.begin() ), - static_cast<uint16_t>( it - last_line.begin() ) - ); - } - start = it + 1; - ++field_index; - } - fetch = false; - } - fputs( last_line.data(), stdout ); - } - fputc( *SEP_ENTRY, stdout ); - free( buf ); - return EXIT_SUCCESS; -} - -bool allocate_markers( - char const * const FIELDS, - field_marker_t& field_cache -) -{ - field_cache.reserve( UINT8_MAX ); //255 fields should fit most cases - size_t last_field = 0; - - char const * fields = FIELDS - 1; - do - { - ++fields; - if( isdigit( *fields ) ) - { - last_field = last_field * 10 + static_cast<size_t>( *fields - '0' ); - } - else if( *fields == ',' || *fields == 0 ) - { - size_t max = std::max( field_cache.size(), last_field ); - field_cache.resize( max ); - field_cache[last_field - 1].define( 0, 0 ); - last_field = 0; - } - else - { - fputs( "ERROR: FIELDS contains illegal characters\n", stderr ); - return true; - } - }while( *fields ); - field_cache.shrink_to_fit(); - return false; -} - -void print_help( char const* pgm, FILE* target, opt_desc_t const* start, opt_desc_t const* end ) -{ - fputs( "Usage: ", target ); - fputs( pgm, target ); - fputs( " [OPTIONS]\n" - "Description:\n" - "\tThis program reads stdin and when consecutive lines have specific fields all\n" - "\tcontaining the same value, prints them replacing the newline character by\n" - 
"\tthe 1st character in FIELD_SEP.\n" - "\tFields are delimited by the FIELD_SEP environment variable. If not defined,\n" - "\t\" \\t\" is used instead (see isblank(3)).\n" - "\tFields to use are defined by the environment variable FIELDS, which only\n" - "\tuse unsigned decimal integers separated by commas, other characters makes the\n" - "\tvalue invalid.\n" - "\tIf FIELDS is not defined or invalid, exits with an error.\n" - "\tEmpty field indexes (\"1,,3\") are ignored (will resolve in \"1,3\").\n" - "\tDo not work if input is not in line mode.\n" - "\tLine separator is defined by ENTRY_SEP, or \"\\n\" if not defined.\n" - "Options:\n" - , target ); - print_opts( target, start, end ); -} - diff --git a/src/optparser.cpp b/src/optparser.cpp @@ -1,121 +0,0 @@ -#include <string.h> -#include <assert.h> -#include <stdlib.h> -#include <stdio.h> -#include <stdint.h> -#include <errno.h> -#include <limits.h> - -#include "optparser.hpp" -#include "utils.hpp" - -// those strings must be defined in same order than the enum -// Trick: use vim's block insertion to check them -char const *parse_error_msgs[] = -{ - /*NONE */"no error", - /*IGNORED */"not an option", - /*MAX_COUNT */"count overflow", - /*SET_NO_VAL */"this option needs a value", - /*SET_VAL_IGN*/"this option does not needs a value", - /*SET_FAIL */"failed to parse the value", - /*BAD_ARGS */"application bug: bad arguments to call", - /*BAD_SETTER */"application bug: bad values in option", -}; - -parse_error_t parse_cmd_opt( char const* arg, opt_desc_t* start, opt_desc_t const* end ) -{ - BUG_CHECK( !( arg && start && end && end > start ), BAD_ARGS ); - - if( arg[0] != '-' || arg[1] == 0 ) - { - return IGNORED; - } - - opt_desc_t* opt = start; - char const* argv = arg + 2; - - //search for option (stores in opt) - if( arg[1] == '-' ) //long - { - char const* name = argv; - size_t name_sz; - argv = strchr( name, '=' ); - if( argv ) - { - assert( argv > name ); - name_sz = static_cast<size_t>( argv - name ); - 
++argv; - } - else - { - name_sz = strlen( name ); - } - - for( ; true - && opt != end - && 0 != strncmp( name, opt->option, name_sz ) - && name_sz != strlen( opt->option ); ++opt ) - { - } - } - else //short - { - uint32_t name = static_cast<uint8_t>( arg[1] ); - if( name & 0x80 ) - { - assert( !( name & 0x80 ) && "TODO: utf-8 support" ); - abort(); - } - for( ; opt != end && opt->short_option != name; ++opt ) - { - } - } - - //BUG_CHECK( ( opt->set && !opt->value ) || ( !opt->set && opt->value ), BAD_SETTER ); - - if( opt == end ) - { - return IGNORED; - } - - if( opt->value && !empty_array( argv ) ) - { - return SET_NO_VAL; - } - - if( !opt->value && empty_array( argv ) ) - { - return SET_VAL_IGN; - } - - if( opt->set && opt->set( opt->value, argv ) ) - { - return SET_FAIL; - } - - if( opt->count < UINT32_MAX ) - { - ++opt->count; - } - return opt->count == UINT32_MAX ? MAX_COUNT : NONE; -} - -void print_opts( FILE* target, opt_desc_t const* start, opt_desc_t const* end ) -{ - char short_name[] = ",-????"; - for( ; start != end; ++start ) - { - memcpy( short_name + 2, &start->short_option, sizeof( start->short_option ) ); - fprintf( target, "\t* --%s%s: %s", - start->option, - start->short_option ? short_name : "", - start->description - ); - if( start->show && start->show( start->value, target ) ) - { - abort(); - } - fputc( '\n', target ); - } -}