commit 0f4d26c460c139747d165b6b26fe6764a34f55b1
parent 409b0d2605e1a2ebf26a3b8e298a3116f0087eab
Author: Morel Bérenger <berengermorel76@gmail.com>
Date: Fri, 26 Jun 2020 15:10:36 +0200
move files into "subprojects"
Diffstat:
12 files changed, 505 insertions(+), 548 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,30 +0,0 @@
-CC ?= cc
-CXX ?= c++
-PREFIX ?= "/usr/local"
-
-all: manpages lmerge.1 lmerge
-
-%.1: doc/%.1.md
- pandoc -s --to=man $< -o $@
-
-%.o: src/%.cpp
- $(CXX) $(CXXFLAGS) -c $< -o $@
-
-lmerge: lmerge.o optparser.o
- $(CXX) -o $@ $^
-
-manpages: lmerge.1
-
-clean:
- rm -f lmerge lmerge.1 *.o
-
-install: all
- mkdir -p ${DESTDIR}${PREFIX}/bin/ ${DESTDIR}${PREFIX}/share/man/man1/
- install -m 755 lmerge ${DESTDIR}${PREFIX}/bin/
- install -m 644 lmerge.1 ${DESTDIR}${PREFIX}/share/man/man1/
-
-uninstall:
- rm ${DESTDIR}${PREFIX}/bin/lmerge
- rm ${DESTDIR}${PREFIX}/share/man/man1/lmerge.1
-
-.PHONY: all clean install uninstall
diff --git a/README b/README
@@ -1,50 +1,3 @@
-This tool merges sequential entries if they have some fields with same values.
-
-USAGE:
-
-See lmerge.1.md
-
-DEPENDENCIES:
-
-* a C++ compiler (clang and g++ have been tested);
-* pandoc to build man-page;
-
-This is a work in progress tool, but it works for me.
-
-BUILD OPTIONS:
-
-There are several '#define' that can affect the build process:
-
-* WITH_STL: do not use home-made vector implementation. This will drastically
- increase the size of the binary, though (~25Kio).
-* NO_CMDLINE: do not build support for command-line switches except the '--help'
- communator. This will reduce the size of the binary (~9Kio).
-
-Those options can be set using the CXXFLAGS environment variable, for example:
-
-CXXFLAGS="$CXXFLAGS -DNO_CMDLINE" make
-
-COMPACT BUILD:
-
-It is possible to build a statically linked binary, but the invocation will be
-different depending on your OS and requires alternate tools.
-Even on Debian, the invocation may differ a lot between major versions, for
-example it's more hackish un current stable (Buster) than it was in old-stable
-(Stretch), especially with the include paths.
-On Debian buster, I do this for example (beware, every single change can break
-the build):
-
-clang++ -o lmerge \
- lmerge.cpp /usr/lib/x86_64-linux-musl/crt1.o \
- -Os -nostdlib -static -fno-exceptions -stdlib=libc++ -nobuiltininc -nostdinc++ \
- -L /usr/lib/x86_64-linux-musl \
- -lpthread -lc \
- -I /usr/lib/llvm-7/include/c++/v1/ \
- -I /usr/include/x86_64-linux-musl/ \
- -D LIBCPP_MUSL_STATIC
-
-The resulting binary is bigger than the output of `make` (on this system,
-stripped dynamic linking gives a 19Kio binary, stripped static gives a 38Kio one)
-but does not imply to install the dynamic libraries, which are, here, around
-2Mio for libstdc++ and 850Kio for libc++.
-One of the points of doing so would be to use this tool in a chroot.
+This repository contains several programs and the libraries used to build them.
+Each one lives in its own sub-directory, together with its source code,
+documentation and any related files.
diff --git a/btl/src/optparser.cpp b/btl/src/optparser.cpp
@@ -0,0 +1,122 @@
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <errno.h>
+#include <limits.h>
+
+#include "optparser.hpp"
+#include "utils.hpp"
+
+// Human-readable message for each parse error code.
+// The entries must stay in the same order as the enumerators of the error
+// enum (each one is named in the comment preceding its string); presumably
+// the table is indexed by a parse_error_t value -- confirm in optparser.hpp.
+// Tip: vim's block insertion makes it easy to check the name column.
+char const *parse_error_msgs[] =
+{
+ /*NONE */"no error",
+ /*IGNORED */"not an option",
+ /*MAX_COUNT */"count overflow",
+ /*SET_NO_VAL */"this option needs a value",
+ /*SET_VAL_IGN*/"this option does not needs a value",
+ /*SET_FAIL */"failed to parse the value",
+ /*BAD_ARGS */"application bug: bad arguments to call",
+ /*BAD_SETTER */"application bug: bad values in option",
+};
+
+/**
+ * Tries to interpret one command-line argument as an option from a table.
+ *
+ * Recognised shapes are "--name", "--name=value" and "-c" immediately
+ * followed by optional text ("-cvalue"). On a match the descriptor's setter,
+ * if any, receives the value text and the descriptor's counter is bumped.
+ *
+ * @param arg   argument to examine (must not be null)
+ * @param start first descriptor of the table
+ * @param end   one past the last descriptor; must be greater than start
+ * @return NONE on success; IGNORED when arg is not an option or matches no
+ *         descriptor; SET_NO_VAL / SET_VAL_IGN when the value checks below
+ *         reject the argument; SET_FAIL when the setter returns a true
+ *         value; MAX_COUNT once the counter has reached UINT32_MAX. Invalid
+ *         arguments are handed to BUG_CHECK together with BAD_ARGS.
+ */
+parse_error_t parse_cmd_opt( char const* arg, opt_desc_t* start, opt_desc_t const* end )
+{
+	BUG_CHECK( !( arg && start && end && end > start ), BAD_ARGS );
+
+	// A lone "-" or anything not starting with '-' is not an option.
+	if( arg[0] != '-' || arg[1] == 0 )
+	{
+		return IGNORED;
+	}
+
+	opt_desc_t* opt = start;
+	// For short options the value is whatever follows the option letter.
+	char const* argv = arg + 2;
+
+	//search for option (stores in opt)
+	if( arg[1] == '-' ) //long
+	{
+		char const* name = argv;
+		size_t name_sz;
+		// For long options the value, if any, follows the first '='.
+		// argv becomes null when there is no '='.
+		argv = strchr( name, '=' );
+		if( argv )
+		{
+			assert( argv > name );
+			name_sz = static_cast<size_t>( argv - name );
+			++argv;
+		}
+		else
+		{
+			name_sz = strlen( name );
+		}
+
+		// A descriptor matches only when its name has exactly the typed
+		// length AND the same characters. Accepting either condition alone
+		// would let a mere prefix, or any word of the same length, select
+		// the option.
+		for( ; opt != end; ++opt )
+		{
+			if( name_sz == strlen( opt->option )
+				&& 0 == strncmp( name, opt->option, name_sz ) )
+			{
+				break;
+			}
+		}
+	}
+	else //short
+	{
+		uint32_t name = static_cast<uint8_t>( arg[1] );
+		if( name & 0x80 )
+		{
+			// Non-ASCII lead byte: multi-byte option letters are unsupported.
+			assert( !( name & 0x80 ) && "TODO: utf-8 support" );
+			abort();
+		}
+		for( ; opt != end && opt->short_option != name; ++opt )
+		{
+		}
+	}
+
+	//BUG_CHECK( ( opt->set && !opt->value ) || ( !opt->set && opt->value ), BAD_SETTER );
+
+	if( opt == end )
+	{
+		return IGNORED;
+	}
+
+	// NOTE(review): the polarity of the next two checks depends on
+	// empty_array() from utils.hpp, which is not visible here -- confirm
+	// that they reject the intended cases.
+	if( opt->value && !empty_array( argv ) )
+	{
+		return SET_NO_VAL;
+	}
+
+	if( !opt->value && empty_array( argv ) )
+	{
+		return SET_VAL_IGN;
+	}
+
+	if( opt->set && opt->set( opt->value, argv ) )
+	{
+		return SET_FAIL;
+	}
+
+	// Saturating counter: never wraps, reports MAX_COUNT once saturated.
+	if( opt->count < UINT32_MAX )
+	{
+		++opt->count;
+	}
+	return opt->count == UINT32_MAX ? MAX_COUNT : NONE;
+}
+
+/**
+ * Writes one help line per option descriptor in [start, end) to target.
+ *
+ * Each line is a tab, "* --", the long name, ",-c" when the descriptor has
+ * a short option, ": ", the description, whatever the descriptor's show
+ * callback prints for the current value, and a newline. The process is
+ * aborted when a show callback returns a true value.
+ */
+void print_opts( FILE* target, opt_desc_t const* start, opt_desc_t const* end )
+{
+	// The "????" placeholder is overwritten with the raw bytes of the short
+	// option. NOTE(review): this assumes short_option is at most 4 bytes
+	// wide and stored low byte first -- confirm in optparser.hpp.
+	char short_label[] = ",-????";
+	opt_desc_t const* opt = start;
+	while( opt != end )
+	{
+		memcpy( short_label + 2, &opt->short_option, sizeof( opt->short_option ) );
+
+		char const* suffix = "";
+		if( opt->short_option )
+		{
+			suffix = short_label;
+		}
+		fprintf( target, "\t* --%s%s: %s", opt->option, suffix, opt->description );
+
+		// Descriptors without a show callback print no current value.
+		if( opt->show )
+		{
+			if( opt->show( opt->value, target ) )
+			{
+				abort();
+			}
+		}
+		fputc( '\n', target );
+		++opt;
+	}
+}
diff --git a/src/optparser.hpp b/btl/src/optparser.hpp
diff --git a/src/utils.hpp b/btl/src/utils.hpp
diff --git a/src/vector.hpp b/btl/src/vector.hpp
diff --git a/lmerge/Makefile b/lmerge/Makefile
@@ -0,0 +1,33 @@
+# Build, install and uninstall rules for lmerge.
+# lmerge's own sources live in src/, the shared ones in ../btl/src.
+CC ?= cc
+CXX ?= c++
+PREFIX ?= "/usr/local"
+
+all: manpages lmerge.1 lmerge
+
+# Render a man page from its markdown source.
+%.1: doc/%.1.md
+	pandoc -s --to=man $< -o $@
+
+# lmerge.cpp includes the btl headers with <...>, hence the extra -I.
+lmerge.o: src/lmerge.cpp
+	$(CXX) -I ../btl/src $(CXXFLAGS) -c $< -o $@
+
+%.o: ../btl/src/%.cpp
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+# LDFLAGS is honoured so that callers can request e.g. static linking.
+lmerge: lmerge.o optparser.o
+	$(CXX) $(LDFLAGS) -o $@ $^
+
+manpages: lmerge.1
+
+clean:
+	rm -f lmerge lmerge.1 *.o
+
+install: all
+	mkdir -p ${DESTDIR}${PREFIX}/bin/ ${DESTDIR}${PREFIX}/share/man/man1/
+	install -m 755 lmerge ${DESTDIR}${PREFIX}/bin/
+	install -m 644 lmerge.1 ${DESTDIR}${PREFIX}/share/man/man1/
+
+# -f keeps uninstall from failing when a file has already been removed.
+uninstall:
+	rm -f ${DESTDIR}${PREFIX}/bin/lmerge
+	rm -f ${DESTDIR}${PREFIX}/share/man/man1/lmerge.1
+
+# manpages produces no file of that name, so it must be declared phony too.
+.PHONY: all clean install manpages uninstall
diff --git a/README b/lmerge/doc/README
diff --git a/doc/lmerge.1.md b/lmerge/doc/lmerge.1.md
diff --git a/lmerge/src/lmerge.cpp b/lmerge/src/lmerge.cpp
@@ -0,0 +1,347 @@
+#ifdef LIBCPP_MUSL_STATIC
+#define __GLIBC_PREREQ(x,y) 0
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <stdint.h>
+#include <ctype.h>
+
+#include <algorithm>
+#include <iterator>
+
+/**
+ * TODO:
+ * * check that ENTRY_SEP works as expected;
+ * * fix the fact input needs a "\\n" at end of last line for it to be merged;
+ * * UTF-8 support (field separators);
+ * * -v/--version option;
+ * * allow to customize the memory allocation scheme at runtime;
+ * * allow to not print twice merged fields;
+ * * allow to set verbosity on stderr;
+ * * remove hard-coded limit of UINT16_MAX - 1 for fields start/stop positions;
+ * * print as many lines as there were duplicates?
+ *
+ * Coding rules:
+ * * const affects what precedes it, so it must follow the type;
+ **/
+
+#include <vector.hpp>
+#include <optparser.hpp>
+
+class field_marker;
+typedef vector<char> line_cache;
+typedef vector<field_marker> field_marker_t;
+
+/**
+ * Byte range [start, end) of one field inside the cached line.
+ *
+ * A default-constructed marker holds UINT16_MAX in both offsets and reports
+ * ignore() == true; define() turns it into a usable range. UINT16_MAX is
+ * therefore reserved and cannot be used as an offset.
+ */
+class field_marker
+{
+	uint16_t m_start = UINT16_MAX, m_end = UINT16_MAX;
+
+public:
+	/// True while the marker has never been defined.
+	bool ignore( void ) const
+	{
+		return m_start == m_end && m_start == UINT16_MAX;
+	}
+
+	/// Stores the range; start must not exceed end and neither offset may be
+	/// the reserved value UINT16_MAX.
+	void define( uint16_t start, uint16_t end )
+	{
+		// The reserved value is UINT16_MAX (see ignore()); checking against
+		// UINT8_MAX would reject the perfectly valid offset 255.
+		assert( end >= start && start != UINT16_MAX && end != UINT16_MAX );
+		m_start = start; m_end = end;
+	}
+
+	/// Offset of the first byte of the field; only valid once defined.
+	uint16_t start( void ) const
+	{
+		assert( !ignore() );
+		return m_start;
+	}
+
+	/// Offset one past the last byte of the field; only valid once defined.
+	uint16_t end( void ) const
+	{
+		assert( !ignore() );
+		return m_end;
+	}
+};
+
+bool allocate_markers(
+ char const * const FIELDS,
+ field_marker_t& field_cache
+);
+
+void print_help( char const* pgm, FILE* target, opt_desc_t const* start, opt_desc_t const* end );
+
+/**
+ * Entry point: reads lines from stdin and, when consecutive lines carry the
+ * same text in the selected fields, joins them with the first field
+ * separator instead of the entry separator. The result goes to stdout.
+ *
+ * Settings come from the FIELD_SEP, ENTRY_SEP and FIELDS environment
+ * variables; unless built with NO_CMDLINE the command-line options are bound
+ * to the same variables.
+ *
+ * Returns EXIT_SUCCESS after processing the input or printing the help, and
+ * EXIT_FAILURE on a rejected option, a missing, empty or invalid FIELDS, an
+ * allocation failure, a read error or an input line that does not fit the
+ * buffer.
+ */
+int main( int argc, char const *const *argv )
+{
+	char const * SEP_START = getenv( "FIELD_SEP" );
+	char const * SEP_ENTRY = getenv( "ENTRY_SEP" );
+	char const * FIELDS = getenv( "FIELDS" );
+	if( !SEP_START )
+	{
+		SEP_START = " \t";
+	}
+	if( !SEP_ENTRY )
+	{
+		SEP_ENTRY = "\n";
+	}
+
+	opt_desc_t opts[] =
+	{
+		{ "help", "shows this message", 'h', 0, nullptr, nullptr, nullptr },
+#ifndef NO_CMDLINE
+		{ "field_sep", "field separator", 't', 0, &SEP_START, set<char const**>, show<char*> },
+		{ "entry_sep", "entry separator", 'l', 0, &SEP_ENTRY, set<char const**>, show<char*> },
+		{ "fields", "fields to compare", 'f', 0, &FIELDS, set<char const**>, show<char*> },
+#endif
+	};
+	auto b_opts = std::begin( opts );
+	auto e_opts = std::end( opts );
+
+	// Examine every argument in turn. argv itself is left untouched so that
+	// argv[0] still names the program when the help is printed.
+	assert( argc > 0 );
+	for( int iarg = 1; iarg < argc; ++iarg )
+	{
+		char const *arg = argv[iarg];
+		auto error = parse_cmd_opt( arg, b_opts, e_opts );
+		switch( error )
+		{
+			case MAX_COUNT:
+				arg_warning( arg, error );
+				break;
+			case NONE:
+			case IGNORED:
+				break;
+			case SET_NO_VAL:
+			case SET_VAL_IGN:
+			case SET_FAIL:
+			case BAD_ARGS:
+			case BAD_SETTER:
+				print_help( argv[0], stderr, b_opts, e_opts );
+				arg_error( arg, error );
+				return EXIT_FAILURE;
+		}
+	}
+
+	// opts[0] is the "help" entry; a non-zero count means it was requested.
+	if( opts[0].count )
+	{
+		print_help( argv[0], stdout, b_opts, e_opts );
+		return EXIT_SUCCESS;
+	}
+
+	if( !FIELDS )
+	{
+		fputs( "ERROR: FIELDS is not defined\n", stderr );
+		return EXIT_FAILURE;
+	}
+
+	if( strlen( FIELDS ) == 0 )
+	{
+		fputs( "ERROR: FIELDS is empty\n", stderr );
+		return EXIT_FAILURE;
+	}
+
+	field_marker_t field_cache;
+	if( allocate_markers( FIELDS, field_cache ) )
+	{
+		return EXIT_FAILURE;
+	}
+
+	// Allocate the line buffer: the first request is 1024 bytes and every
+	// failure halves it, down to a minimum of 16 bytes.
+	// Note: I don't see how merging lines smaller than 16 bytes can be useful
+	// also, not even enough mem for that would indicate bigger problems...
+	size_t buf_sz = 2048;
+	char* buf = nullptr;
+	while( !buf && buf_sz >= 32 )
+	{
+		buf_sz /= 2;
+		buf = static_cast<char*>( malloc( buf_sz ) );
+	}
+
+	if( !buf )
+	{
+		fprintf( stderr, "ERROR: malloc %s(%d)\n", strerror( errno ), errno );
+		return EXIT_FAILURE;
+	}
+
+	// fetch == true means the field offsets must be (re)computed from the
+	// line just read, i.e. it starts a new group.
+	bool fetch = true;
+	line_cache last_line;
+	char const * const SEP_END = SEP_START + strlen( SEP_START );
+	while( !feof( stdin ) )
+	{
+		if( !fgets( buf, static_cast<int>( buf_sz ), stdin ) )
+		{
+			if( !feof( stdin ) )
+			{
+				fprintf( stderr, "ERROR: fgets %s(%d)\n", strerror( errno ), errno );
+				free( buf );
+				return EXIT_FAILURE;
+			}
+			break;
+		}
+
+		// A full buffer whose last character is not a newline means the
+		// line was truncated, unless the input simply ended there.
+		size_t str_sz = strlen( buf );
+		if( str_sz == buf_sz - 1 && buf[str_sz - 1] != '\n' && !feof( stdin ) )
+		{
+			fprintf( stderr, "ERROR: buffer too small for some lines\n" );
+			free( buf );
+			return EXIT_FAILURE;
+		}
+
+		if( !fetch )
+		{
+			// Compare the selected fields of the new line (buf) with those
+			// recorded for the previous line (last_line).
+			char const* dst_ptr = buf;
+			for( size_t i = 0; i < field_cache.size(); ++i )
+			{
+				field_marker const& src = field_cache[i];
+				if( src.ignore() )
+				{
+					// Field not compared: advance until a separator or the
+					// end of the string is met, then step over it.
+					char const *sep = SEP_END;
+					while( sep == SEP_END )
+					{
+						++dst_ptr;
+						sep = SEP_START;
+						for( ; sep != SEP_END && *dst_ptr && *dst_ptr != *sep; ++sep ){}
+					}
+					++dst_ptr;
+					continue;
+				}
+				char const * src_ptr = last_line.data() + src.start();
+				size_t len = src.end() - src.start();
+				// The recorded field would not even fit in what is left.
+				if( len > buf_sz - static_cast<size_t>( dst_ptr - buf ) )
+				{
+					fetch = true;
+					break;
+				}
+
+				if( 0 != memcmp( dst_ptr, src_ptr, len ) )
+				{
+					fetch = true;
+					break;
+				}
+
+				// The matching text must be a whole field: the character
+				// following it has to be a field or entry separator.
+				char last = dst_ptr[len];
+				char const * sep_ = SEP_START;
+				assert( sep_ != nullptr );
+				while( 0 != *sep_ && *sep_ != last && *SEP_ENTRY != last )
+				{
+					++sep_;
+				}
+				if( 0 == *sep_ )
+				{
+					fetch = true;
+					break;
+				}
+				dst_ptr += len;
+				assert( dst_ptr >= buf );
+			}
+
+			// Close the previous line: entry separator when a new group
+			// starts, first field separator when the lines are merged.
+			fputc( fetch ? *SEP_ENTRY : * SEP_START, stdout );
+		}
+
+		// Cache the line, replacing a trailing entry separator by NUL.
+		last_line.assign( buf, buf + str_sz );
+		if( last_line.back() == *SEP_ENTRY )
+		{
+			last_line.back() = 0;
+		}
+
+		if( fetch )
+		{
+			// Record where each selected field starts and ends in the line.
+			line_cache::iterator start = last_line.begin();
+			line_cache::iterator cache_end = last_line.end();
+			size_t field_index = 0;
+			while( start != cache_end && field_index < field_cache.size() )
+			{
+				auto end = last_line.end();
+				if( last_line.back() == 0 )
+				{
+					--end;
+				}
+				line_cache::iterator it = std::find_first_of
+				(
+					start, end,
+					SEP_START, SEP_END
+				);
+				if( !field_cache[field_index].ignore() )
+				{
+					field_cache[field_index].define(
+						static_cast<uint16_t>( start - last_line.begin() ),
+						static_cast<uint16_t>( it - last_line.begin() )
+					);
+				}
+				if( it == end )
+				{
+					// No separator left. Stepping past `end` would move the
+					// iterator outside the line when the line has no
+					// trailing entry separator.
+					break;
+				}
+				start = it + 1;
+				++field_index;
+			}
+			fetch = false;
+		}
+		fputs( last_line.data(), stdout );
+	}
+	fputc( *SEP_ENTRY, stdout );
+	free( buf );
+	return EXIT_SUCCESS;
+}
+
+/**
+ * Builds the field table from a comma-separated list of 1-based indexes.
+ *
+ * field_cache is grown to the highest index seen and the marker of every
+ * listed index is defined as (0, 0); the other markers keep their default
+ * state. Empty entries ("1,,3") and the index 0 select nothing.
+ *
+ * @param FIELDS      NUL-terminated list holding only digits and commas
+ * @param field_cache table to fill
+ * @return false on success; true, after printing an error on stderr, when
+ *         FIELDS contains any other character
+ */
+bool allocate_markers(
+	char const * const FIELDS,
+	field_marker_t& field_cache
+)
+{
+	field_cache.reserve( UINT8_MAX ); //255 fields should fit most cases
+	size_t last_field = 0;
+
+	for( char const * fields = FIELDS; ; ++fields )
+	{
+		// <ctype.h> functions need a value representable as unsigned char.
+		unsigned char const c = static_cast<unsigned char>( *fields );
+		if( isdigit( c ) )
+		{
+			last_field = last_field * 10 + static_cast<size_t>( c - '0' );
+		}
+		else if( c == ',' || c == 0 )
+		{
+			// Nothing was typed for this entry (or it is 0): skip it, as
+			// there is no slot last_field - 1 to mark.
+			if( last_field != 0 )
+			{
+				size_t max = std::max( field_cache.size(), last_field );
+				field_cache.resize( max );
+				field_cache[last_field - 1].define( 0, 0 );
+				last_field = 0;
+			}
+			if( c == 0 )
+			{
+				break;
+			}
+		}
+		else
+		{
+			fputs( "ERROR: FIELDS contains illegal characters\n", stderr );
+			return true;
+		}
+	}
+	field_cache.shrink_to_fit();
+	return false;
+}
+
+/**
+ * Prints the usage line, the description of the program and the list of
+ * options (through print_opts) to target.
+ *
+ * @param pgm    program name shown after "Usage: "
+ * @param target stream to write to
+ * @param start  first option descriptor
+ * @param end    one past the last option descriptor
+ */
+void print_help( char const* pgm, FILE* target, opt_desc_t const* start, opt_desc_t const* end )
+{
+	static char const usage_prefix[] = "Usage: ";
+	static char const usage_body[] =
+		" [OPTIONS]\n"
+		"Description:\n"
+		"\tThis program reads stdin and when consecutive lines have specific fields all\n"
+		"\tcontaining the same value, prints them replacing the newline character by\n"
+		"\tthe 1st character in FIELD_SEP.\n"
+		"\tFields are delimited by the FIELD_SEP environment variable. If not defined,\n"
+		"\t\" \\t\" is used instead (see isblank(3)).\n"
+		"\tFields to use are defined by the environment variable FIELDS, which only\n"
+		"\tuse unsigned decimal integers separated by commas, other characters makes the\n"
+		"\tvalue invalid.\n"
+		"\tIf FIELDS is not defined or invalid, exits with an error.\n"
+		"\tEmpty field indexes (\"1,,3\") are ignored (will resolve in \"1,3\").\n"
+		"\tDo not work if input is not in line mode.\n"
+		"\tLine separator is defined by ENTRY_SEP, or \"\\n\" if not defined.\n"
+		"Options:\n";
+
+	fputs( usage_prefix, target );
+	fputs( pgm, target );
+	fputs( usage_body, target );
+	print_opts( target, start, end );
+}
+
diff --git a/src/lmerge.cpp b/src/lmerge.cpp
@@ -1,347 +0,0 @@
-#ifdef LIBCPP_MUSL_STATIC
-#define __GLIBC_PREREQ(x,y) 0
-#endif
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <errno.h>
-#include <string.h>
-#include <assert.h>
-#include <stdint.h>
-#include <ctype.h>
-
-#include <algorithm>
-#include <iterator>
-
-/**
- * TODO:
- * * check that ENTRY_SEP works as expected;
- * * fix the fact input needs a "\\n" at end of last line for it to be merged;
- * * UTF-8 support (field separators);
- * * -v/--version option;
- * * allow to customize the memory allocation scheme at runtime;
- * * allow to not print twice merged fields;
- * * allow to set verbosity on stderr;
- * * remove hard-coded limit of UINT16_MAX - 1 for fields start/stop positions;
- * * print as many lines as there where duplicates?
- *
- * Coding rules:
- * * const affect what is before it, so it must follow the type;
- **/
-
-#include "vector.hpp"
-#include "optparser.hpp"
-
-class field_marker;
-typedef vector<char> line_cache;
-typedef vector<field_marker> field_marker_t;
-
-class field_marker
-{
- uint16_t m_start = UINT16_MAX, m_end = UINT16_MAX;
-
-public:
- bool ignore( void ) const
- {
- return m_start == m_end && m_start == UINT16_MAX;
- }
-
- void define( uint16_t start, uint16_t end )
- {
- assert( end >= start && start != UINT8_MAX && end != UINT8_MAX );
- m_start = start; m_end = end;
- }
-
- uint16_t start( void ) const
- {
- assert( !ignore() );
- return m_start;
- }
-
- uint16_t end( void ) const
- {
- assert( !ignore() );
- return m_end;
- }
-};
-
-bool allocate_markers(
- char const * const FIELDS,
- field_marker_t& field_cache
-);
-
-void print_help( char const* pgm, FILE* target, opt_desc_t const* start, opt_desc_t const* end );
-
-int main( int argc, char const *const *argv )
-{
- char const * SEP_START = getenv( "FIELD_SEP" );
- char const * SEP_ENTRY = getenv( "ENTRY_SEP" );
- char const * FIELDS = getenv( "FIELDS" );
- if( !SEP_START )
- {
- SEP_START = " \t";
- }
- if( !SEP_ENTRY )
- {
- SEP_ENTRY = "\n";
- }
-
- opt_desc_t opts[] =
- {
- { "help", "shows this message", 'h', 0, nullptr, nullptr, nullptr },
-#ifndef NO_CMDLINE
- { "field_sep", "field separator", 't' , 0, &SEP_START, set<char const**>, show<char*> },
- { "entry_sep", "entry separator", 'l' , 0, &SEP_ENTRY, set<char const**>, show<char*> },
- { "fields" , "fields to compare", 'f', 0, &FIELDS , set<char const**>, show<char*> },
-#endif
- };
- auto b_opts = std::begin( opts );
- auto e_opts = std::end( opts );
-
- char const *arg = argv[1]; assert( argc > 0 );
- for( int iarg = 1; iarg != argc; ++iarg, ++argv )
- {
- auto error = parse_cmd_opt( arg, b_opts, e_opts );
- switch( error )
- {
- case MAX_COUNT:
- arg_warning( arg, error );
- break;
- case NONE:
- case IGNORED:
- break;
- case SET_NO_VAL:
- case SET_VAL_IGN:
- case SET_FAIL:
- case BAD_ARGS:
- case BAD_SETTER:
- print_help( argv[0], stderr, b_opts, e_opts );
- arg_error( arg, error );
- return EXIT_FAILURE;
- }
- }
-
- if( opts[0].count )
- {
- print_help( argv[0], stdout, b_opts, e_opts );
- return EXIT_SUCCESS;
- }
-
- if( !FIELDS )
- {
- fputs( "ERROR: FIELDS is not defined\n", stderr );
- return EXIT_FAILURE;
- }
-
- if( strlen( FIELDS ) == 0 )
- {
- fputs( "ERROR: FIELDS is empty\n", stderr );
- return EXIT_FAILURE;
- }
-
- field_marker_t field_cache;
- if( allocate_markers( FIELDS, field_cache ) )
- {
- return EXIT_FAILURE;
- }
-
- size_t buf_sz = 2048;
- char* buf = nullptr;
-
- // allocating a cache of at least 16 bytes.
- // Note: I don't see how merging lines smaller than 16 bytes can be useful
- // also, not even enough mem for that would indicate bigger problems...
- while( !buf && buf_sz >= 32 )
- {
- buf_sz /= 2;
- char* nbuf = static_cast<char*>( realloc( buf, buf_sz ) );
- if( !nbuf )
- {
- free( buf );
- return EXIT_FAILURE;
- }
- buf = nbuf;
- }
-
- if( !buf )
- {
- fprintf( stderr, "ERROR: malloc %s(%d)\n", strerror( errno ), errno );
- free( buf );
- return EXIT_FAILURE;
- }
-
- bool fetch = true;
- line_cache last_line;
- char const * const SEP_END = SEP_START + strlen( SEP_START );
- while( !feof( stdin ) )
- {
- if( !fgets( buf, static_cast<int>( buf_sz ), stdin ) )
- {
- free( buf );
- buf = nullptr;
- if( !feof( stdin ) )
- {
- fprintf( stderr, "ERROR: fgets %s(%d)\n", strerror( errno ), errno );
- free( buf );
- return EXIT_FAILURE;
- }
- break;
- }
-
- size_t str_sz = strlen( buf );
- if( str_sz == buf_sz - 1 && buf[str_sz] != '\n' && !feof( stdin ) )
- {
- fprintf( stderr, "ERROR: buffer too small for some lines\n" );
- free( buf );
- return EXIT_FAILURE;
- }
-
- if( !fetch )
- {
- char const* dst_ptr = buf;
- for( size_t i = 0; i < field_cache.size(); ++i )
- {
- field_marker const& src = field_cache[i];
- if( src.ignore() )
- {
- char const *sep = SEP_END;
- while( sep == SEP_END )
- {
- ++dst_ptr;
- sep = SEP_START;
- for( ; sep != SEP_END && *dst_ptr && *dst_ptr != *sep; ++sep ){}
- }
- ++dst_ptr;
- continue;
- }
- char const * src_ptr = last_line.data() + src.start();
- size_t len = src.end() - src.start();
- if( len > buf_sz - static_cast<size_t>( dst_ptr - buf ) )
- {
- fetch = true;
- break;
- }
-
- if( 0 != memcmp( dst_ptr, src_ptr, len ) )
- {
- fetch = true;
- break;
- }
-
- char last = dst_ptr[len];
- char const * sep_ = SEP_START;
- assert( sep_ != nullptr );
- while( 0 != *sep_ && *sep_ != last && *SEP_ENTRY != last )
- {
- ++sep_;
- }
- if( 0 == *sep_ )
- {
- fetch = true;
- break;
- }
- dst_ptr += len;
- assert( dst_ptr >= buf );
- }
-
- fputc( fetch ? *SEP_ENTRY : * SEP_START, stdout );
- }
-
- last_line.assign( buf, buf + str_sz );
- if( last_line.back() == *SEP_ENTRY )
- {
- last_line.back() = 0;
- }
-
- if( fetch )
- {
- line_cache::iterator start = last_line.begin();
- line_cache::iterator cache_end = last_line.end();
- size_t field_index = 0;
- while( start != cache_end && field_index < field_cache.size() )
- {
- auto end = last_line.end();
- if( last_line.back() == 0 )
- {
- --end;
- }
- line_cache::iterator it = std::find_first_of
- (
- start, end,
- SEP_START, SEP_END
- );
- if( !field_cache[field_index].ignore() )
- {
- field_cache[field_index].define(
- static_cast<uint16_t>( start - last_line.begin() ),
- static_cast<uint16_t>( it - last_line.begin() )
- );
- }
- start = it + 1;
- ++field_index;
- }
- fetch = false;
- }
- fputs( last_line.data(), stdout );
- }
- fputc( *SEP_ENTRY, stdout );
- free( buf );
- return EXIT_SUCCESS;
-}
-
-bool allocate_markers(
- char const * const FIELDS,
- field_marker_t& field_cache
-)
-{
- field_cache.reserve( UINT8_MAX ); //255 fields should fit most cases
- size_t last_field = 0;
-
- char const * fields = FIELDS - 1;
- do
- {
- ++fields;
- if( isdigit( *fields ) )
- {
- last_field = last_field * 10 + static_cast<size_t>( *fields - '0' );
- }
- else if( *fields == ',' || *fields == 0 )
- {
- size_t max = std::max( field_cache.size(), last_field );
- field_cache.resize( max );
- field_cache[last_field - 1].define( 0, 0 );
- last_field = 0;
- }
- else
- {
- fputs( "ERROR: FIELDS contains illegal characters\n", stderr );
- return true;
- }
- }while( *fields );
- field_cache.shrink_to_fit();
- return false;
-}
-
-void print_help( char const* pgm, FILE* target, opt_desc_t const* start, opt_desc_t const* end )
-{
- fputs( "Usage: ", target );
- fputs( pgm, target );
- fputs( " [OPTIONS]\n"
- "Description:\n"
- "\tThis program reads stdin and when consecutive lines have specific fields all\n"
- "\tcontaining the same value, prints them replacing the newline character by\n"
- "\tthe 1st character in FIELD_SEP.\n"
- "\tFields are delimited by the FIELD_SEP environment variable. If not defined,\n"
- "\t\" \\t\" is used instead (see isblank(3)).\n"
- "\tFields to use are defined by the environment variable FIELDS, which only\n"
- "\tuse unsigned decimal integers separated by commas, other characters makes the\n"
- "\tvalue invalid.\n"
- "\tIf FIELDS is not defined or invalid, exits with an error.\n"
- "\tEmpty field indexes (\"1,,3\") are ignored (will resolve in \"1,3\").\n"
- "\tDo not work if input is not in line mode.\n"
- "\tLine separator is defined by ENTRY_SEP, or \"\\n\" if not defined.\n"
- "Options:\n"
- , target );
- print_opts( target, start, end );
-}
-
diff --git a/src/optparser.cpp b/src/optparser.cpp
@@ -1,121 +0,0 @@
-#include <string.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <errno.h>
-#include <limits.h>
-
-#include "optparser.hpp"
-#include "utils.hpp"
-
-// those strings must be defined in same order than the enum
-// Trick: use vim's block insertion to check them
-char const *parse_error_msgs[] =
-{
- /*NONE */"no error",
- /*IGNORED */"not an option",
- /*MAX_COUNT */"count overflow",
- /*SET_NO_VAL */"this option needs a value",
- /*SET_VAL_IGN*/"this option does not needs a value",
- /*SET_FAIL */"failed to parse the value",
- /*BAD_ARGS */"application bug: bad arguments to call",
- /*BAD_SETTER */"application bug: bad values in option",
-};
-
-parse_error_t parse_cmd_opt( char const* arg, opt_desc_t* start, opt_desc_t const* end )
-{
- BUG_CHECK( !( arg && start && end && end > start ), BAD_ARGS );
-
- if( arg[0] != '-' || arg[1] == 0 )
- {
- return IGNORED;
- }
-
- opt_desc_t* opt = start;
- char const* argv = arg + 2;
-
- //search for option (stores in opt)
- if( arg[1] == '-' ) //long
- {
- char const* name = argv;
- size_t name_sz;
- argv = strchr( name, '=' );
- if( argv )
- {
- assert( argv > name );
- name_sz = static_cast<size_t>( argv - name );
- ++argv;
- }
- else
- {
- name_sz = strlen( name );
- }
-
- for( ; true
- && opt != end
- && 0 != strncmp( name, opt->option, name_sz )
- && name_sz != strlen( opt->option ); ++opt )
- {
- }
- }
- else //short
- {
- uint32_t name = static_cast<uint8_t>( arg[1] );
- if( name & 0x80 )
- {
- assert( !( name & 0x80 ) && "TODO: utf-8 support" );
- abort();
- }
- for( ; opt != end && opt->short_option != name; ++opt )
- {
- }
- }
-
- //BUG_CHECK( ( opt->set && !opt->value ) || ( !opt->set && opt->value ), BAD_SETTER );
-
- if( opt == end )
- {
- return IGNORED;
- }
-
- if( opt->value && !empty_array( argv ) )
- {
- return SET_NO_VAL;
- }
-
- if( !opt->value && empty_array( argv ) )
- {
- return SET_VAL_IGN;
- }
-
- if( opt->set && opt->set( opt->value, argv ) )
- {
- return SET_FAIL;
- }
-
- if( opt->count < UINT32_MAX )
- {
- ++opt->count;
- }
- return opt->count == UINT32_MAX ? MAX_COUNT : NONE;
-}
-
-void print_opts( FILE* target, opt_desc_t const* start, opt_desc_t const* end )
-{
- char short_name[] = ",-????";
- for( ; start != end; ++start )
- {
- memcpy( short_name + 2, &start->short_option, sizeof( start->short_option ) );
- fprintf( target, "\t* --%s%s: %s",
- start->option,
- start->short_option ? short_name : "",
- start->description
- );
- if( start->show && start->show( start->value, target ) )
- {
- abort();
- }
- fputc( '\n', target );
- }
-}