Converting fixed-width text file to CSV in C












4














I'm trying to get reacquainted with C after years of higher-level languages.



I wrote a small fixed-width to CSV converter. It works (for ASCII at least) and it's quite fast, but I wonder if I missed anything or if it can be made even faster.



I'm reading the file line by line and process the desired columns (some can be skipped), placing them into an output 8K buffer. I'm using one function to do the substring, appending and trimming trailing space (just the space, in this case I don't want to bother testing for other whitespace characters).



#include <ctype.h>

// number of columns to process
#define COLS 3
#define LINE_SIZE 256
#define BUFFER_SIZE 8192

#define INFILE "in.txt"
#define OUTFILE "out.csv"

size_t RANGES[COLS][2] = {{0, 6}, {6, 20}, {29, 3}};

/*
* Copy from source to destination, up to len chars, trimming trailing spaces
* Returns number of chars actually copied
*/
int trimcpy(char *destination, char *source, size_t len) {
// trim spaces from the end - we only care about the space char
while (len>0 && source[len-1]==' ')
len--;

int i = 0;
while (i<len && *source != '') {
*destination++ = *source++;
i++;
}
*destination = '';

return i;
}

int main(void) {
FILE *rfp;
FILE *wfp;

char line[LINE_SIZE];
char out[BUFFER_SIZE];

rfp = fopen(INFILE, "r");
if (rfp == NULL)
exit(EXIT_FAILURE);

wfp = fopen(OUTFILE, "w");
if (wfp == NULL)
exit(EXIT_FAILURE);

int p = 0;

// fgets is 4x faster than getline!
while (fgets(line, LINE_SIZE, rfp) != NULL) {
// write buffer if almost full (largest column is 20 chars)
if (p > BUFFER_SIZE - 20) {
fputs(out, wfp);
p = 0;
}

// go through the columns
for (int i=0; i<COLS; i++) {
p += trimcpy(out+p, line+RANGES[i][0], RANGES[i][1]);
p += trimcpy(out+p, i<COLS-1 ? "," : "n", 1);
}
}

// write any remaining data in buffer
fputs(out, wfp);

fclose(rfp);
fclose(wfp);

exit(EXIT_SUCCESS);
}









share|improve this question









New contributor




Armand is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.

























    4














    I'm trying to get reacquainted with C after years of higher-level languages.



    I wrote a small fixed-width to CSV converter. It works (for ASCII at least) and it's quite fast, but I wonder if I missed anything or if it can be made even faster.



    I'm reading the file line by line and process the desired columns (some can be skipped), placing them into an output 8K buffer. I'm using one function to do the substring, appending and trimming trailing space (just the space, in this case I don't want to bother testing for other whitespace characters).



    #include <ctype.h>

    // number of columns to process
    #define COLS 3
    #define LINE_SIZE 256
    #define BUFFER_SIZE 8192

    #define INFILE "in.txt"
    #define OUTFILE "out.csv"

    size_t RANGES[COLS][2] = {{0, 6}, {6, 20}, {29, 3}};

    /*
    * Copy from source to destination, up to len chars, trimming trailing spaces
    * Returns number of chars actually copied
    */
    int trimcpy(char *destination, char *source, size_t len) {
    // trim spaces from the end - we only care about the space char
    while (len>0 && source[len-1]==' ')
    len--;

    int i = 0;
    while (i<len && *source != '') {
    *destination++ = *source++;
    i++;
    }
    *destination = '';

    return i;
    }

    int main(void) {
    FILE *rfp;
    FILE *wfp;

    char line[LINE_SIZE];
    char out[BUFFER_SIZE];

    rfp = fopen(INFILE, "r");
    if (rfp == NULL)
    exit(EXIT_FAILURE);

    wfp = fopen(OUTFILE, "w");
    if (wfp == NULL)
    exit(EXIT_FAILURE);

    int p = 0;

    // fgets is 4x faster than getline!
    while (fgets(line, LINE_SIZE, rfp) != NULL) {
    // write buffer if almost full (largest column is 20 chars)
    if (p > BUFFER_SIZE - 20) {
    fputs(out, wfp);
    p = 0;
    }

    // go through the columns
    for (int i=0; i<COLS; i++) {
    p += trimcpy(out+p, line+RANGES[i][0], RANGES[i][1]);
    p += trimcpy(out+p, i<COLS-1 ? "," : "n", 1);
    }
    }

    // write any remaining data in buffer
    fputs(out, wfp);

    fclose(rfp);
    fclose(wfp);

    exit(EXIT_SUCCESS);
    }









    share|improve this question









    New contributor




    Armand is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
    Check out our Code of Conduct.























      4












      4








      4







      I'm trying to get reacquainted with C after years of higher-level languages.



      I wrote a small fixed-width to CSV converter. It works (for ASCII at least) and it's quite fast, but I wonder if I missed anything or if it can be made even faster.



      I'm reading the file line by line and process the desired columns (some can be skipped), placing them into an output 8K buffer. I'm using one function to do the substring, appending and trimming trailing space (just the space, in this case I don't want to bother testing for other whitespace characters).



      #include <ctype.h>

      // number of columns to process
      #define COLS 3
      #define LINE_SIZE 256
      #define BUFFER_SIZE 8192

      #define INFILE "in.txt"
      #define OUTFILE "out.csv"

      size_t RANGES[COLS][2] = {{0, 6}, {6, 20}, {29, 3}};

      /*
      * Copy from source to destination, up to len chars, trimming trailing spaces
      * Returns number of chars actually copied
      */
      int trimcpy(char *destination, char *source, size_t len) {
      // trim spaces from the end - we only care about the space char
      while (len>0 && source[len-1]==' ')
      len--;

      int i = 0;
      while (i<len && *source != '') {
      *destination++ = *source++;
      i++;
      }
      *destination = '';

      return i;
      }

      int main(void) {
      FILE *rfp;
      FILE *wfp;

      char line[LINE_SIZE];
      char out[BUFFER_SIZE];

      rfp = fopen(INFILE, "r");
      if (rfp == NULL)
      exit(EXIT_FAILURE);

      wfp = fopen(OUTFILE, "w");
      if (wfp == NULL)
      exit(EXIT_FAILURE);

      int p = 0;

      // fgets is 4x faster than getline!
      while (fgets(line, LINE_SIZE, rfp) != NULL) {
      // write buffer if almost full (largest column is 20 chars)
      if (p > BUFFER_SIZE - 20) {
      fputs(out, wfp);
      p = 0;
      }

      // go through the columns
      for (int i=0; i<COLS; i++) {
      p += trimcpy(out+p, line+RANGES[i][0], RANGES[i][1]);
      p += trimcpy(out+p, i<COLS-1 ? "," : "n", 1);
      }
      }

      // write any remaining data in buffer
      fputs(out, wfp);

      fclose(rfp);
      fclose(wfp);

      exit(EXIT_SUCCESS);
      }









      share|improve this question









      New contributor




      Armand is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.











      I'm trying to get reacquainted with C after years of higher-level languages.



      I wrote a small fixed-width to CSV converter. It works (for ASCII at least) and it's quite fast, but I wonder if I missed anything or if it can be made even faster.



      I'm reading the file line by line and process the desired columns (some can be skipped), placing them into an output 8K buffer. I'm using one function to do the substring, appending and trimming trailing space (just the space, in this case I don't want to bother testing for other whitespace characters).



      #include <ctype.h>

      // number of columns to process
      #define COLS 3
      #define LINE_SIZE 256
      #define BUFFER_SIZE 8192

      #define INFILE "in.txt"
      #define OUTFILE "out.csv"

      size_t RANGES[COLS][2] = {{0, 6}, {6, 20}, {29, 3}};

      /*
      * Copy from source to destination, up to len chars, trimming trailing spaces
      * Returns number of chars actually copied
      */
      int trimcpy(char *destination, char *source, size_t len) {
      // trim spaces from the end - we only care about the space char
      while (len>0 && source[len-1]==' ')
      len--;

      int i = 0;
      while (i<len && *source != '') {
      *destination++ = *source++;
      i++;
      }
      *destination = '';

      return i;
      }

      int main(void) {
      FILE *rfp;
      FILE *wfp;

      char line[LINE_SIZE];
      char out[BUFFER_SIZE];

      rfp = fopen(INFILE, "r");
      if (rfp == NULL)
      exit(EXIT_FAILURE);

      wfp = fopen(OUTFILE, "w");
      if (wfp == NULL)
      exit(EXIT_FAILURE);

      int p = 0;

      // fgets is 4x faster than getline!
      while (fgets(line, LINE_SIZE, rfp) != NULL) {
      // write buffer if almost full (largest column is 20 chars)
      if (p > BUFFER_SIZE - 20) {
      fputs(out, wfp);
      p = 0;
      }

      // go through the columns
      for (int i=0; i<COLS; i++) {
      p += trimcpy(out+p, line+RANGES[i][0], RANGES[i][1]);
      p += trimcpy(out+p, i<COLS-1 ? "," : "n", 1);
      }
      }

      // write any remaining data in buffer
      fputs(out, wfp);

      fclose(rfp);
      fclose(wfp);

      exit(EXIT_SUCCESS);
      }






      performance c csv






      share|improve this question









      New contributor




      Armand is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.











      share|improve this question









      New contributor




      Armand is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.









      share|improve this question




      share|improve this question








      edited Dec 25 at 14:39









      200_success

      128k15150412




      128k15150412






      New contributor




      Armand is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.









      asked Dec 25 at 10:18









      Armand

      233




      233




      New contributor




      Armand is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.





      New contributor





      Armand is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.






      Armand is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.






















          2 Answers
          2






          active

          oldest

          votes


















          2














          Consider const char *



          const char *source allows the function to be used with const strings, better conveys code's functionality and more readily allows for optimizations.



          // int trimcpy(char *destination, char *source, size_t len) {
          int trimcpy(char *destination, const char *source, size_t len) {


          Undefined behavior



          trimcpy() begins by examining later elements of source even though they are not known to have been assigned. The string may not be as long as len.



          int trimcpy_alt(char *destination, char *source, size_t len) {
          // Suggest memchr() here rather than strlen() to not look too far.
          char *null_character_pos = memchr(source, '', len);
          if (null_character_pos) len = null_character_pos - source;
          ...


          Undefined behavior 2



          In a selective case of an empty file, the first call to fgets() returns NULL and then the following fputs(out, wfp); is UB as out contents are not initialized. Add initialization or assignment.



          char out[BUFFER_SIZE];
          out[0] = ''; // add


          Not trimming end spaces before n



          trimcpy() does not trim spaces just before 'n'. I suspect this in not in line with OP's goals.



          Avoid redundant information



          Redundant information takes more work to maintain. Consider dropping the 3



          // #define COLS 3
          size_t RANGES[2] = {{0, 6}, {6, 20}, {29, 3}};
          #define COLS (sizeof RANGES/sizeof RANGES[0])




          Minor things:



          Mixing types



          Little reason to mix size_t and int types here for array indexing and sizing. Recommend to use just one: size_t (my preference) or 2) int, but not both.



          // From
          int trimcpy(char *destination, char *source, size_t len) {
          ...
          int i = 0;
          while (i<len && *source != '') {

          // To
          size_t trimcpy(char *destination, char *source, size_t len) {
          ...
          size_t i = 0;
          while (i<len && *source != '') {


          main()



            // int p = 0;
          size_t p = 0;




          Subtraction with unsigned types



          If above size_t employed, consider the 2 below: Which works well should a later version of code surprisingly define #define BUFFER_SIZE 19?



          if (p > BUFFER_SIZE - 20) {
          if (p + 20 > BUFFER_SIZE) {



          The first is the same as if (p > SIZE_MAX) { or if (0) {







          share|improve this answer























          • @Armand null_character_pos - source subtracts two char * pointers. The difference is the length from source to null_character_pos, the length of the string.
            – chux
            2 days ago



















          1














          This code:



          int i = 0;
          while (i<len && *source != '') {
          *destination++ = *source++;
          i++;
          }
          *destination = '';


          shouldn't be doing a byte-by-byte copy. (It should also be a for-loop instead of a while loop, but that's beside the point.) Instead, you should probably just call memcpy:



          memcpy(destination, source, len);
          destination[len] = '';


          The reference for fopen says that:




          Upon successful completion, fopen() shall return a pointer to the object controlling the stream. Otherwise, a null pointer shall be returned, and errno shall be set to indicate the error.




          Your invocation here:



          rfp = fopen(INFILE, "r");
          if (rfp == NULL)
          exit(EXIT_FAILURE);


          is throwing away the error information. In the failure block, you should be calling perror to see why exactly the call failed.



          This:



          while (fgets(line, LINE_SIZE, rfp) != NULL) {


          assumes that NULL only happens if an EOF is encountered, but that isn't necessarily the case. You need to check feof, and if it isn't an EOF, then something bad has happened and you need to again call perror and bail.



          A note about the fgets documentation described in the POSIX standard. The "CX" in this text:




          ... [CX] and shall set errno to indicate the error.




          indicates that support for setting errno is in an extension; however, from the same standard:




          The functionality described is an extension to the ISO C standard. Application developers may make use of an extension as it is supported on all POSIX.1-2017-conforming systems.




          So as long as you're targeting a system that doesn't violate POSIX, you should be able to use it. Even if a system violated POSIX and didn't set errno, you should still be checking feof; the condition where fgets returns NULL and errno is set to an error would just never be seen. The worst that would happen is a perror indicating that the system doesn't know what the error is, but you still know that there's an error.



          Lastly: do some light reading here - https://stackoverflow.com/questions/461449/return-statement-vs-exit-in-main



          I don't recommend calling exit at the end of main; simply return.






          share|improve this answer























          • On fopen() error, The C spec says "If the open operation fails, fopen returns a null pointer.". There is no spec concerning errno with that function - unlike the extension in your reference. OP's code is portable because of that. Using "you should be calling perror()" can well be un-informative.
            – chux
            Dec 26 at 9:00












          • The similar advice about "You need to check feof, ... and you need to again call perror and bail." similarly relies on an extension.
            – chux
            Dec 26 at 9:06












          • @chux Edited; I stand by my recommendation but I've added more detail about the standard.
            – Reinderien
            Dec 26 at 15:03











          Your Answer





          StackExchange.ifUsing("editor", function () {
          return StackExchange.using("mathjaxEditing", function () {
          StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
          StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
          });
          });
          }, "mathjax-editing");

          StackExchange.ifUsing("editor", function () {
          StackExchange.using("externalEditor", function () {
          StackExchange.using("snippets", function () {
          StackExchange.snippets.init();
          });
          });
          }, "code-snippets");

          StackExchange.ready(function() {
          var channelOptions = {
          tags: "".split(" "),
          id: "196"
          };
          initTagRenderer("".split(" "), "".split(" "), channelOptions);

          StackExchange.using("externalEditor", function() {
          // Have to fire editor after snippets, if snippets enabled
          if (StackExchange.settings.snippets.snippetsEnabled) {
          StackExchange.using("snippets", function() {
          createEditor();
          });
          }
          else {
          createEditor();
          }
          });

          function createEditor() {
          StackExchange.prepareEditor({
          heartbeatType: 'answer',
          autoActivateHeartbeat: false,
          convertImagesToLinks: false,
          noModals: true,
          showLowRepImageUploadWarning: true,
          reputationToPostImages: null,
          bindNavPrevention: true,
          postfix: "",
          imageUploader: {
          brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
          contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
          allowUrls: true
          },
          onDemand: true,
          discardSelector: ".discard-answer"
          ,immediatelyShowMarkdownHelp:true
          });


          }
          });






          Armand is a new contributor. Be nice, and check out our Code of Conduct.










          draft saved

          draft discarded


















          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f210307%2fconverting-fixed-width-text-file-to-csv-in-c%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown

























          2 Answers
          2






          active

          oldest

          votes








          2 Answers
          2






          active

          oldest

          votes









          active

          oldest

          votes






          active

          oldest

          votes









          2














          Consider const char *



          const char *source allows the function to be used with const strings, better conveys code's functionality and more readily allows for optimizations.



          // int trimcpy(char *destination, char *source, size_t len) {
          int trimcpy(char *destination, const char *source, size_t len) {


          Undefined behavior



          trimcpy() begins by examining later elements of source even though they are not known to have been assigned. The string may not be as long as len.



          int trimcpy_alt(char *destination, char *source, size_t len) {
          // Suggest memchr() here rather than strlen() to not look too far.
          char *null_character_pos = memchr(source, '', len);
          if (null_character_pos) len = null_character_pos - source;
          ...


          Undefined behavior 2



          In a selective case of an empty file, the first call to fgets() returns NULL and then the following fputs(out, wfp); is UB as out contents are not initialized. Add initialization or assignment.



          char out[BUFFER_SIZE];
          out[0] = ''; // add


          Not trimming end spaces before n



          trimcpy() does not trim spaces just before 'n'. I suspect this in not in line with OP's goals.



          Avoid redundant information



          Redundant information takes more work to maintain. Consider dropping the 3



          // #define COLS 3
          size_t RANGES[2] = {{0, 6}, {6, 20}, {29, 3}};
          #define COLS (sizeof RANGES/sizeof RANGES[0])




          Minor things:



          Mixing types



          Little reason to mix size_t and int types here for array indexing and sizing. Recommend to use just one: size_t (my preference) or 2) int, but not both.



          // From
          int trimcpy(char *destination, char *source, size_t len) {
          ...
          int i = 0;
          while (i<len && *source != '') {

          // To
          size_t trimcpy(char *destination, char *source, size_t len) {
          ...
          size_t i = 0;
          while (i<len && *source != '') {


          main()



            // int p = 0;
          size_t p = 0;




          Subtraction with unsigned types



          If above size_t employed, consider the 2 below: Which works well should a later version of code surprisingly define #define BUFFER_SIZE 19?



          if (p > BUFFER_SIZE - 20) {
          if (p + 20 > BUFFER_SIZE) {



          The first is the same as if (p > SIZE_MAX) { or if (0) {







          share|improve this answer























          • @Armand null_character_pos - source subtracts two char * pointers. The difference is the length from source to null_character_pos, the length of the string.
            – chux
            2 days ago
















          2














          Consider const char *



          const char *source allows the function to be used with const strings, better conveys code's functionality and more readily allows for optimizations.



          // int trimcpy(char *destination, char *source, size_t len) {
          int trimcpy(char *destination, const char *source, size_t len) {


          Undefined behavior



          trimcpy() begins by examining later elements of source even though they are not known to have been assigned. The string may not be as long as len.



          int trimcpy_alt(char *destination, char *source, size_t len) {
          // Suggest memchr() here rather than strlen() to not look too far.
          char *null_character_pos = memchr(source, '', len);
          if (null_character_pos) len = null_character_pos - source;
          ...


          Undefined behavior 2



          In a selective case of an empty file, the first call to fgets() returns NULL and then the following fputs(out, wfp); is UB as out contents are not initialized. Add initialization or assignment.



          char out[BUFFER_SIZE];
          out[0] = ''; // add


          Not trimming end spaces before n



          trimcpy() does not trim spaces just before 'n'. I suspect this in not in line with OP's goals.



          Avoid redundant information



          Redundant information takes more work to maintain. Consider dropping the 3



          // #define COLS 3
          size_t RANGES[2] = {{0, 6}, {6, 20}, {29, 3}};
          #define COLS (sizeof RANGES/sizeof RANGES[0])




          Minor things:



          Mixing types



          Little reason to mix size_t and int types here for array indexing and sizing. Recommend to use just one: size_t (my preference) or 2) int, but not both.



          // From
          int trimcpy(char *destination, char *source, size_t len) {
          ...
          int i = 0;
          while (i<len && *source != '') {

          // To
          size_t trimcpy(char *destination, char *source, size_t len) {
          ...
          size_t i = 0;
          while (i<len && *source != '') {


          main()



            // int p = 0;
          size_t p = 0;




          Subtraction with unsigned types



          If above size_t employed, consider the 2 below: Which works well should a later version of code surprisingly define #define BUFFER_SIZE 19?



          if (p > BUFFER_SIZE - 20) {
          if (p + 20 > BUFFER_SIZE) {



          The first is the same as if (p > SIZE_MAX) { or if (0) {







          share|improve this answer























          • @Armand null_character_pos - source subtracts two char * pointers. The difference is the length from source to null_character_pos, the length of the string.
            – chux
            2 days ago














          2












          2








          2






          Consider const char *



          const char *source allows the function to be used with const strings, better conveys code's functionality and more readily allows for optimizations.



          // int trimcpy(char *destination, char *source, size_t len) {
          int trimcpy(char *destination, const char *source, size_t len) {


          Undefined behavior



          trimcpy() begins by examining later elements of source even though they are not known to have been assigned. The string may not be as long as len.



          int trimcpy_alt(char *destination, char *source, size_t len) {
          // Suggest memchr() here rather than strlen() to not look too far.
          char *null_character_pos = memchr(source, '', len);
          if (null_character_pos) len = null_character_pos - source;
          ...


          Undefined behavior 2



          In a selective case of an empty file, the first call to fgets() returns NULL and then the following fputs(out, wfp); is UB as out contents are not initialized. Add initialization or assignment.



          char out[BUFFER_SIZE];
          out[0] = ''; // add


          Not trimming end spaces before n



          trimcpy() does not trim spaces just before 'n'. I suspect this in not in line with OP's goals.



          Avoid redundant information



          Redundant information takes more work to maintain. Consider dropping the 3



          // #define COLS 3
          size_t RANGES[2] = {{0, 6}, {6, 20}, {29, 3}};
          #define COLS (sizeof RANGES/sizeof RANGES[0])




          Minor things:



          Mixing types



          Little reason to mix size_t and int types here for array indexing and sizing. Recommend to use just one: size_t (my preference) or 2) int, but not both.



          // From
          int trimcpy(char *destination, char *source, size_t len) {
          ...
          int i = 0;
          while (i<len && *source != '') {

          // To
          size_t trimcpy(char *destination, char *source, size_t len) {
          ...
          size_t i = 0;
          while (i<len && *source != '') {


          main()



            // int p = 0;
          size_t p = 0;




          Subtraction with unsigned types



          If above size_t employed, consider the 2 below: Which works well should a later version of code surprisingly define #define BUFFER_SIZE 19?



          if (p > BUFFER_SIZE - 20) {
          if (p + 20 > BUFFER_SIZE) {



          The first is the same as if (p > SIZE_MAX) { or if (0) {







          share|improve this answer














          Consider const char *



          const char *source allows the function to be used with const strings, better conveys code's functionality and more readily allows for optimizations.



          // int trimcpy(char *destination, char *source, size_t len) {
          int trimcpy(char *destination, const char *source, size_t len) {


          Undefined behavior



          trimcpy() begins by examining later elements of source even though they are not known to have been assigned. The string may not be as long as len.



          int trimcpy_alt(char *destination, char *source, size_t len) {
          // Suggest memchr() here rather than strlen() to not look too far.
          char *null_character_pos = memchr(source, '', len);
          if (null_character_pos) len = null_character_pos - source;
          ...


          Undefined behavior 2



          In a selective case of an empty file, the first call to fgets() returns NULL and then the following fputs(out, wfp); is UB as out contents are not initialized. Add initialization or assignment.



          char out[BUFFER_SIZE];
          out[0] = ''; // add


          Not trimming end spaces before n



          trimcpy() does not trim spaces just before 'n'. I suspect this in not in line with OP's goals.



          Avoid redundant information



          Redundant information takes more work to maintain. Consider dropping the 3



          // #define COLS 3
          size_t RANGES[2] = {{0, 6}, {6, 20}, {29, 3}};
          #define COLS (sizeof RANGES/sizeof RANGES[0])




          Minor things:



          Mixing types



          Little reason to mix size_t and int types here for array indexing and sizing. Recommend to use just one: size_t (my preference) or 2) int, but not both.



          // From
          int trimcpy(char *destination, char *source, size_t len) {
          ...
          int i = 0;
          while (i<len && *source != '') {

          // To
          size_t trimcpy(char *destination, char *source, size_t len) {
          ...
          size_t i = 0;
          while (i<len && *source != '') {


          main()



            // int p = 0;
          size_t p = 0;




          Subtraction with unsigned types



          If above size_t employed, consider the 2 below: Which works well should a later version of code surprisingly define #define BUFFER_SIZE 19?



          if (p > BUFFER_SIZE - 20) {
          if (p + 20 > BUFFER_SIZE) {



          The first is the same as if (p > SIZE_MAX) { or if (0) {








          share|improve this answer














          share|improve this answer



          share|improve this answer








          edited Dec 26 at 22:36

























          answered Dec 26 at 9:45









          chux

          12.6k11344




          12.6k11344












          • @Armand null_character_pos - source subtracts two char * pointers. The difference is the length from source to null_character_pos, the length of the string.
            – chux
            2 days ago


















          • @Armand null_character_pos - source subtracts two char * pointers. The difference is the length from source to null_character_pos, the length of the string.
            – chux
            2 days ago
















          @Armand null_character_pos - source subtracts two char * pointers. The difference is the length from source to null_character_pos, the length of the string.
          – chux
          2 days ago




          @Armand null_character_pos - source subtracts two char * pointers. The difference is the length from source to null_character_pos, the length of the string.
          – chux
          2 days ago













          1














          This code:



          int i = 0;
          while (i<len && *source != '') {
          *destination++ = *source++;
          i++;
          }
          *destination = '';


          shouldn't be doing a byte-by-byte copy. (It should also be a for-loop instead of a while loop, but that's beside the point.) Instead, you should probably just call memcpy:



          memcpy(destination, source, len);
          destination[len] = '';


          The reference for fopen says that:




          Upon successful completion, fopen() shall return a pointer to the object controlling the stream. Otherwise, a null pointer shall be returned, and errno shall be set to indicate the error.




          Your invocation here:



          rfp = fopen(INFILE, "r");
          if (rfp == NULL)
          exit(EXIT_FAILURE);


          is throwing away the error information. In the failure block, you should be calling perror to see why exactly the call failed.



          This:



          while (fgets(line, LINE_SIZE, rfp) != NULL) {


          assumes that NULL only happens if an EOF is encountered, but that isn't necessarily the case. You need to check feof, and if it isn't an EOF, then something bad has happened and you need to again call perror and bail.



          A note about the fgets documentation described in the POSIX standard. The "CX" in this text:




          ... [CX] and shall set errno to indicate the error.




          indicates that support for setting errno is in an extension; however, from the same standard:




          The functionality described is an extension to the ISO C standard. Application developers may make use of an extension as it is supported on all POSIX.1-2017-conforming systems.




          So as long as you're targeting a system that doesn't violate POSIX, you should be able to use it. Even if a system violated POSIX and didn't set errno, you should still be checking feof; the condition where fgets returns NULL and errno is set to an error would just never be seen. The worst that would happen is a perror indicating that the system doesn't know what the error is, but you still know that there's an error.



          Lastly: do some light reading here - https://stackoverflow.com/questions/461449/return-statement-vs-exit-in-main



          I don't recommend calling exit at the end of main; simply return.






          share|improve this answer























          • On fopen() error, The C spec says "If the open operation fails, fopen returns a null pointer.". There is no spec concerning errno with that function - unlike the extension in your reference. OP's code is portable because of that. Using "you should be calling perror()" can well be un-informative.
            – chux
            Dec 26 at 9:00












          • The similar advice about "You need to check feof, ... and you need to again call perror and bail." similarly relies on an extension.
            – chux
            Dec 26 at 9:06












          • @chux Edited; I stand by my recommendation but I've added more detail about the standard.
            – Reinderien
            Dec 26 at 15:03
















          1














          This code:



          int i = 0;
          while (i<len && *source != '') {
          *destination++ = *source++;
          i++;
          }
          *destination = '';


          shouldn't be doing a byte-by-byte copy. (It should also be a for-loop instead of a while loop, but that's beside the point.) Instead, you should probably just call memcpy:



          memcpy(destination, source, len);
          destination[len] = '';


          The reference for fopen says that:




          Upon successful completion, fopen() shall return a pointer to the object controlling the stream. Otherwise, a null pointer shall be returned, and errno shall be set to indicate the error.




          Your invocation here:



          rfp = fopen(INFILE, "r");
          if (rfp == NULL)
          exit(EXIT_FAILURE);


          is throwing away the error information. In the failure block, you should be calling perror to see why exactly the call failed.



          This:



          while (fgets(line, LINE_SIZE, rfp) != NULL) {


          assumes that NULL only happens if an EOF is encountered, but that isn't necessarily the case. You need to check feof, and if it isn't an EOF, then something bad has happened and you need to again call perror and bail.



          A note about the fgets documentation described in the POSIX standard. The "CX" in this text:




          ... [CX] and shall set errno to indicate the error.




          indicates that support for setting errno is in an extension; however, from the same standard:




          The functionality described is an extension to the ISO C standard. Application developers may make use of an extension as it is supported on all POSIX.1-2017-conforming systems.




          So as long as you're targeting a system that doesn't violate POSIX, you should be able to use it. Even if a system violated POSIX and didn't set errno, you should still be checking feof; the condition where fgets returns NULL and errno is set to an error would just never be seen. The worst that would happen is a perror indicating that the system doesn't know what the error is, but you still know that there's an error.



          Lastly: do some light reading here - https://stackoverflow.com/questions/461449/return-statement-vs-exit-in-main



          I don't recommend calling exit at the end of main; simply return.






          share|improve this answer























          • On fopen() error, The C spec says "If the open operation fails, fopen returns a null pointer.". There is no spec concerning errno with that function - unlike the extension in your reference. OP's code is portable because of that. Using "you should be calling perror()" can well be un-informative.
            – chux
            Dec 26 at 9:00












          • The similar advice about "You need to check feof, ... and you need to again call perror and bail." similarly relies on an extension.
            – chux
            Dec 26 at 9:06












          • @chux Edited; I stand by my recommendation but I've added more detail about the standard.
            – Reinderien
            Dec 26 at 15:03














          1












          1








          1






          This code:



          int i = 0;
          while (i<len && *source != '') {
          *destination++ = *source++;
          i++;
          }
          *destination = '';


          shouldn't be doing a byte-by-byte copy. (It should also be a for-loop instead of a while loop, but that's beside the point.) Instead, you should probably just call memcpy:



          memcpy(destination, source, len);
          destination[len] = '';


          The reference for fopen says that:




          Upon successful completion, fopen() shall return a pointer to the object controlling the stream. Otherwise, a null pointer shall be returned, and errno shall be set to indicate the error.




          Your invocation here:



          rfp = fopen(INFILE, "r");
          if (rfp == NULL)
          exit(EXIT_FAILURE);


          is throwing away the error information. In the failure block, you should be calling perror to see why exactly the call failed.



          This:



          while (fgets(line, LINE_SIZE, rfp) != NULL) {


          assumes that NULL only happens if an EOF is encountered, but that isn't necessarily the case. You need to check feof, and if it isn't an EOF, then something bad has happened and you need to again call perror and bail.



          A note about the fgets documentation described in the POSIX standard. The "CX" in this text:




          ... [CX] and shall set errno to indicate the error.




          indicates that support for setting errno is in an extension; however, from the same standard:




          The functionality described is an extension to the ISO C standard. Application developers may make use of an extension as it is supported on all POSIX.1-2017-conforming systems.




          So as long as you're targeting a system that doesn't violate POSIX, you should be able to use it. Even if a system violated POSIX and didn't set errno, you should still be checking feof; the condition where fgets returns NULL and errno is set to an error would just never be seen. The worst that would happen is a perror indicating that the system doesn't know what the error is, but you still know that there's an error.



          Lastly: do some light reading here - https://stackoverflow.com/questions/461449/return-statement-vs-exit-in-main



          I don't recommend calling exit at the end of main; simply return.






          share|improve this answer














          This code:



          int i = 0;
          while (i<len && *source != '') {
          *destination++ = *source++;
          i++;
          }
          *destination = '';


          shouldn't be doing a byte-by-byte copy. (It should also be a for-loop instead of a while loop, but that's beside the point.) Instead, you should probably just call memcpy:



          memcpy(destination, source, len);
          destination[len] = '';


          The reference for fopen says that:




          Upon successful completion, fopen() shall return a pointer to the object controlling the stream. Otherwise, a null pointer shall be returned, and errno shall be set to indicate the error.




          Your invocation here:



          rfp = fopen(INFILE, "r");
          if (rfp == NULL)
          exit(EXIT_FAILURE);


          is throwing away the error information. In the failure block, you should be calling perror to see why exactly the call failed.



          This:



          while (fgets(line, LINE_SIZE, rfp) != NULL) {


          assumes that NULL only happens if an EOF is encountered, but that isn't necessarily the case. You need to check feof, and if it isn't an EOF, then something bad has happened and you need to again call perror and bail.



          A note about the fgets documentation described in the POSIX standard. The "CX" in this text:




          ... [CX] and shall set errno to indicate the error.




          indicates that support for setting errno is in an extension; however, from the same standard:




          The functionality described is an extension to the ISO C standard. Application developers may make use of an extension as it is supported on all POSIX.1-2017-conforming systems.




          So as long as you're targeting a system that doesn't violate POSIX, you should be able to use it. Even if a system violated POSIX and didn't set errno, you should still be checking feof; the condition where fgets returns NULL and errno is set to an error would just never be seen. The worst that would happen is a perror indicating that the system doesn't know what the error is, but you still know that there's an error.



          Lastly: do some light reading here - https://stackoverflow.com/questions/461449/return-statement-vs-exit-in-main



          I don't recommend calling exit at the end of main; simply return.







          share|improve this answer














          share|improve this answer



          share|improve this answer








          edited Dec 26 at 15:02

























          answered Dec 26 at 1:42









          Reinderien

          3,189720




          3,189720












          • On fopen() error, The C spec says "If the open operation fails, fopen returns a null pointer.". There is no spec concerning errno with that function - unlike the extension in your reference. OP's code is portable because of that. Using "you should be calling perror()" can well be un-informative.
            – chux
            Dec 26 at 9:00












          • The similar advice about "You need to check feof, ... and you need to again call perror and bail." similarly relies on an extension.
            – chux
            Dec 26 at 9:06












          • @chux Edited; I stand by my recommendation but I've added more detail about the standard.
            – Reinderien
            Dec 26 at 15:03


















          • On fopen() error, The C spec says "If the open operation fails, fopen returns a null pointer.". There is no spec concerning errno with that function - unlike the extension in your reference. OP's code is portable because of that. Using "you should be calling perror()" can well be un-informative.
            – chux
            Dec 26 at 9:00












          • The similar advice about "You need to check feof, ... and you need to again call perror and bail." similarly relies on an extension.
            – chux
            Dec 26 at 9:06












          • @chux Edited; I stand by my recommendation but I've added more detail about the standard.
            – Reinderien
            Dec 26 at 15:03
















          On fopen() error, The C spec says "If the open operation fails, fopen returns a null pointer.". There is no spec concerning errno with that function - unlike the extension in your reference. OP's code is portable because of that. Using "you should be calling perror()" can well be un-informative.
          – chux
          Dec 26 at 9:00






          On fopen() error, The C spec says "If the open operation fails, fopen returns a null pointer.". There is no spec concerning errno with that function - unlike the extension in your reference. OP's code is portable because of that. Using "you should be calling perror()" can well be un-informative.
          – chux
          Dec 26 at 9:00














          The similar advice about "You need to check feof, ... and you need to again call perror and bail." similarly relies on an extension.
          – chux
          Dec 26 at 9:06






          The similar advice about "You need to check feof, ... and you need to again call perror and bail." similarly relies on an extension.
          – chux
          Dec 26 at 9:06














          @chux Edited; I stand by my recommendation but I've added more detail about the standard.
          – Reinderien
          Dec 26 at 15:03




          @chux Edited; I stand by my recommendation but I've added more detail about the standard.
          – Reinderien
          Dec 26 at 15:03










          Armand is a new contributor. Be nice, and check out our Code of Conduct.










          draft saved

          draft discarded


















          Armand is a new contributor. Be nice, and check out our Code of Conduct.













          Armand is a new contributor. Be nice, and check out our Code of Conduct.












          Armand is a new contributor. Be nice, and check out our Code of Conduct.
















          Thanks for contributing an answer to Code Review Stack Exchange!


          • Please be sure to answer the question. Provide details and share your research!

          But avoid



          • Asking for help, clarification, or responding to other answers.

          • Making statements based on opinion; back them up with references or personal experience.


          Use MathJax to format equations. MathJax reference.


          To learn more, see our tips on writing great answers.





          Some of your past answers have not been well-received, and you're in danger of being blocked from answering.


          Please pay close attention to the following guidance:


          • Please be sure to answer the question. Provide details and share your research!

          But avoid



          • Asking for help, clarification, or responding to other answers.

          • Making statements based on opinion; back them up with references or personal experience.


          To learn more, see our tips on writing great answers.




          draft saved


          draft discarded














          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f210307%2fconverting-fixed-width-text-file-to-csv-in-c%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown





















































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown

































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown







          Popular posts from this blog

          Список кардиналов, возведённых папой римским Каликстом III

          Deduzione

          Mysql.sock missing - “Can't connect to local MySQL server through socket”