Parsing CSV file into 2D array in C

I have a CSV file that reads like this:

SKU,Plant,Qty
40000,ca56,1245
40000,ca81,12553.3
40000,ca82,125.3
45000,ca62,0
45000,ca71,3
45000,ca78,54.9

      

Note: this is just an example; the actual file has about 500,000 rows and 3 columns.

I am trying to convert these records to a 2D array so that I can then manipulate the data. You will notice that in my example I just set up a small 10x10 matrix `A` to try to get this example to work before moving on to the real thing.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

const char *getfield(char *line, int num);

// Reads "input/input.csv" one line at a time, tries to place each
// comma-separated field into the 10x10 matrix A, then prints the matrix.
// NOTE(review): the result of fopen() is not checked before it is handed
// to fgets().
int main() {
    FILE *stream = fopen("input/input.csv", "r");
    char line[1000000];  // line buffer: 1,000,000 bytes of automatic storage
    int A[10][10];
    int i, j = 0;

    // Zero the matrix. When this loop nest finishes, both i and j equal 10.
    for (i = 0; i < 10; i++) {
        for (j = 0; j < 10; j++) {
            A[i][j] = 0;
        }
    }

    // One iteration per line read. i counts lines and is not limited to 10,
    // although A has only 10 rows.
    for (i = 0; fgets(line, 1000000, stream); i++) {
        // NOTE(review): j is still 10 from the zeroing loop and is never
        // reset, so this condition is false for every line and the body
        // below never executes.
        while (j < 10) {
            // getfield() tokenizes with strtok(), which modifies its
            // argument, so it is given a scratch copy of the line.
            char *tmp = strdup(line);
            // NOTE(review): getfield() returns const char *, but the elements
            // of A are int. getfield() also only returns a field when its
            // decremented counter reaches 0, so num == 0 never matches.
            A[i][j] = getfield(tmp, j);
            // The pointer returned by getfield() points into tmp, which is
            // released here.
            free(tmp);
            j++;
        }
    }
    // Print the matrix: tab-separated values, one row per output line.
    // NOTE(review): "%s" expects a char * argument, but A[i][j] is an int.
    for (i = 0; i < 10; i++) {
        for (j = 0; j < 10; j++) {
            printf("%s\t", A[i][j]);
        }
        printf("\n");
    }
}

/*
 * Returns the num-th comma-separated field of `line`, counting from 1,
 * or a null pointer when there is no such field.
 *
 * `line` is tokenized in place with strtok(), so it is modified and the
 * returned pointer refers into it. The first split uses "," only; every
 * later split also treats '\n' as a delimiter.
 */
const char *getfield(char *line, int num) {
    const char *field = strtok(line, ",");

    while (field != NULL && *field != '\0') {
        num -= 1;
        if (num == 0) {
            return field;
        }
        field = strtok(NULL, ",\n");
    }
    return 0;
}

      

It prints only "null" errors, and I believe I am making a pointer-related mistake on this line: `A[i][j] = getfield(tmp, j)`. I'm just not sure how to fix it.

This work is based almost exclusively on this question: "Read .CSV file in C". Any help in adapting this would be much appreciated, as it has been a couple of years since I last touched C or external files.

+3


source to share


2 answers


It looks like the commenters have already helped you find several bugs in your code. The problems run deeper than that, however. One of the biggest is how you are using strings. Strings are, of course, char arrays; this means that one dimension of your array is already used up by the characters of each string.

It would probably be better to just use a structure like this:

// One CSV record, with every column kept as text.
// Each field holds at most 9 characters plus the terminating NUL.
struct csvTable
{
    char sku[10];    // text of the SKU column
    char plant[10];  // text of the Plant column
    char qty[10];    // text of the Qty column
};

      



This will also allow you to set the columns to the datatypes you want (looks like the SKU might be int, but I don't know the context).

Here's an example of this implementation. I apologize for the mess; it was adapted on the fly from what I was already working on.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Capacity of the table, based on your estimate of the file's row count.
// You could make this adaptive or dynamic instead of a fixed size.

#define rowNum 500000

// One CSV record, with every column kept as text.
// Each field holds at most 9 characters plus the terminating NUL.
struct csvTable
{
    char sku[10];    // text of the first column
    char plant[10];  // text of the second column
    char qty[10];    // text of the third column
};

// The table itself: rowNum records declared at file scope (static storage,
// so it starts out zero-filled and is not placed on the stack).
struct csvTable table[rowNum];

// Loads up to rowNum records from demo.csv into the global `table`, prints
// the rows that were actually read, then waits for a keypress.
// Returns EXIT_FAILURE if the file cannot be opened, 0 otherwise.
int main()
{
    // Load file
    FILE* fp = fopen("demo.csv", "r");

    if (fp == NULL)
    {
        printf("Couldn't open file\n");
        // Report the failure to the caller instead of claiming success.
        return EXIT_FAILURE;
    }

    // Number of rows actually stored; the print loop must not run past it.
    int rowCount = 0;
    char entry[100];

    // Stop at end of file (fgets returns NULL) or when the table is full.
    // Without the fgets check the loop would tokenize a buffer that was
    // never filled once the file runs out.
    while (rowCount < rowNum && fgets(entry, sizeof entry, fp) != NULL)
    {
        // The last field is also cut at the line terminator so the stored
        // quantity does not carry a trailing "\n" (or "\r\n").
        char *sku = strtok(entry, ",");
        char *plant = strtok(NULL, ",");
        char *qty = strtok(NULL, ",\r\n");

        if (sku != NULL && plant != NULL && qty != NULL)
        {
            // snprintf bounds every copy to the destination size and always
            // NUL-terminates; over-long fields are truncated rather than
            // overflowing the 10-byte members.
            snprintf(table[rowCount].sku, sizeof table[rowCount].sku, "%s", sku);
            snprintf(table[rowCount].plant, sizeof table[rowCount].plant, "%s", plant);
            snprintf(table[rowCount].qty, sizeof table[rowCount].qty, "%s", qty);
        }
        else
        {
            // Blank or incomplete line: keep the row slot but leave it empty.
            table[rowCount].sku[0] = '\0';
            table[rowCount].plant[0] = '\0';
            table[rowCount].qty[0] = '\0';
        }

        rowCount++;
    }

    fclose(fp);

    // Prove that the process worked: print only the rows that were read.
    for (int printCounter = 0; printCounter < rowCount; printCounter++)
    {
        printf("Row %d: column 1 = %s, column 2 = %s, column 3 = %s\n", 
            printCounter + 1, table[printCounter].sku, 
            table[printCounter].plant, table[printCounter].qty);
    }

    // Wait for keypress to exit
    getchar();

    return 0;
}

      

+1


source


There are several problems with the code:

  • In the second loop, you don't stop reading the file after 10 lines, so you try to store elements beyond the end of the array `A`.
  • You don't reset `j` to `0` at the start of the `while (j < 10)` loop. `j` has the value `10` at the end of the initialization loop, so you never actually store anything in the matrix.
  • The matrix `A` should be a 2D array of `char *`, not `int`, or possibly an array of structures.


Here's a simpler version with a dynamically allocated array of structures:

#include <stdio.h>
#include <stdlib.h>

/*
 * One CSV record, with every column kept as text.
 * Each field holds at most 19 characters plus the terminating NUL.
 * The trailing declarator gives the typedef its name, so the record can be
 * declared as plain `item_t` as well as `struct item_t`.
 */
typedef struct item_t {
    char SKU[20];
    char Plant[20];
    char Qty[20];
} item_t;

/*
 * Reads input/input.csv into a growable array of item_t records and echoes
 * the accepted records back to stdout as CSV.
 *
 * Lines that do not contain three non-empty comma-separated fields are
 * reported on stderr and skipped.
 *
 * Returns 0 on success, 1 if the file cannot be opened or memory runs out.
 */
int main(void) {
    FILE *stream = fopen("input/input.csv", "r");
    char line[200];
    int size = 0, len = 0, i;
    item_t *A = NULL;

    if (stream == NULL) {
        perror("input/input.csv");
        return 1;
    }

    while (fgets(line, sizeof(line), stream)) {
        /* Preset to '\n' so a final line with no trailing newline, where
         * sscanf matches three fields and never stores c, is still accepted. */
        char c = '\n';
        int nfields;

        if (len == size) {
            /* Grow geometrically. Keep the old pointer until realloc has
             * succeeded so it can still be released on failure. */
            int new_size = size ? size * 2 : 1000;
            item_t *grown = realloc(A, sizeof(*A) * new_size);
            if (grown == NULL) {
                fprintf(stderr, "out of memory for %d items\n", new_size);
                free(A);
                fclose(stream);
                return 1;
            }
            A = grown;
            size = new_size;
        }

        /* Each %19[^,\n] reads at most 19 characters and stops at a comma or
         * newline. The trailing %c captures whatever follows the third field,
         * so extra columns or an over-long field can be detected. */
        nfields = sscanf(line, "%19[^,\n],%19[^,\n],%19[^,\n]%c",
                         A[len].SKU, A[len].Plant, A[len].Qty, &c);
        if (nfields < 3 || c != '\n') {
            fprintf(stderr, "invalid format: %s\n", line);
        } else {
            len++;
        }
    }
    fclose(stream);

    /* Print the records that were accepted. */
    for (i = 0; i < len; i++) {
        printf("%s,%s,%s\n", A[i].SKU, A[i].Plant, A[i].Qty);
    }
    free(A);
    return 0;
}

      

+1


source







All Articles