Code not returning line if there is no newline character

177 Views Asked by At

I started coding in C a few months ago. I wrote this code to return each line from a file. It has to work with any buffer size, which right now it does, and it should return each line present in the file. However if I don't have a newline character at the end of my line, my code does not return anything.

So far I believe the issue is in my get_next_line function. I can see that I can get the line without the newline character into the line variable, as well as the stash variable. However, after the code cleans the stash, I believe it enters the if condition (stash[0] == '\0'), therefore ending my program before returning my line which is in my line variable. I added this condition as a way to terminate my main while loop, so if I remove it the code gets stuck on an infinite loop. I have tried other approaches and I managed to get all lines but then it only worked with buffer sizes greater than the size of my file, I did this by changing the i variable to be equal to find_nl(line) + (line[0] != '\0');. In this scenario, for smaller buffers I would get the lines but they would be "broken".

I'm at the very beginning of learning C so I would be grateful for any inputs to improve this code. Thank you in advance.

Here is the program:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define BUFFER_SIZE 100

int find_nl(char *stash);
char *get_line(char *stash, char *line);
void ft_clean(char *stash);

char    *get_next_line(int fd)
{
    static char     stash[BUFFER_SIZE + 1];
    char            *line;
    int         n;
    int         i;

    if (fd < 0 || BUFFER_SIZE <= 0)
        return (NULL);
    i = 0;
    n = 0;
    line = NULL;
    while (!i)
    {
        if (!stash[0])
            n = read(fd, stash, BUFFER_SIZE);   
        if (n == -1)
            return (NULL);
        line = get_line(stash, line);
        i = find_nl(stash) + (line == NULL);
        if (stash[0] == '\0')
            return (NULL);
        ft_clean(stash);
    }
    return (line);
}

int find_nl(char *stash)
{
    size_t      i;

    if (stash == NULL)
        return (0);
    i = 0;
    while (stash[i])
    {
        if (stash[i] == '\n')
            return (1);
        i++;
    }
    return (0);
}

char    *get_line(char *stash, char *line)
{
    size_t  len;
    size_t  i;
    size_t  j;
    char    *nline;

    len = 0;
    j = 0;
    while (stash[len] && stash[len] != '\n')
        len++;
    if (line == NULL)
        i = 0;
    else
        while (line[i])
            i++;
    nline = (char *)malloc((len + i + 1) * sizeof(char));
    if (nline == NULL)
        return (NULL);
    while (line && line[j])
    {
        nline[j] = line[j];
        j++;
    }
    i = 0;
    while (i < len)
    {
        nline[j] = stash[i];
        i++;
        j++;
    }
    nline[j] = '\0';
    return (nline);
}

void    ft_clean(char *stash)
{
    size_t  stash_len;
    size_t  len;
    size_t  i;

    len = 0;
    stash_len = 0;
    i = 0;
    if (stash == NULL)
        return ;
    while (stash[len])
    {
        if (stash[len] == '\n')
        {
            len++;
            break ;
        }
        len++;
    }
    while (stash[stash_len] != '\0')
        stash_len++;
    while (i < stash_len - len + 1)
    {
        stash[i] = stash[i + len];
        i++;
    }
    stash[i] = '\0';
}

int main(void) {
    char *line;
    while ((line = get_next_line(0)) != NULL) {
        printf("[%s]\n", line);
        free(line);
    }
    return 0;
}

It works if called as follows:

printf 'abc\n' | ./a

But the following doesn't output anything:

printf 'abc' | ./a
3

There are 3 best solutions below

4
12431234123412341234123 On

Not a direct answer, but i think there is a better approach. Your current code is hard to understand, not very modular and buggy.

You are using malloc() and free() in your code, normally there is nothing wrong with that, but since you have the requirement of

i am obligated by the project to use only the low level system calls

You can't use them.

Instead of using static variables inside the function (get_next_line() in your case for stash and potentially more since that would be required for a better implementation), i would suggest using a struct and passing the pointer to that struct when calling this functions. This would also allow you to use this functions for more than a single file at (quasi) the same time.

Something like this:

struct File_T
{
  int fd;
  char buffer[BUFFER_SIZE];
  size_t currentLineEnd; //position of the first char after the current line
  size_t validChars;
};

void file_init(struct File_T *file, int fd);
const char *file_getNextLine(struct File_T *file);


void file_init(struct File_T *file, int fd)
{
    file->fd=fd;
    file->currentLineEnd=0; 
    file->validChars=0;
}

When you read() from a file, you may receive '\0' bytes but read() itself does not add a '\0' at the end, i.e. read() does not fill a string but a buffer that can contain any data. You need to use the return value of read() to check how many characters where read. I suggest, you read to the buffer after the last valid character (which was set from the last time you read() or at 0) and you can at the amount of read characters to validChars. Also note that read() returns a ssize_t type and not int.

ssize_t n = read(file->fd,&file->buffer[file->validChars],BUFFER_SIZE-file->validChars-1); //-1 so we can always add a '\0'
if( n<0 )
  { /*do some error handling here*/ }
file->validChars+=n;

To avoid malloc() you can search for '\n' in the buffer (till you reach validChars, after that you either have to read more or have to decide that there is no space left in the buffer or you read the last line, then you have to use the next char after the validChars position) and replace it with a '\0', set currentLineEnd to 1 character after that and return a pointer to the start. When file_getNextLine() is called next time, move everything valid from currentLineEnd till validChars to the beginning (with memmove() or your own implementation of it) and repeat the process.

Edit:

If you need to handle lines of arbitrary length, without setting BUFFER_SIZE so large, you could allocate new memory for the string to store and don't use currentLineEnd. Since you don't want to use malloc(), you can reserve a buffer with mmap(). Like this:

char *buffer=mmap(NULL,newBufferSize,PROT_WRITE|PROT_READ,MAP_PRIVATE|MAP_ANONYMOUS,-1,0);

Don't forget to munmap() it later.

6
Amerigo Scamardella On

Maybe this function would help. It's called fgets and you can use it instead of read function.If you can't use just copy the idea behind it.

Function Implementation

Function Documentation

Definition: char *fgets(char *restrict s, int n, FILE *restrict stream);

The fgets() function shall read bytes from stream into the array pointed to by s, until n-1 bytes are read, or a <newline> is read and transferred to s, or an end-of-file condition is encountered. The string is then terminated with a null byte. Upon successful completion, fgets() shall return s. If the stream is at end-of-file, the end-of-file indicator for the stream shall be set and fgets() shall return a null pointer. If a read error occurs, the error indicator for the stream shall be set, fgets() shall return a null pointer.

Combine it with this:

int get_cleaned_line(char str[], int maxlen)  //I never used read, idk fd  what it is
{             //you can use BUFFER_SIZE instead of maxlen
int len = -1;

    if (fgets(str, maxlen, stdin) != NULL)  //I think stdin it's like fd=0(input from keyboard)
    {               //whatever use the proper input file
    len = 0;
    while(str[len] != '\0')
        len++;
        if (len > 0 && str[len-1] == '\n') 
        {
            str[len-1] = '\0';
            len--;
        }

return len; //can be useful
}

I just learned C one year ago and i really hate strings, Hope this helps.

0
ikegami On

It turns out to be rather complicated. Here's my attempt at solving the problem:

read_line.h:

#include <stdbool.h>

#define GET_LINE_BUFFER_SIZE 100

typedef struct {
   int fd;
   int error;
   int eof;
   size_t in_buf;
   char buffer[ GET_LINE_BUFFER_SIZE ];
} ReadLineData;

ReadLineData *ReadLine_new( int fd );
void ReadLine_init( ReadLineData *data, int fd );
void ReadLine_destroy( ReadLineData *data );
void ReadLine_free( ReadLineData *data );

// Returns -1 on error.    *line_ptr is set to NULL.  errno is set.
// Returns  0 on EOF.      *line_ptr is set to NULL.
// Returns +1 on success.  *line_ptr is set to a string to free.
int ReadLine_read_line( ReadLineData *data, char **line_ptr );

read_line.c:

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "read_line.h"

void ReadLine_init( ReadLineData *data, int fd ) {
   data->fd     = fd;
   data->error  = 0;
   data->eof    = 0;
   data->in_buf = 0;
}

// Returns NULL on error.  errno is set.
ReadLineData *ReadLine_new( int fd ) {
   ReadLineData *data = malloc( sizeof( ReadLineData ) );
   if ( data )
      ReadLine_init( data, fd );

   return data;
}

void ReadLine_destroy( ReadLineData *data ) {
   (void)data;  // Nothing to do.
}

void ReadLine_free( ReadLineData *data ) {
   ReadLine_destroy( data );
   free( data );
}

static char *find_lf( ReadLineData *data ) {
   char *p = data->buffer;
   for ( size_t n = data->in_buf; n--; ++p ) {
      if ( *p == '\n' ) {
         return p;
      }
   }

   return NULL;
}

// Returns false on error.  errno is set.
// Returns true on success.
static bool safe_size_add( size_t *acc_ptr, size_t to_add ) {
   if ( to_add > SIZE_MAX - *acc_ptr ) {
      errno = ENOMEM;
      return false;
   }

   return true;
} 

// Returns false on error.  errno is set.
// Returns true on success.
static bool move_to_line(
   ReadLineData *data,
   char **line_ptr,        // in-out. Set to NULL on error.
   size_t *line_len_ptr,   // in-out.
   size_t n
) {
   char *line      = *line_ptr;
   size_t line_len = *line_len_ptr;

   // Calculate new line size.
   // Protect against overflow.
   size_t new_line_len_p1 = line_len;
   if ( !safe_size_add( &new_line_len_p1, n ) )
      goto ERROR;
   if ( !safe_size_add( &new_line_len_p1, 1 ) )
      goto ERROR;

   // Enlarge the buffer.
   char *new_line = realloc( line, new_line_len_p1 );
   if ( !new_line )
      goto ERROR;

   line = new_line;

   // Copy from the buffer.
   memmove( line + line_len , data->buffer, n );
   line_len += n;
   line[ line_len ] = 0;

   // Remove from the buffer.
   data->in_buf -= n;
   memcpy( data->buffer, data->buffer + n, data->in_buf );

   *line_ptr     = line;
   *line_len_ptr = line_len;
   return true;

ERROR:
   free( line );
   *line_ptr = NULL;
   return false;
}

// Returns -1 on error.    *line_ptr is set to NULL.  errno is set.
// Returns  0 on EOF.      *line_ptr is set to NULL.
// Returns +1 on success.  *line_ptr is set to a string to free.
int ReadLine_get_line( ReadLineData *data, char **line_ptr ) {
   *line_ptr = NULL;
   size_t line_len = 0;

   if ( data->eof )
      return 0;

   while ( 1 ) {
      if ( data->in_buf ) {
         char *lf = find_lf( data );
         if ( lf )
            return move_to_line( data, line_ptr, &line_len, lf - data->buffer + 1 ) ? +1 : -1;

         // We didn't find a LF, so the whole buffer is part of the line.
         if ( !move_to_line( data, line_ptr, &line_len, data->in_buf ) )
            return -1;
      }

      // We need to read more.
      ssize_t bytes_read = read( data->fd, data->buffer, GET_LINE_BUFFER_SIZE );
      if ( bytes_read < 0 ) {
         data->eof   = 1;
         data->error = 1;
         free( *line_ptr );
         *line_ptr = NULL;
         return -1;
      }

      if ( bytes_read == 0 ) {
         data->eof = 1;
         return line_len ? +1 : 0;
      }

      data->in_buf = bytes_read;
   }
}

Program:

#include <err.h>
#include <stdio.h>
#include <stdlib.h>

#include "read_line.h"

int main( void ) {
   ReadLineData data;
   ReadLine_init( &data, 0 );

   while ( 1 ) {
      char *line;
      int rv = ReadLine_read_line( &data, &line );
      if ( rv < 0 )
         err( EXIT_FAILURE, "read_line" );
      if ( rv == 0 )
         break;

      printf( "[%s]\n", line );
      free( line );      
   }

   ReadLine_destroy( &data );
}

It uses memcpy, memmove and realloc. If you want to avoids those, the first two are easy to re-implement yourself. realloc would be trickier to re-implement, and would require something like mmap. This is an exercise left to the reader.