I'm writing a small command-line tool to extract and sort comments in Z80 assembly sources. I'm blocked by a random segmentation fault (on macOS Ventura); lldb always shows the same line, reporting a translation error in calloc. The segfault sometimes appears after 1, 2, ... 8 iterations (of i), and occasionally it does not appear at all. There are no warnings during compilation. It is disappointing.
ndx[i]->text = (char *) calloc(tsz, sizeof(char));
I'd be happy if someone could help me understand the mistake I've made.
Thanks
// AX
// my Z80 assembler API extractor
//
// NOTE:
// -----
//
// API extractor will extract all the comment lines following ;;;
//
// The string following the trigger sequence of LF+';'+';'+';'
// is the sorting label of the paragraph of comments
//
// All the files in the current directory and its subdirectories are scanned
// Output is sorted on the label used to build the TOC (unimplemented)
//
// Output format is Markdown (unimplemented)
//
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/errno.h> // lets us directly access errno
// max constants
// (sizes are in bytes and include room for the terminating '\0')
#define C_MAX 4096 // buffer for fgets (one line from asm source file)
#define I_MAX 1000 // number of entries (capacity of ndx[])
#define N_MAX 4096 // buffer for entry name
#define T_MAX 32768 // buffer for entry text
// entry counter: index of the next free slot in ndx[],
// incremented by ApiExtract each time an entry is stored
int i = 0;
// entry structure: one extracted paragraph of API comments
struct entry
{
// name taken from the rest of the ";;;" trigger line (calloc'ed string)
char *name;
// concatenated comment lines that followed the trigger (calloc'ed string)
char *text;
};
// array of pointers to entries; slots are filled by ApiExtract
struct entry *ndx[I_MAX];
//
// function to extract API comments in a given filename
// ----------------------------------------------------
//
// trigger is a CRLF+";;;" followed by entry name (to be sorted in a TOC)
// next lines with comment (beginning with ';') are entry text
// any other line ends the text
//
int ApiExtract(char *argv)
{
// file to parse
FILE *fp = fopen(argv, "r");
if(fp == NULL) return 0;
// state : 0=detect mode, 1=scanning mode
int scanning = 0;
// sizes
int nsz = 0;
int tsz = 0;
// buffers
char chunk[C_MAX];
char nbuf[N_MAX];
char tbuf[T_MAX];
// reset buffers
memset(chunk, 0, C_MAX);
memset(nbuf, 0, N_MAX);
memset(tbuf, 0, T_MAX);
while(fgets(chunk, C_MAX-1, fp) != NULL)
{
if (!scanning)
{
// detect mode, look for trigger
// ;;; trigger sequence for API name
if (chunk[0] == ';' && chunk[1] == ';' && chunk[2] == ';')
{
// trigger found, switch to scanning mode
scanning = 1;
// store name (for TOC and entry sorting)
if (chunk[3]) strncpy(nbuf, &chunk[3], N_MAX-4);
}
}
else
{
// scanning mode
if (chunk[0] == ';')
{
// append line of text
strncat( tbuf, &chunk[1], T_MAX-strlen(tbuf)-1);
}
else
{
// text done, store name and text in a new entry
ndx[i] = (struct entry *) calloc(1, sizeof(struct entry));
// set allocation sizes
nsz = strlen(nbuf)+1;
tsz = strlen(tbuf)+1;
// allocate and store name
ndx[i]->name = (char *) calloc(nsz, sizeof(char));
strncpy( ndx[i]->name, nbuf, N_MAX-1);
// allocate and store text
ndx[i]->text = (char *) calloc(tsz, sizeof(char));
strncpy(ndx[i]->text, tbuf, T_MAX-1);
// clear buffers for next iteration
memset(nbuf, 0, N_MAX);
memset(tbuf, 0, T_MAX);
// next entry
i++;
if (i>I_MAX) return -1;
// switch to detect mode
scanning = 0;
}
}
}
fclose(fp);
return 0;
}
}
strncpy() fills the complete buffer with 0/'\0', not just one byte after the string. The size of the buffer is given by the last argument. You say the target buffer has the size of
N_MAX-1, but the buffer only has the size of nsz, which can be shorter than N_MAX-1. So strncpy() writes past the end of the buffer, which is undefined behavior (UB).