Why does sscanf not extract all the numbers from this string format properly?

131 Views Asked by At

I am parsing GNSS data and the module QuecTel BG96 I am using has UART serial interface. When trying to extract the data associated with the message most of it can be extracted using sscanf, aside from the time.

Here is an example message, the same used in the code below:

"\r\n+QGPSLOC: 010842.0,32.04415,5.31028,1.7,55.0,2,150.36,0.0,0.0,
060224,03\r\n\r\nOK\r\n"

Just for completeness, here is the interpretation of the fields, as in the manual:

+QGPSLOC:<UTC>,<latitude>,<longitude>,<hdop>,<altitude>,<fix>,<cog>,<spkm>,<spkn>,<date>,<nsat> OK
  • UTC is UTC time. Format: hhmmss.sss (Quoted from GPGGA sentence).

The issue is that the first set of numbers, representing the time as hhmmss.sss, which when extracted are always 0.

Expected output:

 240206-010842.000,       32.04415,         5.31028,      1.7,       55.0, 0002,       150.6000,         0.0000, 00000003

My code:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

const char* utc_datetime_format =
    "%02u%02u%02u-%02u%02u%02u.%03u";
const char* print_format =
    "%18.18s, %14.5f, %15.5f, %8.1f, %10.1f, %04u, %14.4f, "
    "%14.4f, %08u\n";

typedef struct
{
    uint64_t year : 12;
    uint64_t month : 4;
    uint64_t day : 8;
    uint64_t hour : 8;
    uint64_t minutes : 8;
    uint64_t seconds : 8;
    uint64_t msec : 16;
} DateAndTime_Compressed;

typedef struct
{
    float latitude;
    float longitude;
} GNSS_Coordinates;

typedef struct
{
    GNSS_Coordinates       coordinates;
    float                  horizontal_precision;
    float                  altitude;
    float                  course_over_ground;
    float                  speed_over_ground_mps;  // m/s
    DateAndTime_Compressed utc_time;
    uint8_t                fix_type;
    uint8_t                num_satellites;
} GNSS_Data;

int GNSS_ParseData(char* buf, GNSS_Data* data)
{
    const char* format =
        "%s %2" PRIu16 "%2" PRIu16 "%2" PRIu16 ".%3" PRIu16
        ",%f,%f,%f,%f,%" PRIu8 ",%3" PRIu16 ".%2" PRIu16
        ",%f,%f,%2" PRIu8 "%2" PRIu8 "%2" PRIu8 ",%" PRIu32
        "%s";
    char front_padding[128] = {0}, back_padding[128] = {0};

    uint16_t year, month, day, hour, minute, second,
        millisecond, cog_deg, cog_min;
    float tmp;

    // For some reason cannot extract hhmmss.sss from
    // message directly
    int ret = sscanf(
        buf, format, front_padding, &hour, &minute, &second,
        &millisecond, &(data->coordinates.latitude),
        &(data->coordinates.longitude),
        &data->horizontal_precision, &data->altitude,
        &data->fix_type, &cog_deg, &cog_min,
        &data->speed_over_ground_mps, &tmp, &day, &month,
        &year, &data->num_satellites, back_padding);
    if (ret != 19)
    {
        // Handle Error (but no error occurs here)
        return -1;
    }
    data->utc_time = (DateAndTime_Compressed){
        .year    = year,
        .month   = month,
        .day     = day,
        .hour    = hour,
        .minutes = minute,
        .seconds = second,
        .msec    = millisecond};

    data->speed_over_ground_mps *=
        (1000.0f / 3600.0f);  // kph to mps
    data->course_over_ground =
        (float)cog_deg + (((float)cog_min) / 60.0f) /
                             100.0f;  // ddd.mm to ddd.dd

    return 0;
}

int main(int argc, char** argv)
{
    char msg[] =
        "\r\n+QGPSLOC: "
        "010842.0,32.04415,5.31028,1.7,55.0,2,150.36,0.0,0."
        "0,060224,03\r\n\r\nOK\r\n";

    GNSS_Data gnss_data = {0};

    int ret = GNSS_ParseData(
        msg,
        &gnss_data);  // ret = 0 but data.utc_time wrong
                      // (dates are correct, times wrong)
    if (ret != 0)
    {
        printf("Failed to parse data\n");
        exit(-1);
    }
    char utc_date_string[64] = {0};
    snprintf(
        utc_date_string, 32, utc_datetime_format,
        gnss_data.utc_time.year, gnss_data.utc_time.month,
        gnss_data.utc_time.day, gnss_data.utc_time.hour,
        gnss_data.utc_time.minutes,
        gnss_data.utc_time.seconds,
        gnss_data.utc_time.msec);

    printf(
        print_format, utc_date_string,
        gnss_data.coordinates.latitude,
        gnss_data.coordinates.longitude,
        gnss_data.horizontal_precision, gnss_data.altitude,
        gnss_data.fix_type, gnss_data.course_over_ground,
        gnss_data.speed_over_ground_mps,
        gnss_data.num_satellites);

    return 0;
}
2

There are 2 best solutions below

1
the busybee On BEST ANSWER

There are two types of error in your format string for sscanf():

  1. You use PRN... instead of SCN.... The former are for the printf() family, and you need to use the latter. See below for the differences.
  2. The used widths of the format specifiers do not match the widths of the variables.

So the correct format is:

    const char* format =
        "%s %2" SCNu16 "%2" SCNu16 "%2" SCNu16 ".%3" SCNu16
        ",%f,%f,%f,%f,%" SCNu8 ",%3" SCNu16 ".%2" SCNu16
        ",%f,%f,%2" SCNu16 "%2" SCNu16 "%2" SCNu16 ",%" SCNu8
        "%s";

NOTE: Please use always the maximum set of warning flags for your compiler.


Now, why do you get zeroes for some time variables?

For this we assume that you are using a standard compiler on a current system, so I use GCC on Windows 10 for investigation.

This little test program shows the differences between the format constants:

#include <inttypes.h>
#include <stdio.h>

int main(void) {
  printf("PRIu16 = \"%s\"\n", PRIu16);
  printf("SCNu16 = \"%s\"\n", SCNu16);
  uint16_t canary1 = 0x1122, value, canary2 = 0x5566;
  // 0x3344 = 13124
  printf("sscanf() = %d\n", sscanf("13124", "%" PRIu16, &value));
  printf("canary1 @%p = %04X\n", (void*)&canary1, canary1);
  printf("value   @%p = %04X\n", (void*)&value, value);
  printf("canary2 @%p = %04X\n", (void*)&canary2, canary2);
  return 0;
}

Compilation with a common set of warning flags tells us already that something is wrong:

> gcc -Wall -pedantic formats.c -o formats.exe
formats.c: In function 'main':
formats.c:9:45: warning: format '%u' expects argument of type 'unsigned int *', but argument 3 has type 'uint16_t *' {aka 'short unsigned int *'} [-Wformat=]
   printf("sscanf() = %d\n", sscanf("13124", "%" PRIu16, &value));
                                             ^~~         ~~~~~~
In file included from formats.c:1:
C:/Program Files/mingw-w64/x86_64-8.1.0-posix-seh-rt_v6-rev0/mingw64/x86_64-w64-mingw32/include/inttypes.h:92:17: note: format string is defined here
 #define PRIu16 "u"
formats.c:9:45: warning: format '%u' expects argument of type 'unsigned int *', but argument 3 has type 'uint16_t *' {aka 'short unsigned int *'} [-Wformat=]
   printf("sscanf() = %d\n", sscanf("13124", "%" PRIu16, &value));
                                             ^~~         ~~~~~~
In file included from formats.c:1:
C:/Program Files/mingw-w64/x86_64-8.1.0-posix-seh-rt_v6-rev0/mingw64/x86_64-w64-mingw32/include/inttypes.h:92:17: note: format string is defined here
 #define PRIu16 "u"

And the result reproduces your observation, canary1 is zeroed:

> formats.exe
PRIu16 = "u"
SCNu16 = "hu"
sscanf() = 1
canary1 @000000000061FE1E = 0000
value   @000000000061FE1C = 3344
canary2 @000000000061FE1A = 5566

We see that PRIu16 is actually "u", which sscanf() uses to write to an unsigned int. On our systems, this has usually the same width as uint32_t. So sscanf() happily writes 4 bytes, overwriting canary1.

Which variable gets overwritten depends on the order of the variables in the memory and the sequence by which the values are stored. None of these order/sequence are defined by the standard, so I used two canaries on both sides.

This explains the zeroes you observe.


But why is PRIu16 associated with unsigned int, when SCNu16 is associated with unsigned short int?

The secret is the integer promotion of integer arguments on variadic functions. And printf() is such a variadic function.

If you give a uint16_t value to printf(), it will be converted to an int. As int commonly suffices to hold all potential values of uint16_t, the conversion result is not an unsigned int. However, the bit pattern is identical for both types in the value range of uint16_t.

So printf() needs "u" to print such values correctly.

On the other hand, scanf() must not write beyond the space of the provided variable. So it needs to know the width, which leads to "hu".

0
chqrlie On

As documented in the busybee's answer, the main problems are

  • the use of PRN... instead of SCN... for the sscanf format,
  • the inconsistent widths of the format specifiers that do not match the widths of the variables.

Note also these other problems:

  • using a string variable instead of a string literal for the format argument in printf and scanf functions prevents the compiler from type-checking the arguments against the format string. This would have spotted the problem in gcc and clang with the -Wall.

  • the way you parse the time and date numbers is unreliable: for example a time expressed with 5 digits and 2 decimals as 12345.12 will be interpreted as 12:34:05.012 instead of 1:23:45.120. You should use a different approach: parse the time and date fields as strings of digits, pad them with leading zeroes and reparse them with a fixed format, handle the number of digits in the milliseconds field explicitly.

  • it would be simpler and more reliable to use int and unsigned temporary variables to parse the input string, then perform extra validations before storing the values into the structure.

  • to avoid unit conversion issues, use a local variable with an explicit name for the ground speed and only store the converted value into the structure.

  • I don't know if the course_over_ground can be negative, but in this case, the minutes must be negated in the conversion formula. Furthermore, the conversion of the minutes part is incorrect: you should just use (float)cog_deg + (float)cog_min / 60.0f.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define UTC_DATETIME_FORMAT  "%02u%02u%02u-%02u%02u%02u.%03u"
#define PRINT_FORMAT  "%18.18s, %14.5f, %15.5f, %8.1f, %10.1f, %04u, %14.4f, %14.4f, %08u\n"

typedef struct {
    uint64_t year : 12;
    uint64_t month : 4;
    uint64_t day : 8;
    uint64_t hour : 8;
    uint64_t minutes : 8;
    uint64_t seconds : 8;
    uint64_t msec : 16;
} DateAndTime_Compressed;

typedef struct {
    float latitude;
    float longitude;
} GNSS_Coordinates;

typedef struct {
    GNSS_Coordinates       coordinates;
    float                  horizontal_precision;
    float                  altitude;
    float                  course_over_ground;
    float                  speed_over_ground_mps;  // m/s
    DateAndTime_Compressed utc_time;
    uint8_t                fix_type;
    uint8_t                num_satellites;
} GNSS_Data;

int GNSS_ParseData(const char *buf, GNSS_Data *data)
{
    char back_padding[128] = { 0 };
    char hhmmss_buf[5+6+1] = "00000";
    char ddmmyy_buf[5+6+1] = "00000";
    char msec_buf[4] = { 0 };
    unsigned year, month, day, hour, minute, second, msec;
    int cog_deg;
    unsigned fix_type, cog_min, num_satellites;
    float speed_over_ground_kph, tmp;

    int ret = sscanf(buf,
                     "%*127s %6[0-9].%3[0-9],%f,%f,%f,%f,%u,%d.%u,%f,%f,%6[0-9],%u %127s",
                     hhmmss_buf + 5, msec_buf,
                     &data->coordinates.latitude, &data->coordinates.longitude,
                     &data->horizontal_precision, &data->altitude,
                     &fix_type, &cog_deg, &cog_min, &speed_over_ground_kph,
                     &tmp, ddmmyy_buf + 5, &num_satellites, back_padding);
    if (ret != 14)
        return -1;

    // pad the time string with leading zeroes
    sscanf(hhmmss_buf + strlen(hhmmss_buf) - 6,
           "%02u%02u%02u", &hour, &minute, &second);
    if (second >= 60 || minute >= 60 || hour >= 24)
        return -3;
    // pad the date string with leading zeroes
    sscanf(ddmmyy_buf + strlen(ddmmyy_buf) - 6,
           "%02u%02u%02u", &day, &month, &year);
    if (day < 1 || day > 31 || month < 1 || month > 31 || year > 99)
        return -4;

    msec = (msec_buf[0] - '0') * 100;
    if (msec_buf[1] != '\0') {
        msec += (msec_buf[1] - '0') * 10;
        if (msec_buf[2] != '\0')
            msec += (msec_buf[2] - '0');
    }

    if (fix_type > 255)
        return -5;
    if (num_satellites > 255)
        return -6;

    data->utc_time = (DateAndTime_Compressed){
        .year    = year,
        .month   = month,
        .day     = day,
        .hour    = hour,
        .minutes = minute,
        .seconds = second,
        .msec    = msec
    };
    data->fix_type = (uint8_t)fix_type;
    data->num_satellites = (uint8_t)num_satellites;
    // make the unit conversion explicit: km/h to m/s
    data->speed_over_ground_mps = speed_over_ground_kph * (1000.0f / 3600.0f);
    // convert ddd.mm to ddd.dd, accept negative angles
    if (cog_deg < 0)
        cog_min = -cog_min;
    data->course_over_ground = (float)cog_deg + (float)cog_min / 60.0f;
    return 0;
}

int main(int argc, char** argv)
{
    char msg[] =
        "\r\n+QGPSLOC: "
        "010842.0,32.04415,5.31028,1.7,55.0,2,150.36,0.0,0."
        "0,060224,03\r\n\r\nOK\r\n";

    GNSS_Data gnss_data = { 0 };

    int ret = GNSS_ParseData(msg, &gnss_data);  // ret = 0 but data.utc_time wrong
    if (ret != 0) {
        printf("Failed to parse data: error %d\n", -ret);
        return -1;
    }
    char utc_date_string[32] = { 0 };
    // use explicit casts to avoid compiler warnings
    snprintf(utc_date_string, sizeof utc_date_string,
             UTC_DATETIME_FORMAT,
             (unsigned)gnss_data.utc_time.year,
             (unsigned)gnss_data.utc_time.month,
             (unsigned)gnss_data.utc_time.day,
             (unsigned)gnss_data.utc_time.hour,
             (unsigned)gnss_data.utc_time.minutes,
             (unsigned)gnss_data.utc_time.seconds,
             (unsigned)gnss_data.utc_time.msec);

    // use explicit casts to avoid compiler warnings
    printf(PRINT_FORMAT, utc_date_string,
           (double)gnss_data.coordinates.latitude,
           (double)gnss_data.coordinates.longitude,
           (double)gnss_data.horizontal_precision,
           (double)gnss_data.altitude,
           (unsigned)gnss_data.fix_type,
           (double)gnss_data.course_over_ground,
           (double)gnss_data.speed_over_ground_mps,
           (unsigned)gnss_data.num_satellites);

    return 0;
}