I'm trying to make a general text wrap in C. Basically it should handle the text in the following way:
- A maximum line length is determined
- It must always wrap the text in the spaces, when it is dealing with words smaller than the maximum line length
- For big words, it should break the word in two when it reaches the length limit
- It also handles ANSI characters to account for the "invisible" characters.
What I came up with until now is the following:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#define MAX_PRINT_LEN 50
void line_length_visualization(size_t maxlength) {
for (size_t i = 0; i < maxlength; i++) {
printf("=");
}
printf("\n");
}
unsigned int count_big_words(char *str, int max_line_length) {
unsigned int word_length = 0; // Length of the word
unsigned int big_words = 0; // Number of big words
// Iterate over the original string to check how many line breaks "\n" will be inserted and for long words
size_t str_len = strlen(str);
for (size_t i = 0; i < str_len + 1; i++) {
// Increase word_length if it don't find a space
if (str[i] != ' ') {
word_length++;
}
else {
// If a word_length is bigger than the limit, increase the number of big_words
if (word_length > max_line_length) {
big_words++;
}
// Reset the word length
word_length = 0;
}
}
// Check the last word
if (word_length > max_line_length) {
big_words++;
}
return big_words;
}
bool is_a_big_word(char *str, int max_line_length, int last_space_position) {
unsigned int word_length = 0; // Size of the analyzed word
bool inside_ANSI = false; // Flag to determine if it is within an ANSI character
// Iterate over the string from the initial position (where it has the last space) until it finds another space or "\0"
int i = last_space_position + 1;
while ((str[i] != ' ') && (str[i] != '\0')){
// Check if it is inside an ANSI escape character
if (str[i] == '\x1b') {
inside_ANSI = true;
}
// If it is a normal character, increase word length
if (inside_ANSI == false) {
word_length++;
}
if (inside_ANSI == true && str[i] == 'm') {
inside_ANSI = false;
}
// Advance one position in the string
i++;
}
// Determine if it is a big word or not
if (word_length >= max_line_length) {
return true;
}
else {
return false;
}
}
void get_last_space_pos_and_length(char *str, size_t i, bool *inside_ANSI, int *last_space_position, int *length_counter) {
/* This function updates the last space position of the string and the length counter to control the line breaks */
// Check if position is a ANSI escape character
if (str[i] == '\x1b') {
(*inside_ANSI) = true;
}
// If in position of normal string
if ((*inside_ANSI) == false) {
// Check if it is space
if (str[i] == ' ') {
// If is a space, hold as last space position
(*last_space_position) = i;
}
// Increase the length counter
(*length_counter)++;
}
// If position is inside a ANSI escape sequense and is in the last position of the sequence flag
// inside_ANSI as false (this instruction must come after all checks for inside_ANSI variable)
if ((*inside_ANSI) == true && str[i] == 'm') {
(*inside_ANSI) = false;
}
}
char *split_string_with_small_words(char *str, int max_line_length) {
// Get the size of the original string
size_t str_len = strlen(str);
// Make a duplicate of the string
char *new_str = strdup(str); // Size of the string + potential
int last_space_position = 0; // Monitor of the last space position
int length_counter = 0; // Counter to check max line length
bool inside_ANSI = false;
// Iterate over the original string to check how many line breaks "\n" will be inserted
for (size_t i = 0; i < str_len + 1; i++) {
get_last_space_pos_and_length(str, i, &inside_ANSI, &last_space_position, &length_counter);
// Check if length_counter reaches the limit of max_line_length and if it founds any space
if (length_counter == max_line_length + 1 && last_space_position != 0) {
// If reaches max_line_length, substitute the last space position with a new line char
new_str[last_space_position] = '\n';
// Reset monitor of the length counter to the position of the word after the space
length_counter = i - last_space_position;
}
}
return new_str;
}
char *my_split_string(char *str, int max_line_length) {
bool inside_big_word = false; // Flag to determine if it is within a big word
bool moved_right = false; // Flag to determine if it moved the memory to the right
int add_right = 0;
// Check if there is any big words
unsigned int big_words = count_big_words(str, max_line_length);
// Check if there is a big word
if (big_words == 0) {
char *new_str = split_string_with_small_words(str, max_line_length);
return new_str;
} else {
// If there is any big words, allocate memory accordingly
size_t str_len = strlen(str);
size_t new_str_len = str_len + 1 + (str_len/max_line_length); // Length of the string + space for \0 + additional space for line breaks of big words
char *new_str = malloc(new_str_len);
// Declare some variables
int last_space_position = 0; // Monitor of the last space position
int length_counter = 0; // Counter to check max line length
bool inside_ANSI = false; // Flag to determine if it is within an ANSI character
size_t j = 0;
// Iterate over the string
for (size_t i = 0; i < str_len; i++) {
get_last_space_pos_and_length(str, i, &inside_ANSI, &last_space_position, &length_counter);
// If reaches the length limit
if (length_counter == max_line_length + 1) {
// Check if it is a big word
bool big_word = is_a_big_word(str, max_line_length, last_space_position);
if (big_word == true) {
// Put a newline character at the end of the line
new_str[j] = '\n';
// Define last space position as the end of the big word and advance one character
last_space_position = j;
j++;
// Reset the monitor of the length
length_counter = 0;
} else {
// Put a newline character in the last space character added by the number of additional newlines for big words
new_str[last_space_position + (j - i)] = '\n';
length_counter = i - last_space_position;
}
}
// Copy str character to new_str
new_str[j] = str[i];
j++;
}
return new_str;
}
}
int main(void) {
char *str_1 = "\x1b[33m=>\x1b[32m This is a very very loong message that needs to be inserted into this program to test the split function. Lets make this string really big to test it properly.\x1b[0m\n";
char *str_2 = "\x1b[33m=>\x1b[32m File \x1b[35m'luaguedesc/data/in/DRDs/very_long_input_file.txt' (25 bytes)\x1b[32m successfully loaded! I haveeee also another input file to be loaded \x1b[35m'luaguedesc/data/DRDs/input_files/very_long_input_file_2.txt' (27 bytes)\x1b[32m that was successfully loaded! AndAFinalVeryVeryBigWordWithManyCharactersAndNoSpaces.\x1b[0m\n";
char *str_3 = "\x1b[33m=>\x1b[32m dhaisdhiasudhuasihdiusahdiusahdhasiudsiuhdsauihdsuihdsaiuhdsaihudsaiuhdsauihsaduhiasdhuadsiuhdasihudiuasduhisaiuhdasuihdasuihuidasiuhuhiadsiuhasduihdaiudas.\x1b[0m\n";
line_length_visualization(MAX_PRINT_LEN);
char *new_str_1 = my_split_string(str_1, MAX_PRINT_LEN);
printf("%s", new_str_1);
line_length_visualization(MAX_PRINT_LEN);
char *new_str_2 = my_split_string(str_2, MAX_PRINT_LEN);
printf("%s", new_str_2);
line_length_visualization(MAX_PRINT_LEN);
char *new_str_3 = my_split_string(str_3, MAX_PRINT_LEN);
printf("%s", new_str_3);
free(new_str_1);
free(new_str_2);
free(new_str_3);
}
The function line_length_visualization(...) serves as a template to check if the text is being wrapped at the right location
count_big_words(...) determines if there is a word that is bigger than the line length limit
is_a_big_word(...) determines if the next word is bigger than the line length limit
get_last_space_pos_and_length(...) determines where the last space occurred in text, and also accounts for the monitoring of the maximum length and handles ANSI characters
split_string_with_small_words(...) is a function that wraps a text with small words. This function is working properly.
my_split_string(...) should be the general function that wraps both texts with small words and with big words larger than the line length limit.
There are three strings that I'm using for tests: The first is a string with small words, the second is a general string with small and big words, and the third is a string with an arrow followed by a single big word.
The output of the code is the following:
First string:
==================================================
=> This is a very very loong message that needs to
be inserted into this program to test the split
function. Lets make this string really big to test
it properly.
Second string:
==================================================
=> File 'luaguedesc/data/in/DRDs/very_long_input_f
ile.txt' (25 bytes) successfully loaded! I haveeee
also another input file to be loaded 'luaguedesc/d
ata/DRDs/input_files/very_long_input_file_2.txt'
(27 bytes) that was successfully loaded! AndAFinal
VeryVeryBigWordWithManyCharactersAndNoSpaces.
Third string:
==================================================
=> dhaisdhiasudhuasihdiusahdiusahdhasiudsiuhdsauih
dsuihdsaiuhdsaihudsaiuhdsauihsaduhiasdhuadsiuhdasih
udiuasduhisaiuhdasuihdasuihuidasiuhuhiadsiuhasduihd
aiudas.
The second output looks fine, but if I made a slight change by adding some character after the word "haveeee", as haveeeeE, it doesnt wrap correctly:
==================================================
=> File 'luaguedesc/data/in/DRDs/very_long_input_f
ile.txt' (25 bytes) successfully loaded! I haveeeeE also another input file to be loaded 'luaguedesc/d
ata/DRDs/input_files/very_long_input_file_2.txt'
(27 bytes) that was successfully loaded! AndAFinal
VeryVeryBigWordWithManyCharactersAndNoSpaces.
The third string is also problematic. It is wrapping the text one character after it should in the second and third lines.
I tried many things but cannot find a solution to that. Could someone help?
Thanks in advance!
Here is a description of some problems (but I don't think it covers all problems).
count_big_wordscompletely ignore the escape codes, i.e. the escape codes are being counted as part of the words. So the returned value may be wrong.is_a_big_wordhave the lineint i = last_space_position + 1;to set the starting point for the iteration. When you callis_a_big_wordthe first time on a string,last_space_positionis zero so the iteration starts from index one. In other words, the first character is not handled correctly - it's ignored. So for instance, if the first character is the escape, your code doesn't detect that you are parsing an escape sequence.In general your code looks for spaces inside the string but it never looks for newlines (
\n). So for instance a trailing newline will be counted as part of a word.And this also looks strange:
Why add one to
str_len? It makes the code count the nul-termination character as part of a word. For a string like"1234567\n";the functioncount_big_wordswill end up withword_lengthbeing 9. That's obviously wrong as the word is just 7 characters. So ifMAX_PRINT_LENis set to 8, your function will count a "big word" even if there isn't any "big word" in the string.