-
Notifications
You must be signed in to change notification settings - Fork 0
/
remove_utf8_bom.c
46 lines (42 loc) · 1.86 KB
/
remove_utf8_bom.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#include <stdio.h>
#include <stdlib.h> // needed for malloc()
#include <string.h> // needed for strlen()
#include <stdbool.h> // needed for isutf8bom()
/***************************************************************/
/* README */
/***************************************************************/
/* This file defines remove_utf8_bom(), which takes a UTF-8 string
and returns a newly allocated copy from which any UTF-8
BOMs 0xEFBBBF that are found have been removed.
Written by Thomas Hedden January 2023. */
/***************************************************************
* FUNCTION DECLARATIONS *
***************************************************************/
/* Helpers used by remove_utf8_bom(); their definitions live in
   separate source files. */

/* Fetches the UTF-8 character of str located at index *pos.
   remove_utf8_bom() treats a return value of 0 as end of string.
   NOTE(review): presumably advances *pos over the extra bytes of a
   multi-byte character -- confirm against the definition. */
unsigned int getu(char *str, int *pos);

/* Reports whether u is the UTF-8 BOM 0xEFBBBF. */
bool isutf8bom(unsigned int u);

/* Adds the UTF-8 character u to the string dest.
   NOTE(review): presumably appends at the current end of dest and
   returns dest -- confirm against the definition. */
char * utf8cat(char *dest, unsigned int u);
/*
 * remove_utf8_bom: build a copy of passed_string with every UTF-8 BOM
 * (0xEFBBBF) left out.
 *
 * passed_string  NUL-terminated UTF-8 string; it is only read here.
 *
 * Returns a newly allocated string that the caller must free(), or NULL
 * when passed_string is NULL.  A line is written to stderr for each BOM
 * that is dropped.  If memory cannot be allocated, a diagnostic is
 * written to stderr and the process exits with EXIT_FAILURE.
 */
char * remove_utf8_bom(char * passed_string) {
    int si;          // iterator for passed UTF-8 string
    unsigned int u;  // holds each UTF-8 character
    char * returned_string;

    // strlen(NULL) is undefined behavior, so reject NULL up front
    if(passed_string == NULL) {
        return NULL;
    }

    // The copy can never be longer than the original, so the original's
    // length plus a terminator is enough.  calloc() zero-fills the
    // buffer: utf8cat() is handed this buffer before anything has been
    // stored in it, so it must already be a valid (empty) string, and
    // the result stays terminated even if nothing at all is copied.
    returned_string = calloc(strlen(passed_string) + 1, sizeof *returned_string);
    if(returned_string == NULL) {
        fprintf(stderr, "on line %d in file %s\n",
                __LINE__, __FILE__);
        fprintf(stderr, "insufficient memory\n");
        exit(EXIT_FAILURE);
    }

    // look at each UTF-8 character in passed string
    for(si = 0; (u = getu(passed_string, &si)) != '\0'; si++) {
        if(isutf8bom(u)) {
            // if u is UTF-8 BOM 0xEFBBBF, skip it
            fprintf(stderr, "removed UTF-8 BOM 0xEFBBBF\n");
            continue; // look at next UTF-8 character
        }
        utf8cat(returned_string, u); // otherwise, copy it
    }
    utf8cat(returned_string, '\0'); // terminate copy
    return returned_string;
}
/****************************************************************
* FUNCTION DEFINITIONS *
****************************************************************/
// all functions have been put in separate files