سؤال

I try to convert a UTF-16LE text file to ASCII using iconv but for some reason my code just hangs forever, any idea what am I doing wrong?

#include <stdio.h>
#include <iconv.h>
#include <string.h>

#define S_SIZE (1024)

#define bool int
#define true 1
#define false 0

int main(){
  iconv_t icd;
  FILE *fp_src, *fp_dst;
  char s_src[S_SIZE], s_dst[S_SIZE];
  char *p_src, *p_dst;
  size_t n_src, n_dst;

  icd = iconv_open("ASCII", "UTF-16LE");
  fp_src = fopen("utf8_test.txt", "rb");
  fp_dst = fopen("ascii_test.txt", "w");

  while(true){
    fgets(s_src, S_SIZE, fp_src);
    if (feof(fp_src))
      break;
    p_src = s_src;
    p_dst = s_dst;
    n_src = strlen(s_src);
    n_dst = S_SIZE-1;
    while(0 < n_src){
      iconv(icd, &p_src, &n_src, &p_dst, &n_dst);
    }
    *p_dst = '\0';
    fputs(s_dst, fp_dst);
  }

  fclose(fp_dst);
  fclose(fp_src);
  iconv_close(icd);

  return 0;
}

Could it be because ASCII file is terminated in EOF and UTF-16LE in WEOF?

هل كانت مفيدة؟

المحلول

Ok, found a solution with ICU library.

#include <stdio.h>
#include <stdlib.h>
#include <unicode/ustdio.h>
#include <unicode/uchar.h>

UChar* read_utf8_file(const char* filename, long* size) {
    /* open a UTF-8 file for reading */
    UFILE* f = u_fopen(filename, "r", NULL, "UTF-16LE");

    /* get the file size */
    long fsize;
    fseek(u_fgetfile(f), 0, SEEK_END);
    fsize = ftell(u_fgetfile(f));
    u_frewind(f);

    /* allocate enough memory to store the whole string plus termination */
    UChar* str = (UChar*) malloc(sizeof(UChar) * (fsize + 1));

    /* read the string into the allocated space */
    for ((*size) = 0; !u_feof(f); ++(*size)) {
        str[*size] = u_fgetc(f);
    }

    /* add NULL termination */
    str[*size] = 0;

    /* close the file resource */
    u_fclose(f);

    return str;
}

int main() {
    /* read the string and its size */
    long size;
    UChar* str = read_utf8_file("utf8_test.txt", &size);

    /* print the string size */
    //printf("String size: %ld\n\n", size);

    /* print the UTF-8 string */
    UFILE* u_stdout = u_finit(stdout, NULL, NULL);
    u_fprintf(u_stdout, "%S\n", str);
    u_fclose(u_stdout);

    /* free the allocated string */
    free(str);

    return 0;
}
مرخصة بموجب: CC-BY-SA مع الإسناد
لا تنتمي إلى StackOverflow
scroll top