/********************************************************************/
/* cnv - general conversion of 8-bit character sets via Unicode     */
/* (c) 2001 the_majkl, the_majkl@seznam.cz                          */
/* v. 0.4 - changed loading of the second table - it no longer has  */
/*          to be unambiguous (several different Unicode values ->  */
/*          one 8-bit char -> makes conversion to plain ASCII,      */
/*          i.e. without diacritics, possible)                      */
/* v. 0.3 - mild optimization within the limits of the law          */
/* v. 0.2 - conv, diff, char                                        */
/********************************************************************/
/* 8bit -> unicode conversion files are available at                */
/* ftp.unicode.org                                                  */
/********************************************************************/
/* conversion files must be located in directory dir and must be    */
/* named like the (parameter) encoding + suffix .txt                */
/********************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*#undefine DEBUG */

/* Number of codes in an 8-bit character set. */
#define TAB_SIZE 256
/* Value of zaznam.unicode meaning "no mapping defined for this slot". */
#define UNICODE_NONE 0

char *c_from, *c_to, *c_dir;   /* source encoding, target encoding, table directory */
char *c_num;                   /* optional 4th argument: hexnum | "conv" | "diff" */
char *f_from, *f_to;           /* paths of the two table files */
char *pend;                    /* end pointer used when parsing hexnum */
FILE *file_from, *file_to;

typedef struct {
    int unicode;   /* Unicode value of the character, UNICODE_NONE if undefined */
    int prevod;    /* 8-bit code the character is converted to */
} zaznam;

zaznam tab_cnv[TAB_SIZE];   /* indexed by source 8-bit code */
zaznam tab_to[TAB_SIZE];    /* list of (target code, unicode) pairs, in file order */

int b_char, b_conv, b_diff;
int nznak;                  /* replacement code for characters without a counterpart */

void cti_file(FILE *soubor, char *fsoub, zaznam pole[256]);
void cti_file2(FILE *soubor, char *fsoub, zaznam pole[256]);
void kriskros(void);
void konvertuj(void);
void print_diff(void);
void print_conv(void);
void usage(void);

static char *build_table_path(const char *dir, const char *code);
static void init_table(zaznam pole[256]);
static int next_entry(FILE *soubor, const char *fsoub, int *radnum,
                      int *natint, int *uniint);

/*************************************************************/
/* Entry point.                                              */
/* argv: code-in code-out dir [ hexnum | "conv" | "diff" ]   */
/* Loads both tables, builds the conversion and then either  */
/* prints a table (conv/diff) or converts stdin to stdout.   */
/*************************************************************/
int main(int argc, char *argv[])
{
    long value;

#ifdef DEBUG
    printf("parametr…: %d\n", argc);
#endif

    if (argc < 4) {
        usage();
    }

    c_from = argv[1];
    c_to = argv[2];
    c_dir = argv[3];

#ifdef DEBUG
    printf("From: %s\n", c_from);
    printf("To : %s\n", c_to);
    printf("Dir : %s\n", c_dir);
#endif

    b_char = 0;
    b_conv = 0;
    b_diff = 0;

    /* The 4th argument is optional; argv[4] is NULL when argc == 4, */
    /* so it may only be inspected when argc >= 5.                   */
    if (argc >= 5) {
        b_char = 1;
        c_num = argv[4];
#ifdef DEBUG
        printf("Char: %s\n", c_num);
#endif
        if (strcmp(c_num, "conv") == 0) {
            b_conv = 1;
        }
        if (strcmp(c_num, "diff") == 0) {
            b_diff = 1;
        }
        if (!b_conv && !b_diff) {
            value = strtol(c_num, &pend, 16);
            /* reject empty input, trailing garbage and codes outside 8 bits */
            if (pend == c_num || *pend != '\0' || value < 0 || value >= TAB_SIZE) {
                fprintf(stderr, "Error: invalid hexnum '%s' (expected 00..ff)\n", c_num);
                exit(-1);
            }
            nznak = (int)value;
        }
#ifdef DEBUG
        printf("Nznak: 0x%x\n", (unsigned int)nznak);
#endif
    }

    f_from = build_table_path(c_dir, c_from);
#ifdef DEBUG
    printf("File From: %s\n", f_from);
#endif

    f_to = build_table_path(c_dir, c_to);
#ifdef DEBUG
    printf("File To : %s\n", f_to);
#endif

    /* check that the conversion files exist and load them */
    file_from = fopen(f_from, "r");
    if (file_from == NULL) {
        printf("Error: can not open %s !\n", f_from);
        exit(-1);
    }
    cti_file(file_from, f_from, tab_cnv);
    (void)fclose(file_from);

    file_to = fopen(f_to, "r");
    if (file_to == NULL) {
        printf("Error: can not open %s !\n", f_to);
        exit(-1);
    }
    cti_file2(file_to, f_to, tab_to);
    (void)fclose(file_to);

    free(f_from);
    f_from = NULL;
    free(f_to);
    f_to = NULL;

    kriskros();

    if (b_conv) {
        print_conv();
        exit(0);
    }
    if (b_diff) {
        print_diff();
        exit(0);
    }

    konvertuj();
    return 0;
}

/*************************************************************/
/* Returns a newly allocated "<dir>/<code>.txt" string.      */
/* Terminates the program when memory cannot be allocated.   */
/* The caller owns (and frees) the result.                   */
/*************************************************************/
static char *build_table_path(const char *dir, const char *code)
{
    /* dir + '/' + code + ".txt" (sizeof includes the terminating NUL) */
    size_t size = strlen(dir) + 1 + strlen(code) + sizeof ".txt";
    char *path = malloc(size);

    if (path == NULL) {
        fprintf(stderr, "Error: out of memory\n");
        exit(-1);
    }
    (void)snprintf(path, size, "%s/%s.txt", dir, code);
    return path;
}

/*************************************************************/
/* prints the characters that are coded differently,         */
/* 1st set -> 2nd set                                        */
/*************************************************************/
void print_diff(void)
{
    int x;

    for (x = 0; x < TAB_SIZE; x++) {
        if (x != tab_cnv[x].prevod) {
            printf("0x%.2x 0x%.2x 0x%.4x # (%c)\n",
                   (unsigned int)x, (unsigned int)tab_cnv[x].prevod,
                   (unsigned int)tab_cnv[x].unicode, x);
        }
    }
}

/*************************************************************/
/* prints the whole conversion table, 1st set -> 2nd set     */
/*************************************************************/
void print_conv(void)
{
    int x;

    for (x = 0; x < TAB_SIZE; x++) {
        printf("0x%.2x 0x%.2x 0x%.4x # (%c)\n",
               (unsigned int)x, (unsigned int)tab_cnv[x].prevod,
               (unsigned int)tab_cnv[x].unicode, x);
    }
}

/*************************************************************/
/* Copies stdin to stdout, replacing every byte by its       */
/* tab_cnv[].prevod value.                                   */
/*************************************************************/
void konvertuj(void)
{
    int znak_from;

    /* getc() yields 0..255 or EOF, so znak_from is a valid table index */
    while ((znak_from = getc(stdin)) != EOF) {
        putchar((unsigned char)tab_cnv[znak_from].prevod);
    }
    /* putchar('\n'); */
}

/*************************************************************/
/* Finds pairs of equal Unicode values in tab_cnv and tab_to */
/* and sets tab_cnv[].prevod accordingly.  The first         */
/* matching tab_to entry wins.                               */
/* Nested loop, theoretically max. 64k iterations, but it    */
/* causes no problems.                                       */
/*************************************************************/
void kriskros(void)
{
    int x, y;
    int nasel;
    /* replace unmatched characters by nznak only when a replacement */
    /* char was requested and no table is going to be printed        */
    int nahradit = b_char && !b_diff && !b_conv;

#ifdef DEBUG
    printf("Kriskros...\n");
#endif

    for (x = 0; x < TAB_SIZE; x++) {
        if (tab_cnv[x].unicode == UNICODE_NONE) {
            continue;   /* source code undefined: keep the identity mapping */
        }
        nasel = 0;
        for (y = 0; y < TAB_SIZE; y++) {
            if (tab_cnv[x].unicode == tab_to[y].unicode) {
                tab_cnv[x].prevod = tab_to[y].prevod;
                nasel = 1;
                break;
            }
        }
        if (!nasel && nahradit) {
            tab_cnv[x].prevod = nznak;
        }
    }
}

/*************************************************************/
/* Resets a table: every code converts to itself and has no  */
/* Unicode value assigned.                                   */
/*************************************************************/
static void init_table(zaznam pole[256])
{
    int x;

    for (x = 0; x < TAB_SIZE; x++) {
        pole[x].prevod = x;
        pole[x].unicode = UNICODE_NONE;
    }
}

/*************************************************************/
/* Reads the next "<8bit hex> <unicode hex>" entry.          */
/* Lines starting with '#', ' ' or a newline are skipped.    */
/* Lines that do not hold two hex numbers, or whose 8-bit    */
/* code is outside 0..255, are reported on stderr and        */
/* skipped.                                                  */
/* *radnum counts physical lines (used in the messages).     */
/* Returns 1 and fills *natint / *uniint, or 0 at end of     */
/* file.                                                     */
/*************************************************************/
static int next_entry(FILE *soubor, const char *fsoub, int *radnum,
                      int *natint, int *uniint)
{
    char radek[255];
    unsigned int nat, uni;   /* %x requires unsigned int targets */
    int c;

    while (fgets(radek, sizeof radek, soubor) != NULL) {
        (*radnum)++;

        /* discard the rest of an overlong line so that its tail is */
        /* not parsed as if it were another line                    */
        if (strchr(radek, '\n') == NULL) {
            while ((c = getc(soubor)) != EOF && c != '\n') {
                /* skip */
            }
        }

        if (radek[0] == '#' || radek[0] == ' ' || radek[0] == '\n') {
            continue;
        }

        if (sscanf(radek, "%x %x", &nat, &uni) != 2) {
            (void)fprintf(stderr, "Error reading two hex digits: %s, line %d\n",
                          fsoub, *radnum);
            continue;   /* never store values that were not read */
        }

        if (nat >= TAB_SIZE) {
            (void)fprintf(stderr, "Error: 8bit code 0x%x out of range: %s, line %d\n",
                          nat, fsoub, *radnum);
            continue;
        }

#if defined(DEBUG) && (DEBUG + 0) == 1
        printf("NATINT: %d UNIINT: %d\n", (int)nat, (int)uni);
#endif

        *natint = (int)nat;
        *uniint = (int)uni;
        return 1;
    }
    return 0;
}

/*************************************************************/
/* Loads the target table as a list of (8-bit code, unicode) */
/* pairs in file order; one 8-bit code may appear with       */
/* several Unicode values.  At most 256 entries are stored,  */
/* comment and blank lines do not count towards that limit.  */
/*   soubor - open table file                                */
/*   fsoub  - its name (for messages)                        */
/*   pole   - table to fill                                  */
/*************************************************************/
void cti_file2(FILE *soubor, char *fsoub, zaznam pole[256])
{
    int natint, uniint;
    int radnum = 0;
    int pocet = 0;   /* number of entries stored so far */

#if defined(DEBUG) && (DEBUG + 0) == 2
    printf("Definuji seznam (%s)...\n", fsoub);
#endif

    init_table(pole);

    while (pocet < TAB_SIZE && next_entry(soubor, fsoub, &radnum, &natint, &uniint)) {
        pole[pocet].unicode = uniint;
        pole[pocet].prevod = natint;
        pocet++;
    }
}

/*************************************************************/
/* Loads the source table: pole[8-bit code].unicode is set   */
/* for every entry of the file.                              */
/*   soubor - open table file                                */
/*   fsoub  - its name (for messages)                        */
/*   pole   - table to fill                                  */
/*************************************************************/
void cti_file(FILE *soubor, char *fsoub, zaznam pole[256])
{
    int natint, uniint;
    int radnum = 0;

#if defined(DEBUG) && (DEBUG + 0) == 2
    printf("Definuji seznam (%s)...\n", fsoub);
#endif

    init_table(pole);

    /* next_entry() guarantees 0 <= natint < TAB_SIZE */
    while (next_entry(soubor, fsoub, &radnum, &natint, &uniint)) {
        pole[natint].unicode = uniint;
    }
}

/*************************************************************/
/* Prints the usage text and terminates with exit status 1.  */
/*************************************************************/
void usage(void)
{
    printf("cnv: usage: cnv code-in code-out dir [ hexnum | \"conv\" | \"diff\" ] \n");
    printf("cnv converts national chars from stdin to other coding and prints\n");
    printf("the result to stdout.\n");
    printf(" dir: directory with convert tables\n");
    printf(" hexnum: char which convert non-existing codes to\n");
    printf(" (warning: use hexnum with uncomplete convert tables with care)\n");
    printf(" conv: prints complete conversion table\n");
    printf(" diff: prints differential table\n\n");
    printf("Author: the_majkl, the_majkl@seznam.cz\n");
    exit(1);
}