80 lines
No EOL
2.9 KiB
PHP
80 lines
No EOL
2.9 KiB
PHP
<?php
|
|
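// Extracts the operating-agency lists from PREFACE_EN.txt into output.csv (administration symbol, country, agency number, agency name). The parsing is heuristic and tied to the exact text layout of that file.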
|
|
|
|
// Load the raw text and strip form-feed (U+000C, page break) characters so
// they do not end up glued to the start of a line.
$data_dirty = file_get_contents("PREFACE_EN.txt");
if ($data_dirty === false) die("Could not read input file");

$data_clean = str_replace("\u{000c}", "", $data_dirty);

// The cleaned copy is written to disk and re-opened so it can be read line by line.
if (file_put_contents("data_clean.txt", $data_clean) === false) die("Could not write cleaned input file");

$handle = fopen("data_clean.txt", "r");
if (!$handle) die("Could not open input file");

$out = fopen('output.csv', 'w');
// Check the output handle here; the original re-tested $handle, so a failed
// open of output.csv went unnoticed.
if (!$out) die("Could not open output file");
|
|
|
|
/**
 * Reads the next line from the global input handle into the global $line.
 *
 * The stored line has leading and trailing whitespace removed, so an empty
 * input line is stored as "".
 *
 * At end of file (or on a read error) this returns false instead of
 * terminating the script, so the caller's loop can finish and run its own
 * cleanup. The handle is deliberately left open: the caller closes it.
 *
 * @return bool true when a line was read into $line, false when no more
 *              lines are available ($line is left unchanged in that case).
 */
function attempt_gets(): bool
{
    global $handle, $line;

    $fileline = fgets($handle);
    if ($fileline === false) {
        return false;
    }

    $line = trim($fileline);
    return true;
}
|
|
|
|
/**
 * Splits text at its first space into a leading token and the remainder.
 *
 * @return array{0: string, 1: string} Both parts trimmed; the second part is
 *                                     "" when the text contains no space.
 */
function split_first_word(string $text): array
{
    $parts = explode(" ", trim($text), 2);

    return [trim($parts[0]), trim($parts[1] ?? "")];
}

/**
 * Writes one CSV row for an accumulated agency entry.
 *
 * The entry text is split into its number (first token) and name (rest).
 * Nothing is written when any of the four fields is empty.
 *
 * @param resource $out Open, writable handle of the CSV file.
 */
function write_agency_row($out, string $cur_adm, string $country, string $agency): void
{
    [$agency_num, $agency_name] = split_first_word($agency);

    if ($cur_adm === "" || $country === "" || $agency_num === "" || $agency_name === "") {
        return;
    }

    // Delimiter, enclosure and escape are spelled out with their default
    // values so the output is unchanged and no argument is left implicit.
    fputcsv($out, [$cur_adm, $country, $agency_num, $agency_name], ",", "\"", "\\");
}

$line = "";
$agency = "";        // text of the agency entry currently being accumulated ("" = none pending)
$cur_adm = "";       // symbol of the administration currently being parsed
$country = "";       // name that follows the symbol on the administration row
$agency_list = false; // true while inside a list of operating agencies
$in_table = false;    // true once the first administration header has been seen

while (attempt_gets()) {
    if ($line === "") continue; // ignore blank lines, usually near page markings
    if ($in_table && str_starts_with($line, "SECTION 4")) break; // we're at the end of the table
    if (str_starts_with($line, "Chapter ")) continue; // skip page marking lines

    if (str_starts_with($line, "Symbol Geographical Area")) {
        // A new administration starts: emit the last agency of the previous
        // list, then forget it so it cannot be emitted again under the new
        // administration.
        write_agency_row($out, $cur_adm, $country, $agency);
        $agency = "";
        $agency_list = false;

        // The administration row is the line right after the column header.
        if (!attempt_gets()) break;

        // Keep only the part before the first " A ", " B " or " C " marker,
        // then split it into symbol and country.
        $adm_row = trim(preg_split('/ (A|B|C) /', $line)[0]);
        [$cur_adm, $country] = split_first_word($adm_row);
        $in_table = true;
    } elseif ($agency_list && $in_table) {
        // An agency entry starts with a three-digit number followed by a space.
        // NOTE(review): the original compared a three-character slice against
        // a single space, which can never match a trimmed line; confirm whether
        // the data has exactly one space or a wider gap after the number.
        if (preg_match('/^\d{3} /', $line) === 1) {
            // A new entry begins, so the previous one is complete.
            write_agency_row($out, $cur_adm, $country, $agency);
            $agency = $line;
        } elseif ($agency !== "") {
            // Continuation of a wrapped agency name. Lines seen before the
            // first numbered entry of a list are ignored.
            $agency .= " " . $line;
        }
    } elseif ($in_table && str_starts_with($line, "12A Code")) {
        $agency_list = true; // we are starting a list of operating agencies
    }
}

// Emit the entry still pending when the table (or the file) ended.
write_agency_row($out, $cur_adm, $country, $agency);

fclose($handle);
fclose($out);