itu_agency_parser/dump.php
2025-07-17 10:14:12 -04:00

80 lines
No EOL
2.9 KiB
PHP

<?php
// I guarantee I will not understand any of this in 48 hours
//
// Setup: read PREFACE_EN.txt, remove every form-feed character (U+000C),
// save the cleaned text as data_clean.txt, then open that cleaned copy for
// line-by-line reading ($handle) and output.csv for writing ($out).
// Every step aborts the script with a message if it fails.
$data_dirty = file_get_contents("PREFACE_EN.txt");
if ($data_dirty === false) die("Could not read PREFACE_EN.txt");
$data_clean = str_replace("\u{000c}", "", $data_dirty);
if (file_put_contents("data_clean.txt", $data_clean) === false) die("Could not write data_clean.txt");
$handle = fopen("data_clean.txt", "r");
if (!$handle) die("Could not open input file");
$out = fopen('output.csv', 'w');
// Check $out here: testing $handle a second time would never catch a failed open of output.csv.
if (!$out) die("Could not open output file");
/**
 * Reads the next line from the global input stream $handle and stores it,
 * with surrounding whitespace trimmed, in the global $line.
 *
 * At end of input (or on a read error) $line is left untouched and false is
 * returned, so a `while (attempt_gets())` loop ends normally and the code
 * after it can still run. The stream is NOT closed here; closing it is left
 * to the caller.
 *
 * @return bool true when a line was read into $line, false when fgets() had nothing to return.
 */
function attempt_gets(): bool
{
    global $handle, $line;

    $fileline = fgets($handle);
    if ($fileline === false) {
        return false;
    }

    $line = trim($fileline);
    return true;
}
/**
 * Splits "CODE remaining text" at the first space.
 *
 * @param string $text Text whose first space-delimited token is the code.
 * @return array{0: string, 1: string} [code, remainder], both trimmed; the
 *                                      remainder is "" when $text has no space.
 */
function split_code_and_name(string $text): array
{
    $parts = explode(" ", $text, 2);
    return [trim($parts[0]), trim($parts[1] ?? "")];
}

/**
 * Writes one CSV row (administration, country, agency number, agency name)
 * for the accumulated agency text. Nothing is written when any of the four
 * fields would be empty.
 *
 * @param resource $out     Open, writable stream for the CSV output.
 * @param string   $adm     Current administration symbol.
 * @param string   $country Current country / geographical area name.
 * @param string   $agency  Accumulated agency text: number, a space, then the name.
 */
function write_agency_row($out, string $adm, string $country, string $agency): void
{
    [$agency_num, $agency_name] = split_code_and_name($agency);
    if ($adm === "" || $country === "" || $agency_num === "" || $agency_name === "") {
        return;
    }
    fputcsv($out, [$adm, $country, $agency_num, $agency_name]);
}

// Parser state.
$line = "";            // current trimmed input line, filled in by attempt_gets()
$agency = "";          // text of the agency being accumulated ("" = nothing pending)
$cur_adm = "";         // administration symbol from the most recent table header
$country = "";         // country name from the most recent table header
$agency_list = false;  // true while inside a "12A Code" list of operating agencies
$in_table = false;     // true once the first "Symbol Geographical Area" header was seen

while (attempt_gets()) {
    if ($line === "") continue;                                   // ignore blank lines, usually near page markings
    if ($in_table && str_starts_with($line, "SECTION 4")) break;  // we're at the end of the table
    if (str_starts_with($line, "Chapter ")) continue;             // skip page marking lines

    if (str_starts_with($line, "Symbol Geographical Area")) {
        // A new administration header ends the current agency list: write the
        // pending agency under the administration it belongs to, then forget it
        // so it can never be written again under the next administration.
        if ($agency_list) {
            write_agency_row($out, $cur_adm, $country, $agency);
        }
        $agency = "";
        $agency_list = false;

        // The line after the header holds "<symbol> <country> ..."; anything from
        // a standalone " A ", " B " or " C " column onwards is cut off.
        if (!attempt_gets()) break;
        $line = trim(preg_split('/ (A|B|C) /', $line)[0]);
        [$cur_adm, $country] = split_code_and_name($line);
        $in_table = true;
    } elseif ($agency_list && $in_table) {
        // An agency row starts with a three-digit number followed by a space.
        // NOTE(review): the previous test compared substr($line, 3, 3) with a
        // one-character string, which can never be true for a trimmed line, so
        // no agency was ever recognised. If the input separates number and name
        // with exactly three spaces, tighten this pattern to '/^\d{3} {3}/'.
        if (preg_match('/^\d{3} /', $line) === 1) {
            write_agency_row($out, $cur_adm, $country, $agency); // previous agency is complete
            $agency = $line;
        } elseif ($agency !== "") {
            // Continuation line of a wrapped agency name. Lines seen before the
            // first agency row of a list are dropped (they were never written).
            $agency .= " " . $line;
        }
    } elseif ($in_table && str_starts_with($line, "12A Code")) {
        $agency_list = true; // we are starting a list of operating agencies
    }
}

// The table (or the input) ended while an agency was still pending: write it.
if ($agency_list) {
    write_agency_row($out, $cur_adm, $country, $agency);
}

// The guard keeps this safe even if the input stream was already closed elsewhere.
if (is_resource($handle)) {
    fclose($handle);
}
fclose($out);