initial commit

This commit is contained in:
femtodev 2025-07-17 10:14:12 -04:00
commit 30d129ab4a
5 changed files with 9355 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
*.txt
*.pdf

10
README.md Normal file
View file

@ -0,0 +1,10 @@
# ITU Operating Agency Table Parser
The ITU does not appear to publish a machine-readable list of Operating Agencies (except for the dropdown menus in SpaceCap).
To assist automatic ingestion of ITU filing data, this PHP script parses a text dump from the ITU-published PDF file
PREFACE_EN.pdf ("PREFACE TO THE BR INTERNATIONAL FREQUENCY INFORMATION CIRCULAR (TERRESTRIAL SERVICES)")
and generates a properly formatted CSV listing every Operating Agency it finds, together with its Administration.
The script fetch_and_convert.sh will automatically download the latest PREFACE_EN.pdf directly from the ITU website
(this file is updated regularly), then convert it to a CSV.

80
dump.php Normal file
View file

@ -0,0 +1,80 @@
<?php
// I guarantee I will not understand any of this in 48 hours
//
// Setup: load the text dump, strip form-feed characters (U+000C), write the
// cleaned text to data_clean.txt, then open that file for line-by-line reading
// ($handle) and output.csv for writing ($out). Each step aborts with a message
// if it fails, so a missing input or unwritable directory is reported instead
// of silently producing an empty CSV.
$data_dirty = file_get_contents("PREFACE_EN.txt");
if($data_dirty === false) die("Could not read PREFACE_EN.txt");
$data_clean = str_replace("\u{000c}", "", $data_dirty);
if(file_put_contents("data_clean.txt", $data_clean) === false) die("Could not write data_clean.txt");
$handle = fopen("data_clean.txt", "r");
if(!$handle) die("Could not open input file");
$out = fopen('output.csv', 'w');
// Check $out here: the original re-tested $handle, so a failed open of
// output.csv went unnoticed.
if(!$out) die("Could not open output file");
/**
 * Read the next line from the global $handle into the global $line.
 *
 * The line is stored with surrounding whitespace trimmed. When fgets()
 * returns false (end of file or read error) the handle is closed and the
 * script terminates, so this function never returns false to its caller.
 *
 * @return bool always true when it returns
 */
function attempt_gets(){
    global $handle, $line;
    $raw = fgets($handle);
    if($raw === false){
        // Nothing more to read: release the input file and stop the script.
        fclose($handle);
        die();
    }
    $line = trim($raw);
    return true;
}
/**
 * Split $text at its first space.
 *
 * @param string $text
 * @return array [first word, remainder], both trimmed; the remainder is ""
 *               when $text contains no space (avoids an undefined-offset warning)
 */
function split_first_word($text){
    $parts = explode(" ", $text, 2);
    return [trim($parts[0]), trim($parts[1] ?? "")];
}

/**
 * Write one CSV row (administration, country, agency number, agency name)
 * for a buffered agency entry. The entry is split at its first space into
 * number and name; the row is skipped if any of the four fields is empty.
 *
 * @param resource $out     open handle of the output CSV
 * @param string   $cur_adm administration symbol
 * @param string   $country geographical area / country text
 * @param string   $agency  buffered agency text ("<number> <name ...>")
 */
function write_agency_row($out, $cur_adm, $country, $agency){
    [$agency_num, $agency_name] = split_first_word($agency);
    if($cur_adm == "" || $country == "" || $agency_num == "" || $agency_name == "") return;
    fputcsv($out, [$cur_adm, $country, $agency_num, $agency_name]);
}

$line = "";            // current trimmed input line, filled in by attempt_gets()
$agency = "";          // agency entry being accumulated (may span several lines)
$cur_adm = "";         // symbol of the administration currently being read
$country = "";         // remainder of that administration's line
$agency_list = false;  // true while inside a "12A Code" list of operating agencies
$in_table = false;     // true once the first administration header has been seen
$adm_finished = false; // true from an administration header until its first agency
                       // line; while true, $agency still holds the previous
                       // administration's (already written) entry

while(attempt_gets()) {
    if($line == "") continue; // ignore blank lines, usually near page markings
    if($in_table && str_starts_with($line, "SECTION 4")){
        // End of the table. Write the agency that is still buffered (the
        // original exited here and lost it), then leave the loop so both
        // files are closed below.
        if($agency_list && !$adm_finished) write_agency_row($out, $cur_adm, $country, $agency);
        break;
    }
    if(str_starts_with($line, "Chapter ")) continue; // skip page marking lines
    if(str_starts_with($line, "Symbol Geographical Area")){
        // A new administration starts: write the last agency of the previous
        // list, but only if that list actually produced an agency line.
        // Otherwise $agency is stale and would be attributed to the wrong
        // administration.
        if($agency_list && !$adm_finished) write_agency_row($out, $cur_adm, $country, $agency);
        $agency_list = false;
        // The administration itself is on the line after the header.
        attempt_gets();
        // Keep only the text before the first " A ", " B " or " C " column.
        $line = trim(preg_split('/ (A|B|C) /', $line)[0]);
        [$cur_adm, $country] = split_first_word($line);
        $adm_finished = true;
        $in_table = true;
    }else if($agency_list && $in_table){
        // An agency entry starts with three digits followed by three spaces.
        // The slice is three characters long, so it must be compared with
        // three spaces; a single-space comparison can never match.
        if(ctype_digit(substr($line, 0, 3)) && substr($line, 3, 3) == str_repeat(" ", 3)){
            // A new entry begins: write the one buffered so far, unless this
            // is the first entry of the administration (buffer is stale).
            if(!$adm_finished) write_agency_row($out, $cur_adm, $country, $agency);
            $adm_finished = false;
            $agency = $line;
        }else{
            // Continuation line of the current entry.
            $agency .= " " . $line;
        }
    }else if(str_starts_with($line, "12A Code") && $in_table){
        $agency_list = true; // we are starting a list of operating agencies
    }
}
fclose($handle);
fclose($out);

5
fetch_and_convert.sh Executable file
View file

@ -0,0 +1,5 @@
#!/bin/bash
# Download the current PREFACE_EN.pdf from the ITU website, convert it to
# text with pdftotext, and run dump.php (which reads PREFACE_EN.txt).
#
# Stop at the first failing step so that a failed download or conversion
# never causes dump.php to run against stale or missing data.
set -euo pipefail

# Remove any previous copy so wget saves under the expected name;
# -f keeps the first run (no file yet) from reporting an error.
rm -f PREFACE_EN.pdf
wget "https://www.itu.int/en/ITU-R/terrestrial/brific/BRIFIC/Preface/PREFACE_EN.pdf"
pdftotext -layout PREFACE_EN.pdf
php dump.php

9258
output.csv Normal file

File diff suppressed because it is too large Load diff