Initial commit

main
Tait Hoyem 1 month ago
commit 6247a367cb

180
.gitignore vendored

@ -0,0 +1,180 @@
### Generated by gibo (https://github.com/simonwhitaker/gibo)
### https://raw.github.com/github/gitignore/4488915eec0b3a45b5c63ead28f286819c0917de/Rust.gitignore
# Generated by Cargo
# will have compiled files and executables
debug/
target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
### Generated by gibo (https://github.com/simonwhitaker/gibo)
### https://raw.github.com/github/gitignore/4488915eec0b3a45b5c63ead28f286819c0917de/Python.gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

@ -0,0 +1,18 @@
[package]
name = "final_project_cpsc3620"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
csv = "1.3.0"
itertools = "0.12.1"
nalgebra = "0.32.4"
nalgebra-sparse = "0.9.0"
num-traits = "0.2.18"
rand = "0.8.5"
rust_decimal = "1.34.3"
serde = { version = "1.0.197", features = ["derive"] }
serde_with = "3.7.0"
text_io = "0.1.12"

@ -0,0 +1,4 @@
# CPSC 3620 Presentation
* Simply needs `cargo` (Rust's package manager)
- To change the `new.csv` file to adjust to new weights, you will need `latlon3` for Python

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,36 @@
import csv
from latlon import LatLon, Latitude, Longitude
import math
# https://stackoverflow.com/questions/19412462/getting-distance-between-two-points-based-on-latitude-longitude#43211266
# https://pypi.org/project/latlon3/

# Index airports by IATA code (column 4) so each route does an O(1) dict
# lookup instead of scanning the whole airport list (O(n*m) -> O(n+m)).
# Like the original per-route scan, a duplicate IATA code keeps the last row.
airports = {}
with open("airports.csv") as airport_file:
    for port in csv.reader(airport_file):
        airports[port[4]] = port

# Rewrite routes.csv -> new.csv with column 9 replaced by the great-circle
# distance between the source and destination airports; rows whose airports
# are unknown or whose lat/lon are malformed are dropped.
with open("routes.csv") as routes_file:
    # newline="" prevents csv.writer from emitting blank lines on Windows.
    with open("new.csv", "w", newline="") as routes2_file:
        routes = csv.reader(routes_file)
        new = csv.writer(routes2_file)
        next(routes, None)  # skip the header row
        for route in routes:
            p1 = airports.get(route[3])  # source airport row, or None
            p2 = airports.get(route[5])  # destination airport row, or None
            try:
                d1 = LatLon(Latitude(float(p1[6])), Longitude(float(p1[7])))
                d2 = LatLon(Latitude(float(p2[6])), Longitude(float(p2[7])))
            except (TypeError, ValueError, IndexError):
                # TypeError: airport not found (p1/p2 is None);
                # ValueError/IndexError: malformed or missing lat/lon fields.
                continue
            route[9] = d1.distance(d2)
            if str(route[9]) != "nan":
                new.writerow(route)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,113 @@
How Not To Write a Flight Route Planner
1. Do not use real data
2. Do not use DFS
3. Do not use BFS
4. Do not use Dijkstra's (naively)
5. Lat/Lon are not coordinates
1. Do not use real data
Why?
- It is impossible to track down bugs,
especially when you get into a loop
that repeatedly prints:
[293, 23836, 202, 223, 2235]
- It can take a long time to load.
- It may be old data (it was)
2. Do not use DFS
Why?
- It may take a long time to complete
- It needs to check all possibilities
down possibly useless rabbit
holes before checking nearer paths
- It may find a long connection
before a short one
Consider the following
depth-first-search operations:
YYC -> YYZ
YYZ -> YVR
YYC -> YVR
3. Do not use BFS
Why?
- Does not take into account any weights:
- cost of flight
- distance of airports
Consider the following
breadth-first-search operations:
YYC -> AMS
YYC -> YVR
YYC -> YYZ
AMS -> TPE
YVR -> TPE
4. Do not use Dijkstra's (naively)
Why?
- What do you use for a weight?
- cost of flight
- number of connections
- distance between airports
- Selecting one of these,
without taking into account the others,
makes for a terrible flight planner
- You may want to have additional bounds
for example: you may want to confirm
that the schedule for the flight lines up
I could not find data for this, however
Q: What happens when you only take into
account the distance between airports?
A: A flight from YYC to TPE
would look like the following:
- YYC
- MSP
- GRR
- DTW
- EWR
- MAD
- CAI
- TUU
- DMM
- BAH
- DXB
- TPE
Q: Okay, so how do you implement a flight planner?
A: Use Dijkstra's, with multiple weights.
For mine, I used:
- The distance between airports
- + a fixed cost for connections
The distance could be swapped out with cost of flight.
I couldn't find a database of fare costs.
5. Lat/Lon are not coordinates
- You can not simply use (p1.lat - p2.lat).
- THIS WILL NOT WORK!
- Use a library, or learn a bunch of math.
- I used latlon3
Libraries:
- latlon3 (Python)
- nalgebra, nalgebra_sparse,
serde, rust_decimal (Rust)
Demo

@ -0,0 +1,22 @@
use serde::{de::Deserializer, Deserialize};
/// Deserialize a value of type `T`, mapping the type's `Default` value to
/// `None` (e.g. an empty string or `0` in the CSV becomes `None`).
pub fn default_as_none<'de, D, T>(deserializer: D) -> Result<Option<T>, D::Error>
where
    T: Default + PartialEq + Deserialize<'de>,
    D: Deserializer<'de>,
{
    let parsed = T::deserialize(deserializer)?;
    // `filter` drops the value exactly when it equals the default.
    Ok(Some(parsed).filter(|value| *value != T::default()))
}
/// Deserialize a value of type `T`, converting any parse failure into `None`
/// instead of failing the whole record (e.g. `\N` placeholders in numeric
/// columns).
///
/// The original `T: Default` bound was unused — `.ok()` alone discards the
/// error — so it has been removed (loosening bounds is backward-compatible).
pub fn error_as_none<'de, D, T>(deserializer: D) -> Result<Option<T>, D::Error>
where
    T: Deserialize<'de>,
    D: Deserializer<'de>,
{
    Ok(T::deserialize(deserializer).ok())
}

@ -0,0 +1,238 @@
mod deserializers;
use deserializers::*;
use nalgebra::DMatrix;
use nalgebra_sparse::csr::CsrMatrix;
use nalgebra_sparse::SparseEntry;
use serde::{Deserialize, Serialize};
use std::fs::File;
/// One airport record from `data/airports.csv`.
///
/// The renames map the dataset's `IATA`/`ICAO`/`type` column labels onto
/// Rust-friendly field names (`type` is also a Rust keyword).
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Airport {
    // Numeric identifier assigned by the dataset.
    airport_id: i32,
    name: String,
    city: String,
    country: String,
    // 3-letter IATA code (e.g. "YYC"); used as the join key against routes
    // in `main` and as the user-facing lookup key in `read_iata_code`.
    #[serde(rename = "IATA")]
    iata_code: String,
    // 4-letter ICAO code.
    #[serde(rename = "ICAO")]
    icao_code: String,
    lat: f64,
    lon: f64,
    altitude: i32,
    timezone: String,
    // Daylight-saving-time marker — presumably the dataset's single-letter
    // DST code; verify against the data source.
    dst: String,
    // NOTE(review): looks like an IANA tz database name — confirm.
    tzdb: String,
    #[serde(rename = "type")]
    port_type: String,
    source: String,
}
/// One route record from `data/new.csv`: a single airline flying
/// `src_airport` -> `dest_airport` with a precomputed edge weight.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Route {
    airline: String,
    // ID columns may hold non-numeric placeholders; parse failures map to
    // `None` via `error_as_none` instead of rejecting the row.
    #[serde(deserialize_with = "error_as_none")]
    airline_id: Option<i32>,
    // IATA code of the origin airport (matched against `Airport::iata_code`).
    src_airport: String,
    #[serde(deserialize_with = "error_as_none")]
    src_airport_id: Option<i32>,
    // IATA code of the destination airport.
    dest_airport: String,
    #[serde(deserialize_with = "error_as_none")]
    dest_airport_id: Option<i32>,
    // An empty string in the data is normalized to `None`.
    #[serde(deserialize_with = "default_as_none")]
    codeshare: Option<String>,
    #[serde(deserialize_with = "error_as_none")]
    stops: Option<i32>,
    planes: String,
    // Edge weight used to build the route matrix — the distance written by
    // the preprocessing script (could be swapped for fare cost).
    cost: f64,
}
use std::collections::{BTreeSet, HashMap, HashSet};
/// Build a mapping from airport code to a dense matrix index.
///
/// Codes are first collected into a `BTreeSet`, so indices are assigned in
/// sorted order and are deterministic regardless of route order in the input.
/// Takes `&[Route]` (any `&Vec<Route>` caller coerces automatically).
fn airport_map(routes: &[Route]) -> HashMap<String, usize> {
    let airports: BTreeSet<String> = routes
        .iter()
        .flat_map(|route| [route.src_airport.clone(), route.dest_airport.clone()])
        .collect();
    airports
        .into_iter()
        .enumerate()
        .map(|(i, k)| (k, i))
        .collect()
}
/// Build a dense adjacency matrix of flight costs, then compress it to CSR.
///
/// Cell `(src, dest)` holds the route's `cost`. When the data contains the
/// same src/dest pair more than once (multiple airlines fly the same leg),
/// the last row wins — matching the original dense-overwrite behavior.
fn into_matrix(routes: &[Route], airport_map: &HashMap<String, usize>) -> CsrMatrix<f64> {
    let num_airports = airport_map.len();
    let mut adjacency_matrix = DMatrix::<f64>::zeros(num_airports, num_airports);
    for route in routes {
        let src_idx = airport_map[&route.src_airport];
        let dest_idx = airport_map[&route.dest_airport];
        adjacency_matrix[(src_idx, dest_idx)] = route.cost;
    }
    // CSR stores only the structurally non-zero cells, so Dijkstra's
    // neighbour scan touches just the real edges.
    CsrMatrix::from(&adjacency_matrix)
}
/// Read every record of a CSV stream into a `Vec<T>`.
///
/// # Panics
/// Panics with the underlying CSV error message if any record fails to
/// parse (the original `unwrap()` gave no context at all).
fn load_data<R, T>(read: R) -> Vec<T>
where
    T: for<'a> Deserialize<'a> + std::fmt::Debug,
    R: std::io::Read,
{
    csv::Reader::from_reader(read)
        .deserialize()
        .map(|result| result.unwrap_or_else(|e| panic!("failed to parse CSV record: {e}")))
        .collect()
}
/// Adjacency lookup for a graph stored as a sparse matrix.
trait Adjacent {
    /// All node indices directly reachable from `src`.
    fn all_adjacent(&self, src: usize) -> Vec<usize>;
}

impl Adjacent for CsrMatrix<f64> {
    fn all_adjacent(&self, src: usize) -> Vec<usize> {
        // In CSR form, a row's stored column indices are exactly the
        // outgoing edges of that node.
        self.row(src).col_indices().to_vec()
    }
}
/// Fixed penalty added to every edge so that routes with fewer connections
/// are preferred over marginally shorter multi-hop routes.
const CONNECTION_COST: f64 = 10_000_000.0;

/// Dijkstra's shortest path from `start` to `end` over the cost matrix.
///
/// Returns the node indices along the path (both endpoints included), or
/// `None` when `end` is unreachable from `start`. Nodes in the returned
/// path are guaranteed to be unique. This is the O(V^2) array-scan variant
/// (no priority queue), which is fine at airport-graph scale.
fn dijkstras(graph: &CsrMatrix<f64>, start: usize, end: usize) -> Option<Vec<usize>> {
    let num_nodes = graph.nrows();
    let mut dist: Vec<f64> = vec![f64::MAX; num_nodes];
    let mut prev: Vec<Option<usize>> = vec![None; num_nodes];
    let mut visited: Vec<bool> = vec![false; num_nodes];
    dist[start] = 0f64;
    for _ in 0..num_nodes {
        let u = min_distance(&dist, &visited);
        // Only unreachable nodes remain; further iterations cannot relax
        // anything, so stop early (same result, less work).
        if dist[u] == f64::MAX {
            break;
        }
        visited[u] = true;
        if u == end {
            break;
        }
        for v in graph.all_adjacent(u) {
            let weight = match graph.get_entry(u, v).unwrap() {
                // Explicitly-stored zeros carry no route; skip them.
                SparseEntry::Zero => continue,
                SparseEntry::NonZero(i) => *i,
            };
            // Edge cost plus the per-connection penalty (was the magic
            // literal `f64::from(10000000)`).
            let alt = dist[u] + weight + CONNECTION_COST;
            if alt < dist[v] {
                dist[v] = alt;
                prev[v] = Some(u);
            }
        }
    }
    if dist[end] == f64::MAX {
        None
    } else {
        Some(reconstruct_path(prev, start, end))
    }
}
/// Index of the unvisited node with the smallest tentative distance.
///
/// Ties resolve to the highest index (the `<=` keeps updating), preserving
/// the original selection order. Returns 0 when every node is visited.
/// Takes slices instead of `&Vec<_>` (callers coerce automatically).
fn min_distance(dist: &[f64], visited: &[bool]) -> usize {
    let mut min_dist = f64::MAX;
    let mut min_index = 0;
    for (i, &d) in dist.iter().enumerate() {
        if !visited[i] && d <= min_dist {
            min_dist = d;
            min_index = i;
        }
    }
    min_index
}
/// Rebuild the start-to-end path by walking predecessor links backwards
/// from `end`, then reversing into forward order. Stops when the chain
/// runs out (`None`) or once `start` has been recorded.
fn reconstruct_path(prev: Vec<Option<usize>>, start: usize, end: usize) -> Vec<usize> {
    let mut reversed = vec![end];
    let mut node = end;
    loop {
        match prev[node] {
            Some(parent) => {
                reversed.push(parent);
                node = parent;
                if node == start {
                    break;
                }
            }
            None => break,
        }
    }
    reversed.reverse();
    reversed
}
/// Translate a path of matrix indices back into airport codes by inverting
/// the code-to-index map once, then looking each index up directly.
fn list_airports(connections: Vec<usize>, map: &HashMap<String, usize>) -> Vec<String> {
    let mut index_to_code: HashMap<usize, String> = HashMap::with_capacity(map.len());
    for (code, &index) in map {
        index_to_code.insert(index, code.clone());
    }
    connections
        .into_iter()
        .map(|index| index_to_code[&index].clone())
        .collect()
}
use text_io::read;
/// Prompt with `s` until the user enters a known IATA code, then return a
/// clone of the matching airport.
fn read_iata_code(s: &str, airports: &[Airport]) -> Airport {
    use std::io::Write;
    loop {
        print!("{}", s);
        // BUG FIX: `print!` does not flush stdout, so the prompt could stay
        // invisible while `read!` blocked waiting for input.
        let _ = std::io::stdout().flush();
        let input: String = read!();
        if let Some(airport) = airports.iter().find(|a| a.iata_code == input) {
            return airport.clone();
        } else {
            println!("Invalid IATA code! Try again.");
        }
    }
}
/// Ask the user for origin and destination airports and return their matrix
/// indices from `map`.
///
/// # Panics
/// Panics if an entered airport's IATA code is missing from `map`.
fn get_request(airports: &Vec<Airport>, map: &HashMap<String, usize>) -> (usize, usize) {
    let from = read_iata_code("From IATA code: ", airports);
    // BUG FIX: the second prompt previously also said "From IATA code: ".
    let to = read_iata_code("To IATA code: ", airports);
    (
        *map.get(&from.iata_code).unwrap(),
        *map.get(&to.iata_code).unwrap(),
    )
}
use itertools::Itertools;
/// Per-leg cost for each consecutive pair of airports along `route`.
///
/// # Panics
/// Panics if any consecutive pair has no stored edge in `matrix` — that
/// would mean the path was not produced from this matrix.
fn get_weight(route: &[usize], matrix: &CsrMatrix<f64>) -> Vec<f64> {
    route
        .iter()
        .tuple_windows()
        .map(|(src, dest)| match matrix.get_entry(*src, *dest).unwrap() {
            // Typo fixed: was "Invlaid path!".
            SparseEntry::Zero => panic!("Invalid path!"),
            SparseEntry::NonZero(nz) => *nz,
        })
        .collect()
}
/// Entry point: load the route and airport data, build the cost matrix,
/// and answer a single interactive routing query.
fn main() {
    // Announce BEFORE the slow load (originally printed after it finished,
    // so the user saw nothing during the wait).
    println!("Loading route data!");
    let route_file = File::open("data/new.csv").expect("failed to open data/new.csv");
    let routes: Vec<Route> = load_data(route_file);

    // Codes that appear in at least one route; used to drop airports the
    // planner could never visit.
    let airports_with_route: HashSet<String> = routes
        .iter()
        .flat_map(|route| [route.src_airport.clone(), route.dest_airport.clone()])
        .collect();

    println!("Loading airport data!");
    let airports_file = File::open("data/airports.csv").expect("failed to open data/airports.csv");
    let airports: Vec<Airport> = load_data(airports_file)
        .into_iter()
        .filter(|airport: &Airport| airports_with_route.contains(&airport.iata_code))
        .collect();

    // Airport code -> matrix index.
    println!("Generating airport mapping!");
    let map = airport_map(&routes);

    // Routes + mapping -> sparse matrix of flight costs.
    println!("Generating route matrix!");
    let matrix = into_matrix(&routes, &map);

    let (from, to) = get_request(&airports, &map);
    let first = dijkstras(&matrix, from, to).expect("no route found between those airports");
    println!("{:?}", get_weight(&first, &matrix));
    println!("{:?}", list_airports(first, &map));
}
Loading…
Cancel
Save