Initial commit

main
Tait Hoyem 1 month ago
commit 6247a367cb

180
.gitignore vendored

@ -0,0 +1,180 @@
### Generated by gibo (https://github.com/simonwhitaker/gibo)
### https://raw.github.com/github/gitignore/4488915eec0b3a45b5c63ead28f286819c0917de/Rust.gitignore
# Generated by Cargo
# will have compiled files and executables
debug/
target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
### Generated by gibo (https://github.com/simonwhitaker/gibo)
### https://raw.github.com/github/gitignore/4488915eec0b3a45b5c63ead28f286819c0917de/Python.gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

@ -0,0 +1,18 @@
[package]
name = "final_project_cpsc3620"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
csv = "1.3.0"
itertools = "0.12.1"
nalgebra = "0.32.4"
nalgebra-sparse = "0.9.0"
num-traits = "0.2.18"
rand = "0.8.5"
rust_decimal = "1.34.3"
serde = { version = "1.0.197", features = ["derive"] }
serde_with = "3.7.0"
text_io = "0.1.12"

@ -0,0 +1,4 @@
# CPSC 3620 Presentation
* Simply needs `cargo` (Rust's package manager)
- To change the `new.csv` file to adjust to new weights, you will need `latlon3` for Python

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,36 @@
import csv
from latlon import LatLon, Latitude, Longitude
import math
# https://stackoverflow.com/questions/19412462/getting-distance-between-two-points-based-on-latitude-longitude#43211266
# https://pypi.org/project/latlon3/

# Index airports by IATA code (column 4) so each route does an O(1) dict
# lookup instead of scanning the whole airport list (O(n*m) -> O(n+m)).
# Like the original per-route scan, a duplicate IATA code keeps the last row.
airports = {}
with open("airports.csv") as airport_file:
    for port in csv.reader(airport_file):
        airports[port[4]] = port

# Rewrite routes.csv -> new.csv with column 9 replaced by the great-circle
# distance between the source and destination airports; rows whose airports
# are unknown or whose lat/lon are malformed are dropped.
with open("routes.csv") as routes_file:
    # newline="" prevents csv.writer from emitting blank lines on Windows.
    with open("new.csv", "w", newline="") as routes2_file:
        routes = csv.reader(routes_file)
        new = csv.writer(routes2_file)
        next(routes, None)  # skip the header row
        for route in routes:
            p1 = airports.get(route[3])  # source airport row, or None
            p2 = airports.get(route[5])  # destination airport row, or None
            try:
                d1 = LatLon(Latitude(float(p1[6])), Longitude(float(p1[7])))
                d2 = LatLon(Latitude(float(p2[6])), Longitude(float(p2[7])))
            except (TypeError, ValueError, IndexError):
                # TypeError: airport not found (p1/p2 is None);
                # ValueError/IndexError: malformed or missing lat/lon fields.
                continue
            route[9] = d1.distance(d2)
            if str(route[9]) != "nan":
                new.writerow(route)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,113 @@
How Not To Write a Flight Route Planner
1. Do not use real data
2. Do not use DFS
3. Do not use BFS
4. Do not use Dijkstra's (naively)
5. Lat/Lon are not coordinates
1. Do not use real data
Why?
- It is impossible to track down bugs,
especially when you get into a loop
that repeatedly prints:
[293, 23836, 202, 223, 2235]
- It can take a long time to load.
- It may be old data (it was)
2. Do not use DFS
Why?
- It may take a long time to complete
- It needs to check all possibilities
down possibly useless rabbit
holes before checking nearer paths
- It may find a long connection
before a short one
Consider the following
depth-first-search operations:
YYC -> YYZ
YYZ -> YVR
YYC -> YVR
3. Do not use BFS
Why?
- Does not take into account any weights:
- cost of flight
- distance of airports
Consider the following
breadth-first-search operations:
YYC -> AMS
YYC -> YVR
YYC -> YYZ
AMS -> TPE
YVR -> TPE
4. Do not use Dijkstra's (naively)
Why?
- What do you use for a weight?
- cost of flight
- number of connections
- distance between airports
- Selecting one of these,
without taking into account the others,
makes for a terrible flight planner
- You may want to have additional bounds
for example: you may want to confirm
that the schedule for the flight lines up
I could not find data for this, however
Q: What happens when you only take into
account the distance between airports?
A: A flight from YYC to TPE
would look like the following:
- YYC
- MSP
- GRR
- DTW
- EWR
- MAD
- CAI
- TUU
- DMM
- BAH
- DXB
- TPE
Q: Okay, so how do you implement a flight planner?
A: Use Dijkstra's, with multiple weights.
For mine, I used:
- The distance between airports
- + a fixed cost for connections
The distance could be swapped out with cost of flight.
I couldn't find a database of fare costs.
5. Lat/Lon are not coordinates
- You can not simply use (p1.lat - p2.lat).
- THIS WILL NOT WORK!
- Use a library, or learn a bunch of math.
- I used latlon3
Libraries:
- latlon3 (Python)
- nalgebra, nalgebra_sparse,
serde, rust_decimal (Rust)
Demo

@ -0,0 +1,22 @@
use serde::{de::Deserializer, Deserialize};
/// Deserialize a value of type `T`, mapping the type's `Default` value to
/// `None` (e.g. an empty string or `0` in the CSV becomes `None`).
pub fn default_as_none<'de, D, T>(deserializer: D) -> Result<Option<T>, D::Error>
where
    T: Default + PartialEq + Deserialize<'de>,
    D: Deserializer<'de>,
{
    let parsed = T::deserialize(deserializer)?;
    // `filter` drops the value exactly when it equals the default.
    Ok(Some(parsed).filter(|value| *value != T::default()))
}
/// Deserialize a value of type `T`, converting any parse failure into `None`
/// instead of failing the whole record (e.g. `\N` placeholders in numeric
/// columns).
///
/// The original `T: Default` bound was unused — `.ok()` alone discards the
/// error — so it has been removed (loosening bounds is backward-compatible).
pub fn error_as_none<'de, D, T>(deserializer: D) -> Result<Option<T>, D::Error>
where
    T: Deserialize<'de>,
    D: Deserializer<'de>,
{
    Ok(T::deserialize(deserializer).ok())
}

@ -0,0 +1,238 @@
mod deserializers;
use deserializers::*;
use nalgebra::DMatrix;
use nalgebra_sparse::csr::CsrMatrix;
use nalgebra_sparse::SparseEntry;
use serde::{Deserialize, Serialize};
use std::fs::File;
/// One airport record from `data/airports.csv`.
///
/// The renames map the dataset's `IATA`/`ICAO`/`type` column labels onto
/// Rust-friendly field names (`type` is also a Rust keyword).
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Airport {
    // Numeric identifier assigned by the dataset.
    airport_id: i32,
    name: String,
    city: String,
    country: String,
    // 3-letter IATA code (e.g. "YYC"); used as the join key against routes
    // in `main` and as the user-facing lookup key in `read_iata_code`.
    #[serde(rename = "IATA")]
    iata_code: String,
    // 4-letter ICAO code.
    #[serde(rename = "ICAO")]
    icao_code: String,
    lat: f64,
    lon: f64,
    altitude: i32,
    timezone: String,
    // Daylight-saving-time marker — presumably the dataset's single-letter
    // DST code; verify against the data source.
    dst: String,
    // NOTE(review): looks like an IANA tz database name — confirm.
    tzdb: String,
    #[serde(rename = "type")]
    port_type: String,
    source: String,
}
/// One route record from `data/new.csv`: a single airline flying
/// `src_airport` -> `dest_airport` with a precomputed edge weight.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Route {
    airline: String,
    // ID columns may hold non-numeric placeholders; parse failures map to
    // `None` via `error_as_none` instead of rejecting the row.
    #[serde(deserialize_with = "error_as_none")]
    airline_id: Option<i32>,
    // IATA code of the origin airport (matched against `Airport::iata_code`).
    src_airport: String,
    #[serde(deserialize_with = "error_as_none")]
    src_airport_id: Option<i32>,
    // IATA code of the destination airport.
    dest_airport: String,
    #[serde(deserialize_with = "error_as_none")]
    dest_airport_id: Option<i32>,
    // An empty string in the data is normalized to `None`.
    #[serde(deserialize_with = "default_as_none")]
    codeshare: Option<String>,
    #[serde(deserialize_with = "error_as_none")]
    stops: Option<i32>,
    planes: String,
    // Edge weight used to build the route matrix — the distance written by
    // the preprocessing script (could be swapped for fare cost).
    cost: f64,
}
use std::collections::{BTreeSet, HashMap, HashSet};
/// Build a mapping from airport code to a dense matrix index.
///
/// Codes are first collected into a `BTreeSet`, so indices are assigned in
/// sorted order and are deterministic regardless of route order in the input.
/// Takes `&[Route]` (any `&Vec<Route>` caller coerces automatically).
fn airport_map(routes: &[Route]) -> HashMap<String, usize> {
    let airports: BTreeSet<String> = routes
        .iter()
        .flat_map(|route| [route.src_airport.clone(), route.dest_airport.clone()])
        .collect();
    airports
        .into_iter()
        .enumerate()
        .map(|(i, k)| (k, i))
        .collect()
}
/// Build a dense adjacency matrix of flight costs, then compress it to CSR.
///
/// Cell `(src, dest)` holds the route's `cost`. When the data contains the
/// same src/dest pair more than once (multiple airlines fly the same leg),
/// the last row wins — matching the original dense-overwrite behavior.
fn into_matrix(routes: &[Route], airport_map: &HashMap<String, usize>) -> CsrMatrix<f64> {
    let num_airports = airport_map.len();
    let mut adjacency_matrix = DMatrix::<f64>::zeros(num_airports, num_airports);
    for route in routes {
        let src_idx = airport_map[&route.src_airport];
        let dest_idx = airport_map[&route.dest_airport];
        adjacency_matrix[(src_idx, dest_idx)] = route.cost;
    }
    // CSR stores only the structurally non-zero cells, so Dijkstra's
    // neighbour scan touches just the real edges.
    CsrMatrix::from(&adjacency_matrix)
}
/// Read every record of a CSV stream into a `Vec<T>`.
///
/// # Panics
/// Panics with the underlying CSV error message if any record fails to
/// parse (the original `unwrap()` gave no context at all).
fn load_data<R, T>(read: R) -> Vec<T>
where
    T: for<'a> Deserialize<'a> + std::fmt::Debug,
    R: std::io::Read,
{
    csv::Reader::from_reader(read)
        .deserialize()
        .map(|result| result.unwrap_or_else(|e| panic!("failed to parse CSV record: {e}")))
        .collect()
}
/// Adjacency lookup for a graph stored as a sparse matrix.
trait Adjacent {
    /// All node indices directly reachable from `src`.
    fn all_adjacent(&self, src: usize) -> Vec<usize>;
}

impl Adjacent for CsrMatrix<f64> {
    fn all_adjacent(&self, src: usize) -> Vec<usize> {
        // In CSR form, a row's stored column indices are exactly the
        // outgoing edges of that node.
        self.row(src).col_indices().to_vec()
    }
}
/// Fixed penalty added to every edge so that routes with fewer connections
/// are preferred over marginally shorter multi-hop routes.
const CONNECTION_COST: f64 = 10_000_000.0;

/// Dijkstra's shortest path from `start` to `end` over the cost matrix.
///
/// Returns the node indices along the path (both endpoints included), or
/// `None` when `end` is unreachable from `start`. Nodes in the returned
/// path are guaranteed to be unique. This is the O(V^2) array-scan variant
/// (no priority queue), which is fine at airport-graph scale.
fn dijkstras(graph: &CsrMatrix<f64>, start: usize, end: usize) -> Option<Vec<usize>> {
    let num_nodes = graph.nrows();
    let mut dist: Vec<f64> = vec![f64::MAX; num_nodes];
    let mut prev: Vec<Option<usize>> = vec![None; num_nodes];
    let mut visited: Vec<bool> = vec![false; num_nodes];
    dist[start] = 0f64;
    for _ in 0..num_nodes {
        let u = min_distance(&dist, &visited);
        // Only unreachable nodes remain; further iterations cannot relax
        // anything, so stop early (same result, less work).
        if dist[u] == f64::MAX {
            break;
        }
        visited[u] = true;
        if u == end {
            break;
        }
        for v in graph.all_adjacent(u) {
            let weight = match graph.get_entry(u, v).unwrap() {
                // Explicitly-stored zeros carry no route; skip them.
                SparseEntry::Zero => continue,
                SparseEntry::NonZero(i) => *i,
            };
            // Edge cost plus the per-connection penalty (was the magic
            // literal `f64::from(10000000)`).
            let alt = dist[u] + weight + CONNECTION_COST;
            if alt < dist[v] {
                dist[v] = alt;
                prev[v] = Some(u);
            }
        }
    }
    if dist[end] == f64::MAX {
        None
    } else {
        Some(reconstruct_path(prev, start, end))
    }
}
/// Index of the unvisited node with the smallest tentative distance.
///
/// Ties resolve to the highest index (the `<=` keeps updating), preserving
/// the original selection order. Returns 0 when every node is visited.
/// Takes slices instead of `&Vec<_>` (callers coerce automatically).
fn min_distance(dist: &[f64], visited: &[bool]) -> usize {
    let mut min_dist = f64::MAX;
    let mut min_index = 0;
    for (i, &d) in dist.iter().enumerate() {
        if !visited[i] && d <= min_dist {
            min_dist = d;
            min_index = i;
        }
    }
    min_index
}
/// Rebuild the start-to-end path by walking predecessor links backwards
/// from `end`, then reversing into forward order. Stops when the chain
/// runs out (`None`) or once `start` has been recorded.
fn reconstruct_path(prev: Vec<Option<usize>>, start: usize, end: usize) -> Vec<usize> {
    let mut reversed = vec![end];
    let mut node = end;
    loop {
        match prev[node] {
            Some(parent) => {
                reversed.push(parent);
                node = parent;
                if node == start {
                    break;
                }
            }
            None => break,
        }
    }
    reversed.reverse();
    reversed
}
/// Translate a path of matrix indices back into airport codes by inverting
/// the code-to-index map once, then looking each index up directly.
fn list_airports(connections: Vec<usize>, map: &HashMap<String, usize>) -> Vec<String> {
    let mut index_to_code: HashMap<usize, String> = HashMap::with_capacity(map.len());
    for (code, &index) in map {
        index_to_code.insert(index, code.clone());
    }
    connections
        .into_iter()
        .map(|index| index_to_code[&index].clone())
        .collect()
}
use text_io::read;
/// Prompt with `s` until the user enters a known IATA code, then return a
/// clone of the matching airport.
fn read_iata_code(s: &str, airports: &[Airport]) -> Airport {
    use std::io::Write;
    loop {
        print!("{}", s);
        // BUG FIX: `print!` does not flush stdout, so the prompt could stay
        // invisible while `read!` blocked waiting for input.
        let _ = std::io::stdout().flush();
        let input: String = read!();
        if let Some(airport) = airports.iter().find(|a| a.iata_code == input) {
            return airport.clone();
        } else {
            println!("Invalid IATA code! Try again.");
        }
    }
}
/// Ask the user for origin and destination airports and return their matrix
/// indices from `map`.
///
/// # Panics
/// Panics if an entered airport's IATA code is missing from `map`.
fn get_request(airports: &Vec<Airport>, map: &HashMap<String, usize>) -> (usize, usize) {
    let from = read_iata_code("From IATA code: ", airports);
    // BUG FIX: the second prompt previously also said "From IATA code: ".
    let to = read_iata_code("To IATA code: ", airports);
    (
        *map.get(&from.iata_code).unwrap(),
        *map.get(&to.iata_code).unwrap(),
    )
}
use itertools::Itertools;
/// Per-leg cost for each consecutive pair of airports along `route`.
///
/// # Panics
/// Panics if any consecutive pair has no stored edge in `matrix` — that
/// would mean the path was not produced from this matrix.
fn get_weight(route: &[usize], matrix: &CsrMatrix<f64>) -> Vec<f64> {
    route
        .iter()
        .tuple_windows()
        .map(|(src, dest)| match matrix.get_entry(*src, *dest).unwrap() {
            // Typo fixed: was "Invlaid path!".
            SparseEntry::Zero => panic!("Invalid path!"),
            SparseEntry::NonZero(nz) => *nz,
        })
        .collect()
}
/// Entry point: load the route and airport data, build the cost matrix,
/// and answer a single interactive routing query.
fn main() {
    // Announce BEFORE the slow load (originally printed after it finished,
    // so the user saw nothing during the wait).
    println!("Loading route data!");
    let route_file = File::open("data/new.csv").expect("failed to open data/new.csv");
    let routes: Vec<Route> = load_data(route_file);

    // Codes that appear in at least one route; used to drop airports the
    // planner could never visit.
    let airports_with_route: HashSet<String> = routes
        .iter()
        .flat_map(|route| [route.src_airport.clone(), route.dest_airport.clone()])
        .collect();

    println!("Loading airport data!");
    let airports_file = File::open("data/airports.csv").expect("failed to open data/airports.csv");
    let airports: Vec<Airport> = load_data(airports_file)
        .into_iter()
        .filter(|airport: &Airport| airports_with_route.contains(&airport.iata_code))
        .collect();

    // Airport code -> matrix index.
    println!("Generating airport mapping!");
    let map = airport_map(&routes);

    // Routes + mapping -> sparse matrix of flight costs.
    println!("Generating route matrix!");
    let matrix = into_matrix(&routes, &map);

    let (from, to) = get_request(&airports, &map);
    let first = dijkstras(&matrix, from, to).expect("no route found between those airports");
    println!("{:?}", get_weight(&first, &matrix));
    println!("{:?}", list_airports(first, &map));
}
Loading…
Cancel
Save