Observation on Suspicious Fishing Behaviours

Author

Group 1 - Arya Siahaan, Seng Jing Yi, Thet Myat Noe

Published

June 24, 2024

Objectives

In this article, we will explore the possible signs of suspicious fishing behaviour in relation to IUU.

IUU Possible visualisations
Illegal: Unauthorised fishing by vessels in restricted areas (e.g., ecological preserves). Identify vessels that have overstayed in the ecological preserves.
Unregulated: Overfishing that depletes natural fish stocks and disrupts marine ecosystems. Identify vessels that have consistently outperformed comparable peers, which potentially suggests fishing beyond allowable limits.
Unreported: Fishing activities that are not reported, or are under-reported, to ports, which hampers accurate assessment of fish stocks. Identify suspicious vessels that changed their route to avoid “Ghoti Preserve” after SSEC was caught.
Code
pacman::p_load(jsonlite, tidyverse, tidyr, 
               knitr, lubridate, dplyr, 
               igraph, ggraph, ggdist, ggplot2, 
               SmartEDA, sf, tidygraph, reshape2, readr,
               DT, patchwork,plotly, RColorBrewer)

# Load the CSV extracts used by the analyses in this file. All paths are
# relative to the working directory.
# NOTE(review): tx_qty and E_Hbrpt_v are not referenced in the code visible
# in this section - presumably used elsewhere; confirm before removing.
tx_qty <- read_csv("data/tx_qty.csv")
# Ping records; the code below reads start_time, dwell, area, vessel_id and
# vessel_name from this table.
ping_activity <- read_csv("data/ping_activity.csv")
E_Hbrpt_v <- read_csv("data/hbrpt.csv")
# Vessel master data; joined on vessel_id to obtain vessel_name,
# vessel_company, vessel_type, tonnage, length_overall and flag_country.
N_vessel <- read_csv("data/N_vessel.csv")
#location_legend <- read_csv("data/location_legend.csv")
#vessel_movement <- read_rds("data/rds/vessel_movement_data.rds")
# Cargo records mapped to vessels; the code below reads cargo_id, tx_date,
# vessel_id, fish_species and qty_tons from this table.
nearest_tx_date <- read_csv("data/mapped_records.csv")
# Per-trip summary; the code below reads vessel_id, start_date and
# unique_areas (a ", "-separated list of areas) from this table.
time_summary_df <- read_csv("data/time-summary.csv")

Unauthorised - Vessels with Possibility of Illegal Fishing

Zooming into the 3 ecological preserves, we observed the Transponder Ping records to identify any fishing vessels that have overstayed their visits.

Steps taken:

  1. Summarise the median dwell time of vessels per month at each area.
  2. Flag the vessels whose median dwell time exceeds the 90th percentile of their area for the month (Is_above_90%), and count how often each vessel is flagged. Where a vessel is flagged at least twice in the same area, return the vessel's details for investigation.
Code
# ---- Flag vessels with unusually long dwell times in the preserves ----

# Parse the ping timestamps and derive the calendar month (a Date set to the
# first day of the month) that every monthly aggregation below is keyed on.
ping_activity$start_time <- ymd_hms(ping_activity$start_time)
ping_activity$month <- as.Date(floor_date(ping_activity$start_time, "month"))

# Median dwell time per vessel, per month, in each area.
agg_data <- ping_activity %>%
  group_by(area, vessel_id, month) %>%
  summarise(median_dwell = median(dwell, na.rm = TRUE), .groups = "drop")

# Within each area and month, compute the 90th percentile of the vessels'
# median dwell times and mark the vessels that exceed it.
agg_data2 <- agg_data %>%
  group_by(area, month) %>%
  mutate(
    `90%_dwell_time` = quantile(median_dwell, 0.90, na.rm = TRUE),
    `Is_above_90%` = ifelse(median_dwell > `90%_dwell_time`, "Yes", "No")
  ) %>%
  ungroup()

# Count, per vessel and preserve, the number of months in which the vessel
# exceeded the 90th percentile; keep vessels flagged in two or more months.
preserve_areas <- c("Ghoti Preserve", "Nemo Reef", "Don Limpet Preserve")

summary_data <- agg_data2 %>%
  filter(area %in% preserve_areas, `Is_above_90%` == "Yes") %>%
  group_by(vessel_id, area) %>%
  summarise(count_above_90 = n(), .groups = "drop") %>%
  filter(count_above_90 >= 2)

# Attach the vessel details and switch to display-friendly column names.
summary_data_DT <- summary_data %>%
  left_join(
    N_vessel %>%
      select(vessel_id, vessel_name, vessel_company, vessel_type,
             tonnage, length_overall, flag_country),
    by = "vessel_id"
  ) %>%
  select(
    `Overstayed Area` = area,
    Frequency = count_above_90,
    `Vessel Name` = vessel_name,
    `Vessel Company` = vessel_company,
    `Vessel Type` = vessel_type,
    Tonnage = tonnage,
    `Overall Length` = length_overall,
    `Country Flag` = flag_country
  )

# One table per preserve; these are reused by the plots further down.
ghoti_overstay <- summary_data_DT %>% filter(`Overstayed Area` == "Ghoti Preserve")
nemo_overstay <- summary_data_DT %>% filter(`Overstayed Area` == "Nemo Reef")
don_overstay <- summary_data_DT %>% filter(`Overstayed Area` == "Don Limpet Preserve")

# DT expects its options as a named list; calling base options() here would
# set a global R option and never pass the page length to the widget.
datatable(ghoti_overstay, options = list(pageLength = 3))  # 12 vessels
Code
# Vessels flagged in Nemo Reef. DT options must be passed as a named list via
# `options =`; base options() does not forward the page length to the widget.
datatable(nemo_overstay, options = list(pageLength = 3))   # 6 vessels
Code
# Vessels flagged in Don Limpet Preserve (the third preserve; the Nemo Reef
# table is already rendered above). DT options are passed as a named list.
datatable(don_overstay, options = list(pageLength = 2))  # 2 vessels

Ecological Preserve 1 - Ghoti Preserve

We further observed that there may be some false positives when visualising the Transponder Ping data set. Notably suspicious vessels include: Gunard Grabber, Sockeye Salmon Seeker, Largemouth Bass Looter, Brill Bandit, Sea Bass Bandit

We observed that, despite SSEC’s Snapper Snatcher being caught in May 2035, some vessels sought cover for the month of June, while others continued their operations from July 2035 onwards.

Code
# Vessels flagged as overstaying in Ghoti Preserve; each is drawn in its own
# colour, every other vessel is pooled under "Other".
selected_vessels <- ghoti_overstay$`Vessel Name`

# Pings recorded in the preserve, tagged with the month in which they started.
subset_data <- ping_activity %>%
  filter(area == "Ghoti Preserve") %>%
  mutate(month = floor_date(ymd_hms(start_time), "month"))

# One row per vessel and month with the median dwell time, the month as a
# Date, and the colour category of the vessel.
median_dwell_data <- subset_data %>%
  group_by(month, vessel_name) %>%
  summarize(median_dwell = median(dwell, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    month = as.Date(month),
    vessel_category = ifelse(
      vessel_name %in% selected_vessels,
      vessel_name,
      "Other"
    )
  )

# Named colour lookup: one interpolated "Paired" colour per vessel name,
# plus black for the pooled "Other" category.
unique_vessels <- unique(median_dwell_data$vessel_name)
n_colors <- length(unique_vessels)
color_palette <- colorRampPalette(brewer.pal(12, "Paired"))(n_colors)
vessel_colors <- color_palette
names(vessel_colors) <- unique_vessels
vessel_colors[["Other"]] <- "black"

# Monthly box plots with the individual vessels jittered on top. The `text`
# aesthetic is only used as the tooltip once the plot is passed to ggplotly().
p <- ggplot(
  data = median_dwell_data,
  mapping = aes(x = month, y = median_dwell, color = vessel_category)
) +
  geom_boxplot(aes(group = month), outlier.shape = NA) +
  geom_jitter(
    aes(text = paste("Vessel Name:", vessel_name)),
    width = 1,
    size = 1.5,
    alpha = 0.9
  ) +
  labs(
    x = "Month",
    y = "Median Dwell Time",
    color = "Vessel Name",
    title = "Median Dwell Time of Vessels by Month in Ghoti Preserve"
  ) +
  scale_color_manual(values = vessel_colors) +
  # One axis break per month so that every month is labelled
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  theme_minimal() +
  # Slanted month labels to keep them from overlapping
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Render as an interactive plotly widget
ggplotly(p, tooltip = "text")

Ecological Preserve 2 - Nemo Reefs

We further repeated this test for Nemo Reefs, and noted that vessels such as “Mr Ray” and “Louie the II” consistently overstayed in Nemo Reefs, with dwell times much higher than those of other vessels. “Clown Fish” and “Bluefish Bandit” also showed fluctuating median dwell times above the 75th percentile.

Note: The vessel of “Heart of Oceanus” was excluded as it has exceptionally long dwell time in February 2035 that distorted the plot. “Costa Smeralda” and “Dolphin Dasher” excluded from April 2035 plot.

Code
# Vessels flagged as overstaying in Nemo Reef; each is drawn in its own
# colour, every other vessel is pooled under "Other".
selected_vessels <- nemo_overstay$`Vessel Name`

# Pings recorded in Nemo Reef, tagged with the month in which they started.
# Three vessels are dropped from the plot data altogether.
# NOTE(review): the accompanying text describes two of these exclusions as
# applying to April 2035 only, but this filter removes them for every month.
subset_data <- ping_activity %>%
  filter(
    area == "Nemo Reef",
    vessel_id != "heartofoceanusf11",
    vessel_name != "Costa Smeralda",
    vessel_name != "Dolphin Dasher"
  ) %>%
  mutate(month = floor_date(ymd_hms(start_time), "month"))

# One row per vessel and month with the median dwell time, the month as a
# Date, and the colour category of the vessel.
median_dwell_data <- subset_data %>%
  group_by(month, vessel_name) %>%
  summarize(median_dwell = median(dwell, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    month = as.Date(month),
    vessel_category = ifelse(
      vessel_name %in% selected_vessels,
      vessel_name,
      "Other"
    )
  )

# Named colour lookup: one interpolated "Paired" colour per vessel name,
# plus black for the pooled "Other" category.
unique_vessels <- unique(median_dwell_data$vessel_name)
n_colors <- length(unique_vessels)
color_palette <- colorRampPalette(brewer.pal(12, "Paired"))(n_colors)
vessel_colors <- color_palette
names(vessel_colors) <- unique_vessels
vessel_colors[["Other"]] <- "black"

# Monthly box plots with the individual vessels jittered on top. The `text`
# aesthetic is only used as the tooltip once the plot is passed to ggplotly().
p2 <- ggplot(
  data = median_dwell_data,
  mapping = aes(x = month, y = median_dwell, color = vessel_category)
) +
  geom_boxplot(aes(group = month), outlier.shape = NA) +
  geom_jitter(
    aes(text = paste("Vessel Name:", vessel_name)),
    width = 1,
    size = 1.5,
    alpha = 0.9
  ) +
  labs(
    x = "Month",
    y = "Median Dwell Time",
    color = "Vessel Name",
    title = "Median Dwell Time of Vessels by Month in Nemo Reef"
  ) +
  scale_color_manual(values = vessel_colors) +
  # One axis break per month so that every month is labelled
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  theme_minimal() +
  # Slanted month labels to keep them from overlapping
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Render as an interactive plotly widget
ggplotly(p2, tooltip = "text")

Ecological Preserve 3 - Don Limpet Preserve

For Don Limpet Preserve, we noted 2 vessels with exceptionally long dwell time - Himark Royal and Mr Ray. However, for Himark Royal, the median time per month is relatively constant, and this may point to the vessel being permanently located there. Instead, Mr Ray is more suspicious with fluctuating activities in September 2035 and November 2035.

Code
# Vessels flagged as overstaying in Don Limpet Preserve; each is drawn in its
# own colour, every other vessel is pooled under "Other".
selected_vessels <- don_overstay$`Vessel Name`

# Pings recorded in the preserve, tagged with the month in which they started.
subset_data <- ping_activity %>%
  filter(area == "Don Limpet Preserve") %>%
  mutate(month = floor_date(ymd_hms(start_time), "month"))

# One row per vessel and month with the median dwell time, the month as a
# Date, and the colour category of the vessel.
median_dwell_data <- subset_data %>%
  group_by(month, vessel_name) %>%
  summarize(median_dwell = median(dwell, na.rm = TRUE), .groups = "drop") %>%
  mutate(
    month = as.Date(month),
    vessel_category = ifelse(vessel_name %in% selected_vessels, vessel_name, "Other")
  )

# Named colour lookup: one interpolated "Paired" colour per vessel name,
# plus black for the pooled "Other" category.
unique_vessels <- unique(median_dwell_data$vessel_name)
n_colors <- length(unique_vessels)
color_palette <- colorRampPalette(brewer.pal(12, "Paired"))(n_colors)
vessel_colors <- setNames(color_palette, unique_vessels)
vessel_colors["Other"] <- "black"

# Monthly box plots with the individual vessels jittered on top. The `text`
# aesthetic is only used as the tooltip once the plot is passed to ggplotly().
p3 <- ggplot(median_dwell_data, aes(x = month, y = median_dwell, color = vessel_category)) +
  geom_boxplot(aes(group = month), outlier.shape = NA) +
  geom_jitter(aes(text = paste("Vessel Name:", vessel_name)), width = 1, size = 1.5, alpha = 0.9) +
  scale_color_manual(values = vessel_colors) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +  # One break per month
  # The title names the area that is actually filtered above (it previously
  # read "Nemo Reef", carried over from the preceding plot).
  labs(title = "Median Dwell Time of Vessels by Month in Don Limpet Preserve",
       x = "Month",
       y = "Median Dwell Time",
       color = "Vessel Name") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))  # Slanted month labels

# Render as an interactive plotly widget
ggplotly(p3, tooltip = "text")

Unregulated - Vessels with possibility of Overfishing

Another sign of suspicious activity is where a vessel or its company is able to consistently return high volumes of catch compared with its peers.

Steps taken:

  1. Create a calculated field for the column on Percent_fill to see the relative proportion the cargo associated with the vessel occupies for each cargo-vessel mapping.
  2. For vessels whose Percent_fill is above the 90th percentile for the month, highlight these vessels in a new column, Percent_fill_above_90.
  3. Return the suspicious vessels that recorded Percent_fill_above_90 20 or more times.

We observed a total of 19 vessels have consistently returned cargo with high percentage filled of their vessels. This may be due to small vessel size or possibility of over-fishing. Hence, we will further investigate these 19 vessels.

To test out, we will be focusing on the 3 species:

  1. Beauvoir - That can be found in all ecological preserves and fishing grounds.
  2. Harland - Only found in Tuna Shelf
  3. Wrasse - Found in Wrasse Beds, Ghoti Preserve and Nemo Reefs and contributing to the highest quantity, besides Cod that peaks during September and October.

Out of these 19 vessels, we note 2 foreign fishing vessels, namely:

  1. Vessel of “Redfin Pickerel Raider” from the company “Solis PLC” from the nation of “Solterrix”
  2. Vessel of “Bigeye Tuna Buccaneer” from the company “Gomez-Mccormick” from the nation of “Calabrand”.
Code
# Lookup from fish species id to a readable "common name/scientific name"
# label.
fish_species_labels <- c(
  "gadusnspecificatae4ba" = "Cod/Gadus n.specificatae", 
  "piscesfrigus900" = "Birdseye/Pisces frigus", 
  "piscesfoetidaae7" = "Sockfish/Pisces foetida", 
  "labridaenrefert9be" = "Wrasse/Labridae n.refert", 
  "habeaspisces4eb" = "Beauvoir/Habeas pisces", 
  "piscissapidum9b7" = "Harland/Piscis sapidum", 
  "thunnininveradb7" = "Tuna/Thunnini n.vera", 
  "piscisosseusb6d" = "Offidiaa/Piscis osseus", 
  "piscessatisb87" = "Helenaa/Pisces satis", 
  "oncorhynchusrosea790" = "Salmon"
)

# For every cargo-to-vessel record, express the cargo quantity as a share of
# the vessel's tonnage. This is a row-wise calculation, so mutate() is used
# rather than summarise(): the result stays ungrouped and records that share
# the same cargo/date/vessel/species keys are all kept without warnings.
vessel_cargo_map <- nearest_tx_date %>%
  left_join(N_vessel, by = "vessel_id") %>%
  mutate(
    percent_fill = qty_tons / tonnage,
    tx_date = as.Date(tx_date)
  ) %>%
  select(cargo_id, tx_date, vessel_id, fish_species, percent_fill)

# Per species and month: mean fill, 90th percentile of fill, and a Yes/No
# flag for records above that percentile.
vessel_cargo_map <- vessel_cargo_map %>%
  group_by(fish_species, month = floor_date(tx_date, "month")) %>%
  mutate(
    monthly_mean = mean(percent_fill, na.rm = TRUE),
    percentile_90 = quantile(percent_fill, 0.90, na.rm = TRUE),
    percent_fill_above_90 = ifelse(percent_fill > percentile_90, "Yes", "No")
  ) %>%
  ungroup()

# Number of flagged records per vessel
vessel_percent_fill_counts <- vessel_cargo_map %>%
  filter(percent_fill_above_90 == "Yes") %>%
  group_by(vessel_id) %>%
  summarize(count_above_90 = n(), .groups = "drop")

# Keep the vessels that were flagged 20 or more times
filtered_vessel_counts <- vessel_percent_fill_counts %>%
  filter(count_above_90 >= 20)

# Total quantity and number of distinct cargoes per suspicious vessel and
# species
sus_overfish_cargo <- nearest_tx_date %>%
  filter(vessel_id %in% filtered_vessel_counts$vessel_id) %>%
  group_by(vessel_id, fish_species) %>%
  summarize(
    total_qty_tons = sum(qty_tons, na.rm = TRUE),
    distinct_cargo_count = n_distinct(cargo_id),
    .groups = "drop"
  )

# Vessel details of the suspicious vessels, with display-friendly names
N_vessel_overfish <- N_vessel %>%
  filter(vessel_id %in% sus_overfish_cargo$vessel_id) %>%
  select(
    `Vessel Name` = vessel_name,
    `Vessel Company` = vessel_company,
    `Vessel Type` = vessel_type,
    Tonnage = tonnage,
    `Overall Length` = length_overall,
    `Country Flag` = flag_country
  )

# DT options go in a named list and the option is spelt `pageLength`; base
# options(page_length = 3) would only set an unrelated global R option.
datatable(N_vessel_overfish, options = list(pageLength = 3))

Disrupted - Diversion from Ghoti Preserve after SSEC was caught

Lastly, we considered any drastic shift in vessel routes, especially away from Ghoti Preserve, where SSEC’s Snapper Snatcher had trespassed.

Steps taken:

  1. Subset the trips taken per vessel into 3 time periods. (1) Before SSEC was caught (before 14 May 2035), (2) Immediately after SSEC was caught, for the month of June, and (3) July onwards.
  2. Listed down the unique areas visited by the vessel and did a comparison on the changes in areas that the vessel has Avoided in June , Avoided_after May and Newly visited after May.

We observed that there are 29 vessels that had visited “Ghoti Preserve” in May or earlier, but purposefully avoided it in June 2035 and returned from July onwards. While the change in travel route may be a coincidence, there is also a high possibility that these vessels were intentionally avoiding scrutiny from the authorities for their dubious activities in June, and resumed them from July onwards.

Out of these 29 vessels, we observed the usual suspects among Oceanus fishing vessels, “Bigeye Tuna Buccaneer”, “Barracuda Bandit” and “Fish Finder”, as well as 3 foreign vessels, 2 of which were suspects of overfishing as mentioned earlier.

  1. (New) Vessel of “Sockeye Salmon Seeker”, from company of “Mcpherson-Wright” from nation of “Kethilim”
  2. (Overfishing Suspect) Vessel of “Redfin Pickerel Raider” from the company “Solis PLC” from the nation of “Solterrix”
  3. (Overfishing Suspect) Vessel of “Bigeye Tuna Buccaneer” from the company “Gomez-Mccormick” from the nation of “Calabrand”.
Code
# ---- Route changes around the date SSEC was caught (14 May 2035) ----

vessel_trips <- time_summary_df %>%
  mutate(start_date = as.Date(start_date))

# Trips are split on the day SSEC was caught
cutoff_date <- as.Date("2035-05-14")

# Trips that started on or before the cutoff date
trips_before_cutoff <- vessel_trips %>%
  filter(start_date <= cutoff_date)

# Trips that started in June 2035
trips_in_june <- vessel_trips %>%
  filter(start_date >= as.Date("2035-06-01") & start_date < as.Date("2035-07-01"))

# Trips that started after the cutoff date.
# NOTE(review): this window also contains 15-31 May and June, so a vessel
# whose only later Ghoti visit was in late May still counts as "returned"
# below - confirm whether a July-onwards window was intended.
trips_after_cutoff <- vessel_trips %>%
  filter(start_date > cutoff_date)

# Split ", "-separated area strings and return the distinct area names.
split_unique_areas <- function(area_strings) {
  unique(unlist(strsplit(area_strings, ", ")))
}

# Distinct areas listed in the `unique_areas` column of `data`.
get_unique_areas <- function(data) {
  split_unique_areas(pull(data, unique_areas))
}

# Collapse a set of areas into one ", "-separated string; NA when empty.
collapse_areas <- function(areas) {
  if (length(areas) > 0) paste(areas, collapse = ", ") else NA_character_
}

# Distinct areas per vessel in each period, stored as list columns. The
# column is passed directly, which avoids the deprecated cur_data().
areas_before_cutoff <- trips_before_cutoff %>%
  group_by(vessel_id) %>%
  summarize(areas_before = list(split_unique_areas(unique_areas)))

areas_after_cutoff <- trips_after_cutoff %>%
  group_by(vessel_id) %>%
  summarize(areas_after = list(split_unique_areas(unique_areas)))

areas_in_june <- trips_in_june %>%
  group_by(vessel_id) %>%
  summarize(areas_june = list(split_unique_areas(unique_areas)))

# One row per vessel; a period without trips leaves a NULL list entry
combined_areas <- full_join(areas_before_cutoff, areas_after_cutoff, by = "vessel_id") %>%
  full_join(areas_in_june, by = "vessel_id")

# Per vessel: areas newly visited after the cutoff, earlier areas not visited
# after the cutoff, and earlier areas not visited in June, each with a count.
# The Ghoti flags are computed on the area sets themselves rather than by
# pattern-matching the collapsed strings or the list column.
results <- combined_areas %>%
  rowwise() %>%
  mutate(
    new_areas = collapse_areas(setdiff(areas_after, areas_before)),
    old_areas_avoided = collapse_areas(setdiff(areas_before, areas_after)),
    old_areas_avoided_june = collapse_areas(setdiff(areas_before, areas_june)),
    new_areas_count = length(setdiff(areas_after, areas_before)),
    old_areas_avoided_count = length(setdiff(areas_before, areas_after)),
    old_areas_avoided_june_count = length(setdiff(areas_before, areas_june)),
    # Visited Ghoti Preserve before the cutoff but not in June
    avoided_ghoti_june = "Ghoti Preserve" %in% setdiff(areas_before, areas_june),
    # ... and visited it again on a trip that started after the cutoff
    returned_to_ghoti_after_cutoff =
      avoided_ghoti_june && "Ghoti Preserve" %in% areas_after
  ) %>%
  ungroup()

# Columns kept for the follow-up tables (the list columns are dropped)
final_results <- results %>%
  select(vessel_id, new_areas, old_areas_avoided, old_areas_avoided_june,
         new_areas_count, old_areas_avoided_count, old_areas_avoided_june_count,
         avoided_ghoti_june, returned_to_ghoti_after_cutoff)

# Show the results; the DT option is spelt `pageLength` (case-sensitive)
datatable(results, options = list(pageLength = 10), filter = "top")
Code
# Vessels that skipped Ghoti Preserve in June but visited it again on a trip
# after the cutoff date (both flags are logical columns of final_results).
subset_avoid_ghoti_june_returned_aft <- final_results %>%
  filter(avoided_ghoti_june, returned_to_ghoti_after_cutoff)

# Vessel details of those vessels, with display-friendly column names
N_vessel_avoid_ghoti <- N_vessel %>%
  filter(vessel_id %in% subset_avoid_ghoti_june_returned_aft$vessel_id) %>%
  select(
    `Vessel Name` = vessel_name,
    `Vessel Company` = vessel_company,
    `Vessel Type` = vessel_type,
    Tonnage = tonnage,
    `Overall Length` = length_overall,
    `Country Flag` = flag_country
  )

# DataTables option names are case-sensitive: `autoWidth`, not `autowidth`
datatable(N_vessel_avoid_ghoti, options = list(pageLength = 3, autoWidth = TRUE))

Illustration of Vessels Avoiding Ghoti in June and Returning from July Onwards

Code
# Vessels that avoided Ghoti Preserve in June, minus three vessels that are
# left out of the illustration.
N_vessel_avoid_ghoti_c <- N_vessel_avoid_ghoti %>%
  filter(
    `Vessel Name` != "White Marlin Master",
    `Vessel Name` != "Ocean Oracle",
    `Vessel Name` != "Sailfish Swindler"
  )

# Each remaining vessel is drawn in its own colour; every other vessel is
# pooled under "Other".
selected_vessels <- N_vessel_avoid_ghoti_c$`Vessel Name`

# Pings recorded in the preserve, tagged with the month in which they started.
subset_data <- ping_activity %>%
  filter(area == "Ghoti Preserve") %>%
  mutate(month = floor_date(ymd_hms(start_time), "month"))

# One row per vessel and month with the median dwell time, the month as a
# Date, and the colour category of the vessel.
median_dwell_data <- subset_data %>%
  group_by(month, vessel_name) %>%
  summarize(median_dwell = median(dwell, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    month = as.Date(month),
    vessel_category = ifelse(
      vessel_name %in% selected_vessels,
      vessel_name,
      "Other"
    )
  )

# Named colour lookup: one interpolated "Paired" colour per vessel name,
# plus black for the pooled "Other" category.
unique_vessels <- unique(median_dwell_data$vessel_name)
n_colors <- length(unique_vessels)
color_palette <- colorRampPalette(brewer.pal(12, "Paired"))(n_colors)
vessel_colors <- color_palette
names(vessel_colors) <- unique_vessels
vessel_colors[["Other"]] <- "black"

# Monthly box plots with the individual vessels jittered on top. The `text`
# aesthetic is only used as the tooltip once the plot is passed to ggplotly().
p <- ggplot(
  data = median_dwell_data,
  mapping = aes(x = month, y = median_dwell, color = vessel_category)
) +
  geom_boxplot(aes(group = month), outlier.shape = NA) +
  geom_jitter(
    aes(text = paste("Vessel Name:", vessel_name)),
    width = 1,
    size = 1.5,
    alpha = 0.9
  ) +
  labs(
    x = "Month",
    y = "Median Dwell Time",
    color = "Vessel Name",
    title = "Median Dwell Time of Vessels by Month in Ghoti Preserve"
  ) +
  scale_color_manual(values = vessel_colors) +
  # One axis break per month so that every month is labelled
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  theme_minimal() +
  # Slanted month labels to keep them from overlapping
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Render as an interactive plotly widget
ggplotly(p, tooltip = "text")