Intro

Occasionally CATMAID users add the soma tag multiple times in error. We can find such neurons and generate CATMAID URLs to inspect them.

Setup

First load main packages

library(elmr)
# wrap the login in try() so that a failed login does not abort the document
cl <- try(catmaid_login())
catmaid_available <- inherits(cl, "catmaid_connection")
library(knitr)
# Evaluate chunks only when a CATMAID connection was obtained; adding today's
# date to the cache key means cached results are reused for at most one day.
opts_chunk$set(eval = catmaid_available, cache.extra = Sys.Date())
library(dplyr)
rgl::setupKnitr()

Finding neurons with multiple somata

First let’s fetch information about all the labels (aka tags) applied to nodes in the current project.

# one row per (label, skeleton, treenode) combination in the current project
# NOTE(review): row layout inferred from the columns used below
# (labelName, skeletonID, treenodeID) -- confirm against the catmaid docs
label_stats <- catmaid_get_label_stats()

Now let’s restrict to cases where there are multiple soma tags per skeleton

# select the rows carrying the soma label, grouped by skeleton
soma_labels <- label_stats %>%
  filter(labelName == "soma") %>%
  group_by(skeletonID)

# skeleton ids of neurons with more than one soma tag, most-tagged first
multiple_soma <- soma_labels %>%
  count(skeletonID) %>%
  filter(n > 1) %>%
  arrange(desc(n))

# every soma-tag row belonging to one of those skeletons
multiple_soma_info <- soma_labels %>%
  filter(skeletonID %in% multiple_soma$skeletonID)

XYZ position of the nodes we picked

# Look up details for a set of tree nodes.
#
# tnid: tree node ids, passed straight to catmaid_get_treenodes_detail().
# Returns the X, Y, Z, radius and login columns of the result, where login is
# the login name matched to each node's user_id in the CATMAID user list.
node_details <- function(tnid) {
  details <- catmaid_get_treenodes_detail(tnid)
  users <- catmaid_get_user_list()
  details$login <- users$login[match(details$user_id, users$id)]
  # coordinate columns are expected to be upper case elsewhere
  is_coord <- colnames(details) %in% c("x", "y", "z")
  colnames(details)[is_coord] <- toupper(colnames(details)[is_coord])
  details[c("X", "Y", "Z", "radius", "login")]
}

# Append the node details (X, Y, Z, radius, login) as extra columns.
# node_details() already returns a data frame with those column names, so the
# result can be bound on directly.
multiple_soma_info <- cbind(
  as.data.frame(multiple_soma_info),
  node_details(multiple_soma_info[["treenodeID"]])
)

Now let’s calculate the distance from the neuropil surface

# distance of each tagged node from the FAFB neuropil surface
multiple_soma_info$d <- pointsinside(
  xyzmatrix(multiple_soma_info),
  FAFB.surf,
  rval = "distance"
)
# within each skeleton, number the nodes in order of increasing d
multiple_soma_info <- multiple_soma_info %>%
  arrange(skeletonID, d) %>%
  group_by(skeletonID) %>%
  mutate(rank = row_number())
kable(multiple_soma_info)
labelID labelName skeletonID treenodeID X Y Z radius login d rank
2773 soma 5714 33700955 589956 204332 13000 2354 morrisw -9264.1611 1
2773 soma 5714 33614047 570552 242540 60840 -1 morrisw -1587.8125 2
2773 soma 57019 16326951 403054 289495 79400 1848 robertsr -7490.9688 1
2773 soma 57019 40491965 441823 223466 41960 -1 drummondn 13260.9961 2
2773 soma 861352 32450161 305410 253275 277760 1856 flynnm -16501.6250 1
2773 soma 861352 3628236 305999 253240 277160 -1 reiserm -16098.1562 2
2773 soma 1277563 32595025 493176 130138 194240 3052 liangk -18194.5312 1
2773 soma 1277563 31821189 406528 112291 111400 -1 liangk 13397.7500 2
2773 soma 1363077 14837366 413621 234503 71280 1754 elbahnasawim -6167.0312 1
2773 soma 1363077 32130748 416874 258416 154440 -1 robertsr 12032.3301 2
2773 soma 2052519 32491695 346011 162977 114800 1896 liangk -18326.4062 1
2773 soma 2052519 31439008 427341 107547 128760 -1 liangk 28985.4023 2
2773 soma 2851242 6906660 320104 156382 176400 1436 robertsr -6780.7656 1
2773 soma 2851242 40430928 348144 139189 156640 -1 batesa 10451.1250 2
2773 soma 2981229 32772105 388611 278461 94640 -1 tenshawe -1777.1562 1
2773 soma 2981229 32773150 385850 279970 97120 3298 tenshawe -1602.5625 2
2773 soma 3106997 13589617 513563 144817 165880 1597 masoodpanahn -1645.4375 1
2773 soma 3106997 13588952 519345 161710 160000 1745 masoodpanahn 1982.2188 2
2773 soma 3770805 5351518 492982 205520 208080 2002 taiszi -3426.4688 1
2773 soma 3770805 35194209 498927 213740 126920 -1 yangt 8333.3467 2
2773 soma 4624362 24611820 425758 295346 56240 1415 costam -18363.3594 1
2773 soma 4624362 24608487 468503 241012 85000 -1 costam 13008.8760 2
2773 soma 4632091 15744327 414425 289909 63520 2325 schlegelp -9671.7734 1
2773 soma 4632091 33601900 459731 243545 76280 -1 baileys 13737.2520 2
2773 soma 5322252 17825737 309447 274776 263160 1580 moranc -8763.7500 1
2773 soma 5322252 17820926 305023 264242 228600 -1 moranc 7897.3750 2
2773 soma 6543698 21717689 493385 366906 147440 1751 polskyj -8782.2188 1
2773 soma 6543698 33347169 428034 269639 130320 -1 popvicip 20277.0898 2
2773 soma 6645033 6781342 617115 261993 67160 2598 calles 227.3750 1
2773 soma 6645033 23129236 491239 256409 47840 -1 dacksa 13924.7422 2
2773 soma 6645033 23129252 490647 256660 47680 -1 dacksa 14504.9561 3
2773 soma 7739698 25182877 407164 203029 77320 2090 polskyj -3244.8906 1
2773 soma 7739698 31895604 420504 213379 110760 -1 yangt 24896.4688 2
2773 soma 8818720 29105950 366934 185972 212200 1947 polskyj -5813.6694 1
2773 soma 8818720 34480826 440158 218183 182160 -1 tenshawe 27790.7148 2
2773 soma 9042276 35389521 712381 162830 91600 2049 morrisw -13003.1250 1
2773 soma 9042276 27781678 712400 161918 93280 -1 frechters -12390.2480 2
2773 soma 9416924 35178955 311637 139496 195040 1915 ludwigh -16750.4785 1
2773 soma 9416924 13377290 391244 211268 170600 -1 batesa 14531.7012 2
2773 soma 10108062 33531079 419915 224525 51080 -1 coatesk -3335.2869 1
2773 soma 10108062 40684585 513639 219860 46640 -1 sweetn 700.5312 2
2773 soma 10234277 34371718 493445 360456 122800 2392 eichlerk -4578.3750 1
2773 soma 10234277 36336623 493429 360356 122560 2197 jefferis -4491.7812 2
2773 soma 10289205 34018110 184724 272445 209080 1775 flynnm -28870.6875 1
2773 soma 10289205 42557407 239435 258372 211640 -1 ludwigh 25299.2500 2

This lets us see that in some cases there are two soma tags outside the neuropil (negative d) and close together - these are probably duplicates - whereas in other cases it is likely that points were added in error. We can also plot the points colouring them by their rank order (most external first).

# one colour per rank: the palette has as many entries as the largest number
# of soma tags found on a single neuron
pal <- rainbow(max(multiple_soma_info$rank))
# draw every tagged node as a sphere coloured by its rank
with(
  multiple_soma_info,
  spheres3d(X, Y, Z, col = pal[rank], radius = 2000)
)
plot3d(FAFB)
par3d(zoom = 0.6)

Now we can use this information to construct a URL for each node.

# build one CATMAID URL per row; open = FALSE asks open_fafb() for the URL
# without opening it
# NOTE(review): row-wise evaluation presumably because open_fafb() handles a
# single location at a time -- confirm
multiple_soma_info <- multiple_soma_info %>%
  rowwise() %>%
  mutate(
    url = open_fafb(
      cbind(X, Y, Z),
      active_skeleton_id = skeletonID,
      active_node_id = treenodeID,
      open = FALSE
    )
  )

It might be useful to know who ‘owns’ each neuron. I think the simplest way to assign this is by the user who has traced most nodes for each skeleton (since there may be different users responsible for each soma).

# Find the login of the user who contributed the most nodes to each skeleton.
#
# x:   vector of skeleton ids (may contain repeats).
# ...: extra arguments forwarded to both catmaid_get_user_list() and
#      catmaid_get_contributor_stats().
# Returns a character vector the same length as x; entries are NA when the
# contributor lookup fails or yields no matching user.
get_top_user <- function(x, ...) {
  ul <- catmaid_get_user_list(...)
  # save time by querying each distinct skeleton id only once
  ddx <- unique(x)
  gtu_one <- function(skid, ...) {
    tryCatch(
      {
        res <- catmaid_get_contributor_stats(skid, ...)
        contributors <- res$node_contributors
        w <- which.max(contributors$n)
        login <- as.character(ul$login[match(contributors$id[w], ul$id)])
        # which.max() returns integer(0) when there are no contributors;
        # always hand back exactly one string so that vapply() is satisfied
        if (length(login) == 1L) login else NA_character_
      },
      error = function(e) {
        warning(
          "Could not find top user for skeleton ", skid, ": ",
          conditionMessage(e),
          call. = FALSE
        )
        NA_character_
      }
    )
  }
  # forward ... so the contributor query uses the same connection arguments
  # as the user list query; vapply() keeps the result a character vector
  # even for empty input
  topus <- vapply(ddx, gtu_one, character(1), ...)
  topus[match(x, ddx)]
}

Google sheet

Let’s make a Google Sheet with all those URLs that we can then review manually:

library(googlesheets)
# Upload a table through a temporary tab-separated file, since writing cells
# via the API is very slow.
#
# x:   the table to upload (written out with write.table()).
# ...: passed on to gs_upload().
# Returns whatever gs_upload() returns.
gs_upload_tf <- function(x, ...) {
  tsv_path <- tempfile(fileext = ".tsv")
  # remove the temporary file when the function exits
  on.exit(unlink(tsv_path))
  write.table(x, file = tsv_path, sep = "\t", row.names = FALSE)
  gs_upload(tsv_path, ...)
}

# Upload every multi-soma node, ordered by skeleton and then by d, together
# with the top contributing user of its skeleton.
# The table is ungrouped before mutate() so that get_top_user() is called
# once with the whole skeletonID column: it then fetches the user list a
# single time and queries each distinct skeleton only once, instead of being
# re-run separately for every group.
multiple_soma_info %>%
  ungroup() %>%
  arrange(skeletonID, d) %>%
  mutate(user = get_top_user(skeletonID)) %>%
  gs_upload_tf(sheet_title = "multi_soma_neurons")

As an alternative we can divide that up with one worksheet per user.

library(googlesheets)
# create the spreadsheet that will receive one worksheet per user
gs <- googlesheets::gs_new("multi_soma_neurons_by_user")
# Add the table x to spreadsheet gs as a new worksheet.
#
# x:   table to write; the worksheet is sized to nrow(x) + 1 rows (one extra
#      for the header) by ncol(x) columns.
# gs:  spreadsheet object, passed to gs_ws_new() as ss.
# ...: further arguments for gs_ws_new(), e.g. ws_title.
# Returns whatever gs_ws_new() returns.
gs_add_sheet <- function(x, gs, ...) {
  gs_ws_new(
    row_extent = nrow(x) + 1,
    col_extent = ncol(x),
    ss = gs,
    ...,
    input = x,
    # spelled out because T is an ordinary variable that can be reassigned
    col_names = TRUE
  )
}
# multiple_soma_info may still carry row-wise or per-skeleton grouping from
# earlier steps; drop it so that get_top_user() is called once with the whole
# skeletonID column and factor() sees every user at the same time
msi2 <- multiple_soma_info %>%
  ungroup() %>%
  arrange(skeletonID, d) %>%
  mutate(user = factor(get_top_user(skeletonID)))

# one worksheet per user; rows whose user is NA have no factor level and are
# therefore not written to any worksheet
for (u in levels(msi2$user)) {
  gs_add_sheet(subset(msi2, user == u), gs, ws_title = u)
  cat(".")
}