Article - News - RetroFox.gay

Clear sky
7.3°C

Thursday, April 9th 2026

On Query: REPLACE INTO news_data (id, data, timestamp) VALUES ('%s', '%s', %i);

MySQL Error Code: 1406

MySQL Error Msgs: Data too long for column 'data' at row 1

array(5) {
  [0]=>
  array(7) {
    ["file"]=>
    string(25) "/app/sys/base/sql.inc.php"
    ["line"]=>
    int(63)
    ["function"]=>
    string(14) "ReportSqlError"
    ["class"]=>
    string(3) "SQL"
    ["object"]=>
    object(SQL)#1 (6) {
      ["CONNECTION":"SQL":private]=>
      object(mysqli)#2 (18) {
        ["affected_rows"]=>
        int(-1)
        ["client_info"]=>
        string(14) "mysqlnd 8.2.30"
        ["client_version"]=>
        int(80230)
        ["connect_errno"]=>
        int(0)
        ["connect_error"]=>
        NULL
        ["errno"]=>
        int(1406)
        ["error"]=>
        string(40) "Data too long for column 'data' at row 1"
        ["error_list"]=>
        array(1) {
          [0]=>
          array(3) {
            ["errno"]=>
            int(1406)
            ["sqlstate"]=>
            string(5) "22001"
            ["error"]=>
            string(40) "Data too long for column 'data' at row 1"
          }
        }
        ["field_count"]=>
        int(2)
        ["host_info"]=>
        string(18) "db:3306 via TCP/IP"
        ["info"]=>
        NULL
        ["insert_id"]=>
        int(0)
        ["server_info"]=>
        string(37) "11.2.2-MariaDB-1:11.2.2+maria~ubu2204"
        ["server_version"]=>
        int(110202)
        ["sqlstate"]=>
        string(5) "22001"
        ["protocol_version"]=>
        int(10)
        ["thread_id"]=>
        int(810018)
        ["warning_count"]=>
        int(0)
      }
      ["URL":"SQL":private]=>
      string(7) "db:3306"
      ["USER":"SQL":private]=>
      string(8) "retrofox"
      ["PASS":"SQL":private]=>
      string(16) "TreeakWqQPD9qnWR"
      ["DB":"SQL":private]=>
      string(8) "retrofox"
      ["DROP_VALUES":"SQL":private]=>
      array(1) {
        [0]=>
        string(0) ""
      }
    }
    ["type"]=>
    string(2) "->"
    ["args"]=>
    array(1) {
      [0]=>
      string(69) "REPLACE INTO news_data (id, data, timestamp) VALUES ('%s', '%s', %i);"
    }
  }
  [1]=>
  array(7) {
    ["file"]=>
    string(25) "/app/sys/base/sql.inc.php"
    ["line"]=>
    int(74)
    ["function"]=>
    string(5) "Query"
    ["class"]=>
    string(3) "SQL"
    ["object"]=>
    object(SQL)#1 (6) {
      ["CONNECTION":"SQL":private]=>
      object(mysqli)#2 (18) {
        ["affected_rows"]=>
        int(-1)
        ["client_info"]=>
        string(14) "mysqlnd 8.2.30"
        ["client_version"]=>
        int(80230)
        ["connect_errno"]=>
        int(0)
        ["connect_error"]=>
        NULL
        ["errno"]=>
        int(1406)
        ["error"]=>
        string(40) "Data too long for column 'data' at row 1"
        ["error_list"]=>
        array(1) {
          [0]=>
          array(3) {
            ["errno"]=>
            int(1406)
            ["sqlstate"]=>
            string(5) "22001"
            ["error"]=>
            string(40) "Data too long for column 'data' at row 1"
          }
        }
        ["field_count"]=>
        int(2)
        ["host_info"]=>
        string(18) "db:3306 via TCP/IP"
        ["info"]=>
        NULL
        ["insert_id"]=>
        int(0)
        ["server_info"]=>
        string(37) "11.2.2-MariaDB-1:11.2.2+maria~ubu2204"
        ["server_version"]=>
        int(110202)
        ["sqlstate"]=>
        string(5) "22001"
        ["protocol_version"]=>
        int(10)
        ["thread_id"]=>
        int(810018)
        ["warning_count"]=>
        int(0)
      }
      ["URL":"SQL":private]=>
      string(7) "db:3306"
      ["USER":"SQL":private]=>
      string(8) "retrofox"
      ["PASS":"SQL":private]=>
      string(16) "TreeakWqQPD9qnWR"
      ["DB":"SQL":private]=>
      string(8) "retrofox"
      ["DROP_VALUES":"SQL":private]=>
      array(1) {
        [0]=>
        string(0) ""
      }
    }
    ["type"]=>
    string(2) "->"
    ["args"]=>
    array(4) {
      [0]=>
      string(69) "REPLACE INTO news_data (id, data, timestamp) VALUES ('%s', '%s', %i);"
      [1]=>
      string(85) "foxapi_article_SNgSFz1UYwUgnAye8Iq6AwimFnBOT9jdyRE0kXeBSMWDBytDmFUTOmTLbAFJl2KLSL4Shs"
      [2]=>
      string(89760) "{"id":"SNgSFz1UYwUgnAye8Iq6AwimFnBOT9jdyRE0kXeBSMWDBytDmFUTOmTLbAFJl2KLSL4Shs","title":"Saturation editing of RNU4-2 reveals distinct dominant and recessive disorders","description":"Recently, de novo variants in an 18-nucleotide region in the centre of RNU4-2 were shown to cause ReNU syndrome, a syndromic neurodevelopmental disorder that is predicted to affect tens of thousands of individuals worldwide1,2. RNU4-2 is a non-protein-coding gene that is transcribed into the U4 small nuclear RNA component of the major spliceosome3. ReNU syndrome variants disrupt spliceosome function and alter 5\u2032 splice site selection1,4. Here we performed saturation genome editing (SGE) of RNU4-2 to identify the functional and clinical impact of variants across the entire gene. The resulting SGE function scores, derived from variants\u2019 effects on cell fitness, discriminate ReNU syndrome variants from those observed in the population and markedly outperform in silico variant effect prediction. Using these data, we redefine the ReNU syndrome critical region at single-nucleotide resolution, resolve variant pathogenicity for variants of uncertain significance and show that SGE function scores delineate variants by phenotypic severity and the extent of observed splicing disruption. Furthermore, we identify variants affecting function in regions of RNU4-2 that are critical for interactions with other spliceosome components. We show that these variants cause a new recessive neurodevelopmental disorder that is distinct from ReNU syndrome. Together, this work defines the landscape of variant function across RNU4-2, providing critical insights for both diagnosis and therapeutic development. 
Saturation genome editing of RNU4-2 identifies the functional and clinical impact of variants across the entire gene and delineates variants that\u00a0cause a new recessive neurodevelopmental disorder distinct from ReNU syndrome.","author":"Findlay, Gregory M.","source":"Nature","publish":"April 8, 2026, 9:45 pm","canonical":"https:\/\/www.nature.com\/articles\/s41586-026-10334-9","text":["Main\r\nThe spliceosome is a large ribonucleoprotein complex that mediates RNA splicing. De novo variants in a gene encoding one of the small nuclear RNA (snRNA) components of the spliceosome, RNU4-2, were recently shown to cause ReNU syndrome, a prevalent neurodevelopmental disorder (NDD)1,2. ReNU syndrome is a complex multi-system disorder characterized by moderate to severe global developmental delay, intellectual disability, hypotonia, acquired microcephaly, speech and motor difficulties, low bone density and often seizures1,4.\r\n\r\nRNU4-2 encodes the U4 snRNA, which is a critical component of the major spliceosome.","In particular, U4 is tightly bound with the U6 snRNA in the U4\/U6.U5 tri-small-nuclear ribonucleoprotein and the U4\/U6 duplex needs to be unwound for activation of splicing3. Variants identified in individuals with ReNU syndrome cluster in an 18-nucleotide (nt) region in the centre of RNU4-2 that is depleted of variants in population datasets (the \u2018critical region\u2019, or CR)1. This region is known to accurately position U6 for recognition of the 5\u2032 splice site. Consistent with this, variants causing ReNU syndrome have been shown to alter 5\u2032 splice site usage1, with this disruption correlating with phenotype severity4.","Similarly, variants in two distinct structures within the 18-nt CR (the T-loop and Stem III) have been proposed to differ in clinical severity4.\r\n\r\nThe precise relationship between genetic variation in RNU4-2 and clinical impact remains incompletely characterized. 
The variants initially characterized in individuals with ReNU syndrome are all within the 18-nt CR; however, more recent work has proposed a role for variants outside this region, in the 5\u2032 stem loop5. It is unclear which, if any, variants outside the CR could also cause NDD. This is particularly important as the increased mutation rate of RNU4-2 and other snRNA genes means that there will be many chance occurrences of variants among sequenced individuals with syndromic NDD6.","Up to 75% of individuals with ReNU syndrome have the same single-nucleotide insertion (n.64_65insT). Whether the high recurrence of this particular variant is due to ascertainment bias, germline selection and\/or an increased mutation rate is at present unknown. Furthermore, it is unclear whether available variant effect predictors (for example, CADD7) can effectively distinguish between pathogenic and benign variants in RNU4-2.\r\n\r\nResolving these questions will be critical to ensure accurate, comprehensive diagnoses of individuals affected by ReNU syndrome.","One approach to clarifying variant impact is through the generation of functional data of variant effect, which can mechanistically inform why specific variants cause disease and improve clinical interpretation of rare variants8.","However, no experimental assay has yet been established to evaluate variants in RNU4-2, owing to its recent association with NDD.\r\n\r\nSaturation genome editing (SGE) is a powerful approach to delineate genotype\u2013phenotype relationships9.","Crucially, it does not rely on variants being observed in an individual with or without disease.","Instead, every possible variant across a gene or region can be engineered and the relative functional effects of each determined through a cellular readout. SGE experiments have been performed across numerous protein-coding genes, including BRCA110, CARD1111, DDX3X12, VHL13 and BAP114. 
In each case, the SGE assay has accurately differentiated between known pathogenic and benign variants.\r\n\r\nHere, we perform SGE of the human RNU4-2 noncoding RNA. We implemented an approach to combat the high sequence homology between RNU4-2 and its many homologues and pseudogenes, obtaining a variant effect map that effectively distinguishes variants known to cause ReNU syndrome from those in population controls.","We redefine the CR at single-nucleotide resolution, resolve pathogenicity assignments for variants of uncertain significance, and show that function scores for variants within the CR correlate closely with phenotypic severity. Furthermore, we identify functionally critical variants in other regions of RNU4-2 that underlie a recessive NDD marked by clinical features that are distinct from those of ReNU syndrome.\r\nSGE maps the effects of RNU4-2 variants\r\nPerforming SGE on regions of high sequence homology poses a challenge in that the protocol requires CRISPR\u2013Cas9 editing of a single locus, specific amplification of the edited locus from millions of cells and accurate variant calling from amplicon sequencing.","Alignment of RNU4-2 (RefSeq NR_003137.3) to RNU4-1 (RefSeq NR_003925.1) reveals mismatches at only 4 of the 145\u2009nt. The sequence upstream of RNU4-2, however, is both unique and poorly conserved across species, such that guide RNAs (gRNAs) predicted to be highly specific15 can be designed in conjunction with protospacer adjacent motif (PAM)-disrupting edits to block Cas9 recutting (Fig. 1a).\r\nFig. 
1: SGE reveals the functional spectrum of RNU4-2 variants.\r\na, Schematic of SGE library design and CRISPR targeting strategy for RNU4-2.","Positions of library variants including all possible SNVs (navy; across the 145-nt transcript and 6-nt 3\u2032), control 1-nt insertions in loop regions (yellow), CR 1-nt insertions (red) and deletions (teal) and multi-nt insertions (light purple) are denoted on a schematic of RNU4-2 and RNU6 in complex (left) and by genomic location (right). A gRNA was designed to cleave upstream of RNU4-2 (scissors), avoiding highly repetitive sequence and allowing for a PAM-blocking variant to be installed in a region of low conservation (PhyloP 100 vertebrates basewise conservation track shown).","b, Schematic of SGE experiments in HAP1. Following editing, cells were collected on days 4 and 14. Sequencing was performed to quantify variant frequencies at each timepoint and function scores were calculated.","c, Function scores for 539 variants were correlated across biological replicates (Pearson\u2019s r\u2009=\u20090.86\u00a0for\u00a0replicates 1 and 2). The function score threshold delineating significantly depleted variants is indicated with the dashed line.","d, Function scores are plotted by genomic position in relation to RNU4-2 (RefSeq NR_003137.3). The line at n.145 marks the end of the transcript, with 18 more distal SNVs also scored. Points in c,d are coloured by variant type with a single legend included for these two panels. 
CRISPR\u2013Cas9 icon in b adapted from Bioicons (https:\/\/bioicons.com\/?query=CRISPR; CRISPR_Cas9 schematic), Marcel Tisch, under a Creative Commons licence CC0 1.0 Universal.\r\n\r\nLacking established models for assaying RNU4-2 variants, we chose to perform SGE in HAP1\u00a0cells, a haploid human line in which growth effects have accurately distinguished pathogenic variants across several protein-coding genes10,12,13,14,16,17.","To HAP1 cells lacking LIG4 (HAP1-LIG4-KO), we codelivered Cas9 with a gRNA directing DNA cleavage 31-nt upstream of RNU4-2 to install a library comprising 539 variants by homology-directed DNA repair (HDR). The library included all possible single base substitutions from the first transcribed nucleotide to 6\u2009nt beyond the most 3\u2032 position of the RNU4-2 transcript (GRCh38, chr12:120291753\u2013120291903), as well as all 1-nt deletions and insertions in the CR, including all but one variant known to cause NDD (omitting n.72_73del, which was reported after assay design; Fig. 1a).","Uncertain whether pathogenic variants would show phenotypes in the HAP1-based assay, we included 8 2-nt to 5-nt insertions at positions in the CR previously associated with disease, reasoning these may have strong effects. As negative controls, we included 12 1-nt insertions in stem loops outside the CR, which were not predicted to be deleterious (Supplementary Table 1).\r\n\r\nAdapting an optimized SGE protocol for HAP1 cells13 (Fig.","1b), we successfully scored all variants included in the library, observing an average of 52% editing by HDR at day 4. Editing was confirmed by sequencing to be specifically targeted to RNU4-2, and not RNU4-1. Function scores, reflecting variants\u2019 effects on growth (Methods), were highly correlated across three biological replicates (Pearson\u2019s r\u2009=\u20090.83\u20130.86; Fig. 1c and Extended Data Fig. 1). 
As expected, given their location in the U4\/U6 secondary structure, all 12 negative control variants scored near 0 (mean, \u22120.009, s.d.\u2009=\u20090.11). We defined a neutral distribution from these negative controls to identify 151 significantly depleted variants (q\u2009<\u20090.01, that is, function score less than \u22120.302).","The 8 multi-nucleotide insertions in the CR included as positive controls all were depleted, with function scores ranging from \u22120.73 to \u22121.82. Mapping variants\u2019 function scores to their linear transcript position reveals that depleted variants are clustered, rather than distributed evenly across the gene (Fig. 1d).\r\nSGE data resolve variant pathogenicity\r\nWe annotated all assayed variants within RNU4-2 with whether or not they had been observed in individuals with ReNU syndrome1, observed in population cohorts (UK Biobank18 or All of Us), or observed in neither (unobserved; Fig. 2a).","All 18 variants observed in ReNU syndrome were depleted in the assay (function score less than \u22120.302), whereas 81.0% (286 out of 353) of population variants scored as normal (function score \u22120.302 or more; Fig. 2b). Accordingly, function scores effectively discriminate between ReNU syndrome variants and those identified in the population (Fig. 2c; area under the receiver operating characteristic (ROC) curve (AUC) of 0.93). Most variants that are unobserved in population cohorts score normally (56.0%; 84 out of 150); however, many are as, or even more, depleted than ReNU syndrome variants. 
Specifically, the four variants with the lowest function scores are all unobserved (Supplementary Table 1).\r\nFig.","2: Function scores accurately discriminate variants underlying ReNU syndrome.\r\na, Function scores for 521 variants within the RNU4-2 transcript are plotted by position and coloured by their association with ReNU syndrome (red), presence in the UK Biobank\u00a0(UKB) or All of Us\u00a0(AoU) cohorts (blue) or absence from both cohorts (teal). Depleted variants within the 18-nt CR (vertical red dashed lines) are confined to two smaller regions (shaded grey) and include all ReNU syndrome variants scored (n\u2009=\u200918). These regions, n.62-70 and n.75-78, correspond closely to the T-loop and Stem III regions, respectively.","The black dashed line (function score \u22120.302) indicates significantly depleted variants and the grey dashed line (function score \u22120.90) separates \u2018moderate\u2019 from \u2018strong\u2019 depletion.","b, Stacked histogram and overlaid density plot of function scores by category comparing 18 ReNU syndrome variants with 353 variants in UK Biobank and\/or All of Us and 150 unobserved variants.","c, ROC curves show the performance of function scores and CADD scores for classifying 12 ReNU syndrome SNVs from 346 SNVs observed at least once in population controls.","d, Function scores for SNVs are plotted by combined UK Biobank and All of Us allele count. Higher allele counts were correlated with higher function scores (Spearman\u2019s \u03c1\u2009=\u20090.29, two-sided P\u2009=\u20092.8\u2009\u00d7\u200910\u221211). Among the 50 most frequently observed SNVs (combined allele count greater than 91; black dashed line), no SNVs were depleted. The grey dashed line separates absent variants (combined allele count of 0) from those observed at least once (combined allele count greater than 0).","e, Function scores for the 435 tested SNVs are plotted by CADD score. 
The dashed line at y\u2009=\u2009\u22120.302 indicates significantly depleted SNVs, whereas the red line at x\u2009=\u200919.25 and the blue line at x\u2009=\u200918.99 indicate median CADD scores for ReNU syndrome SNVs and SNVs present in population cohorts, respectively.\r\n\r\nWe observed a significant correlation between single-nucleotide variant (SNV) allele counts in population cohorts and function scores, with rarer SNVs tending to be more depleted by SGE (Spearman\u2019s \u03c1\u2009=\u20090.29, P\u2009=\u20092.8\u2009\u00d7\u200910\u221211; Fig. 2d). Among the 50 SNVs with the highest combined allele counts in the UK Biobank and All of Us cohorts, none were depleted in the assay.","Indeed, applying more stringent allele count thresholds to define control variants in population cohorts consistently improved the assay\u2019s classification performance (Extended Data Fig. 2). These findings indicate that depleted variants observed in population cohorts are unlikely to be the result of experimental noise and, instead, represent genuine variants affecting RNU4-2 function segregating in the general population.\r\n\r\nThe discriminatory power of our SGE assay was substantially greater than that of the genome-wide in silico tool CADD19 (Fig. 2c; AUC\u2009=\u20090.65). Given the high conservation of the entire RNU4-2 gene, most SNVs have very similar CADD scores (Fig. 2e).","Although CADD scores for ReNU syndrome SNVs are marginally higher on average than those for SNVs in population cohorts (ReNU median 19.2; UK Biobank and All of Us median 19.0; one-sided Wilcoxon P\u2009=\u20090.040), a CADD score threshold that would capture all ReNU syndrome SNVs (18.89 or greater) would also annotate 56.4% (195 out of 346) of SNVs observed in UK Biobank and All of Us, and 55.6% (183 out of 329) of SNVs with normal SGE function scores, as probably deleterious. 
By contrast, our SGE function score threshold of \u22120.302 captures all ReNU syndrome SNVs and only 19.1% (66 out of 346) of SNVs observed in population cohorts.","We also observe only a weak correlation of SGE function scores with changes to U4\/U6 RNA binding stability predicted by ViennaRNA (\u03c1\u2009=\u2009\u22120.27, P\u2009=\u20094.5\u2009\u00d7\u200910\u221210; Extended Data Fig. 3a). The observed effect is limited to specific regions, most notably Stem II (\u03c1\u2009=\u2009\u22120.79, P\u2009=\u20095.0\u2009\u00d7\u200910\u221210). By contrast, no significant correlation is observed in the T-loop or Stem III and, overall, \u0394\u0394G values from ViennaRNA do not classify ReNU syndrome variants as well as SGE (ROC-AUC 0.72 versus 0.93, respectively; Extended Data Fig. 3b).\r\n\r\nThe assay clearly delineates the 18-nt CR of RNU4-2 (Fig. 2a) within which variants cause ReNU syndrome; however, some variants in this region score normally.","Using these data, we redefine the CR to two smaller regions of 9\u2009nt (n.62-70, inclusive of insertions at n.61_62) and 4\u2009nt (n.75-78), corresponding to the T-loop and Stem III, respectively (Extended Data Fig. 4). Although the T-loop region matches that reported by ref.","2, the CR overlapping Stem III is 3-nt smaller than previously suggested. Within these two regions, 85.4% (76 out of 89) of tested variants (79.5% of SNVs), including all ReNU syndrome variants, have significant function scores, compared with 17.4% (75 out of 432) across the remainder of RNU4-2.\r\n\r\nWe next used our function scores to assign evidence strengths for clinical variant classification8. We deemed the 17 pathogenic or likely pathogenic variants reported in ref. 
4 and assayed here to be associated with ReNU syndrome and 45 variants with combined allele counts across the UK Biobank and All of Us above 100 to be neutral.","A Gaussian mixture model was then applied to determine the odds of pathogenicity (OddsPath) for each variant (Methods, Extended Data Fig. 5 and Supplementary Table 1). Within the CR, 69 of 127 (54.3%) variants receive PS3 strong evidence of pathogenicity, including 16 of 18 variants reported to be pathogenic, with the other two variants receiving PS3 moderate or indeterminate evidence. A further 38 (29.9%) variants receive BS3 strong evidence of benignity.","As no variants outside the CR have been associated with ReNU syndrome, we refrain from assigning evidence strengths to variants outside the CR.\r\n\r\nRecent work by one research group4 classified three variants outside the CR and one deletion within the CR as variants of uncertain significance. Three of these variants were included in our assay (n.76del, n.92C>G and n.111C>T) and all three had normal function scores (0.12, 0.04 and 0.05, respectively).","Notably, all three variants are also observed in population controls. Furthermore, a recent paper proposed a link between two 5\u2032 stem loop variants, each identified in a single individual and inherited from an unaffected mother, and ReNU syndrome5. One of these variants is included in our assay (n.30A>T), and its score of \u22120.305 just crosses the threshold to be classified as depleted; however, other depleted variants in the same region are observed in population controls.","Finally, of two variants recently associated with retinitis pigmentosa20, the one that is included in our assay (n.56T>C) has a normal function score (\u22120.23).\r\nSGE depletion predicts disease severity\r\nA previous study proposed a difference in phenotypic severity between ReNU syndrome variants mapping to the T-loop and Stem III structures of the U4\/U6 duplex4. 
This difference is seen in our data, with Stem III variants having on average, higher function scores (T-loop mean \u22121.13; Stem III mean \u22120.75; one-sided Wilcoxon P\u2009=\u20090.012).","However, we also observe considerable variation in function scores for ReNU variants within each of the two regions. For example, two SNVs within the T-loop, n.63T>C and n.65A>G, have function scores above the mean observed for Stem III variants (\u22120.51 and \u22120.32, respectively). To investigate this, we repeated the phenotype clustering analysis of 143 individuals with ReNU syndrome from ref. 4. We classified the variants into two categories corresponding to \u2018moderate\u2019 (\u22120.9\u2009<\u2009function score\u2009<\u2009\u22120.302) and \u2018strong\u2019 (function score less than \u22120.9) levels of depletion in the assay (Fig. 3a and Extended Data Fig. 4).","All of the individuals with moderate category variants cluster together, including the four individuals with the n.63T>C (n\u2009=\u20091) and n.65A>G (n\u2009=\u20093) T-loop variants (Fig. 3b). These results remained consistent when excluding n.64_65insT from the analysis (that is, the result is not driven by the recurrent insertion variant alone) and when using a uniform manifold approximation and projection (UMAP) representation (Extended Data Fig. 6).\r\nFig.","3: Function scores predict ReNU syndrome severity and degree of splicing disruption.\r\na, Schematic showing how ReNU variants are split into two categories based on their SGE function score: strong depletion (function score less than \u22120.9; red) and moderate depletion (\u22120.9\u2009<\u2009function score\u2009<\u2009\u22120.302; yellow).","b, The first two principal components from clustering of 143 ReNU syndrome cases by phenotype using the approach from ref. 4. Individuals are coloured by their variant SGE function score class. 
Unlabelled triangles indicate occurrences of n.64_65insT.","c, The proportion of affected individuals with each phenotype is plotted, with cases grouped by SGE function score class. The number of individuals (n) in each comparison group is shown for each phenotype. Error bars indicate 95% confidence intervals centred on each proportion (capped at 0 and 1.0).","Full data, including statistics for comparisons between groups, are included in Extended Data Table 1.","d, Principal component analysis based on PSI values from significant 5\u2032 splice site events detected from RNA sequencing data using rMATS, comparing 19 patients with ReNU with 20 control participants (purple), as performed in ref. 4. Individuals with ReNU are coloured by their variant SGE function score class.","GDD, global developmental delay; ID, intellectual disability.\r\n\r\nTo further determine whether SGE function scores were able to discriminate between more severe and milder ReNU syndrome variants, we compared four specific phenotypes.","Individuals with variants in the strong depletion group were significantly more likely to have severe developmental delay (73.3% versus 5.9%; odds ratio\u2009=\u200942.7; 95% confidence interval (CI) 6.1\u20131,841.8; two-sided Fisher\u2019s P\u2009=\u20091.1\u2009\u00d7\u200910\u22127), severe intellectual disability (76.6% versus 5.9%; odds ratio\u2009=\u200950.4; 95%CI 7.1\u20132,197.0; two-sided Fisher\u2019s P\u2009=\u20093.6\u2009\u00d7\u200910\u22128) and absent speech or to speak only a few words (92.8% versus 5.6%; odds ratio\u2009=\u2009195.5; 95%CI 24.7\u20138,591.7; two-sided Fisher\u2019s P\u2009=\u20096.6\u2009\u00d7\u200910\u221214) than individuals with moderate depletion variants. 
There was no difference in the occurrence of seizures between variant groups (Fig.","3c and Extended Data Table 1).\r\n\r\nTo test whether the strength of SGE depletion also correlates with the extent of splicing disruption observed in individuals with ReNU syndrome, we repeated a second analysis from ref. 4. We regenerated a principal component analysis of percentage spliced-in (PSI) values for 5\u2032 splice sites that differed significantly in usage between ReNU cases and control participants. Individuals with strong and moderate SGE function scores clustered separately, with the strong variant individuals being more distant from control participants (Fig. 3d).\r\nA recessive NDD linked to RNU4-2 variants\r\nSeventy-five variants outside the ReNU CR are depleted in the SGE assay (Supplementary Table 1).","Unlike the depleted variants in the ReNU CR, most of these other depleted variants (84.0%; 63 out of 75) are observed in population control cohorts, albeit at low frequencies (Fig. 2a). To investigate whether these variants are associated with NDD-related traits, we compared individuals heterozygous for such variants (n\u2009=\u2009592) and individuals with non-depleted SNVs (n\u2009=\u200912,374) in RNU4-2 with individuals without any variants in RNU4-2, using the UK Biobank.","We did not find any significant differences in fluid intelligence scores, childhood developmental disorder diagnoses or age of leaving education (Extended Data Table 2).\r\n\r\nBecause our SGE assay was performed in a haploid cell line, we reasoned that depleted variants outside the CR may instead be associated with recessive phenotypes. We searched global rare disease cohorts and identified 20 individuals, with biallelic depleted variants: 10 (including 3 pairs of siblings) with homozygous variants and 10 (including 4 pairs of siblings) who were each concordant for compound heterozygous depleted variants (Extended Data Table 3). 
None of these variants were located in the ReNU CR, yet all 20 individuals had NDD phenotypes.","None of the individuals had an existing genetic diagnosis that fully explained their observed phenotypes (Methods). Across the rare disease cohorts, no individuals with phenotypes unrelated to NDD had biallelic depleted variants. Only a single individual across the UK Biobank and All of Us cohorts is homozygous for a SGE-depleted variant (n.31T>G, function score \u22120.730).","This individual has only primary level education (highest grade, one to four) and reports difficulties with \u2018dressing or bathing\u2019, \u2018doing errands alone\u2019 and \u2018concentrating, remembering or making decisions\u2019, consistent with a possible intellectual disability.\r\n\r\nThe clinical phenotypes of the 20 identified NDD individuals are characterized as part of a broader cohort (total n\u2009=\u200938) in a companion paper21. The 18 extra individuals reported in this broader cohort all have biallelic RNU4-2 variants, but at least 1 variant had a non-significant function score or was not scored with SGE.","In brief, we define a new NDD characterized by global developmental delay, intellectual disability, delayed or absent speech, hypotonia, spasticity, microcephaly, ophthalmological and visual impairments and seizures, with variable involvement of genitalia, skin, hair and limb anomalies.","On MRI, individuals show distinctive white matter abnormalities and cerebellar atrophy that are not seen in ReNU syndrome21.\r\n\r\nDepleted variants outside the ReNU CR broadly map to four regions of U4\/U6 secondary structure that are known to mediate interactions between U4 and other components of the spliceosome: (1) the central portion of the Stem II interaction with U6 from nucleotides 6 to 11 (ref.","3); (2) a \u2018k-turn\u2019 structure required for protein binding22,23 comprising nucleotides 27 to 33 and nucleotides 42 to 46; (3) a region from nucleotides 118 to 126 that 
interacts with a ring of Sm proteins that are important for U4 biogenesis and stability24,25 and (4) a portion of the terminal stem loop formed by base-pairing of nucleotides 129 to 131 with nucleotides 140 to 142 (Fig. 4). All variants identified in the 20 recessive NDD cases map to these four regions. Variants in structurally equivalent regions of RNU4ATAC, which encodes the minor spliceosome equivalent of U4, U4atac, cause rare recessive RNU4atac-opathies26,27,28.","Of the 13 unique RNU4-2 variants identified in the recessive NDD cases, 5 have exact equivalents in RNU4ATAC that are (likely) pathogenic in ClinVar (n.32G>A, n.45G>C, n.46G>A, n.119A>G and n.122T>G; Supplementary Table 2). They include n.119A>G (function score \u22120.686; RNU4ATAC equivalent n.117A>G; ClinVar variation ID 1525441), which was homozygous in two individuals and compound heterozygous in three individuals, including two brothers.\r\nFig. 4: SGE-depleted variants outside the CR cause a recessive NDD.\r\nThe lowest SGE function score class among SNVs at each position is indicated on the U4\/U6 secondary structure. Outside the CR, low SGE scores occur at positions of spliceosomal protein binding, indicated by teal shaded regions.","Grey triangles correspond to homologous positions of RNU4ATAC at which (likely) pathogenic variants have been linked to recessive disease (from ClinVar; Supplementary Table 2). RNU4-2 variants with low function scores observed in recessive NDD cases are indicated, with filled purple circles indicating variants observed as homozygous and half-filled circles indicating variants observed in the compound heterozygous state. An orange dot in the centre of a circle indicates that the variant is observed in two affected siblings. Six (likely) pathogenic RNU4ATAC variants could not be confidently assigned to an equivalent nucleotide in RNU4-2. 
Three of these (n.8C>A, n.13C>T and n.16G>A) are shown together as mapping to Stem II.","The other three (n.29T>G, n.30G>A and n.111G>A) are not shown.\r\n\r\nIn an attempt to distinguish recessive and dominant variants experimentally, we performed SGE of RNU4-2 once more, this time using a diploid population of HAP1 cells selected through fluorescence-activated cell sorting (Methods). This experiment revealed function scores to be attenuated across the gene due to the presence of the second allele (Extended Data Fig.","7a,b and Supplementary Table 1).","However, all variants assayed in the Stem III region scored neutrally in diploid HAP1, suggesting pathogenic Stem III variants probably affect cell fitness in a manner that is distinct from pathogenic variants elsewhere. For all other regions, function scores between haploid and diploid models were highly correlated (Extended Data Fig.","7c), indicating fitness effects in diploid HAP1 cells do not delineate dominant and recessive variants in vivo.\r\nDiscussion\r\nRNU4-2 was the first noncoding RNA to be identified as having a substantial contribution to the prevalence of NDD, with ReNU syndrome predicted to affect around 100,000 individuals worldwide1,2. Here we developed an SGE assay to systematically assess the function of variants across RNU4-2 and map genotype\u2013phenotype relationships. We show that function scores accurately identify variants underlying ReNU syndrome and can distinguish these variants by disease severity. Furthermore, we define the CR at the centre of RNU4-2 within which variants cause dominant ReNU syndrome, at nucleotide resolution.","In two regions, of 9\u2009nt and 4\u2009nt, 85.4% of all tested variants are depleted.","However, some variants in these regions, particularly in Stem III, have normal function scores and are therefore unlikely to be pathogenic. 
As a consequence, these data have immediate use in clinical interpretation of newly observed variants in individuals with NDD.","Indeed, calibration of the SGE function scores for use within the ACMG\/AMP framework in the context of ReNU syndrome showed that these data can be used to give strong evidence towards either a pathogenic or benign classification.\r\n\r\nWe identified four regions of the U4\/U6 duplex structure, outside the ReNU CR where variants are also depleted. This led us to uncover a new recessive NDD caused by homozygous and compound heterozygous variants in these regions that were depleted in SGE. This NDD is described comprehensively in ref.","21, in which we also expand the cohort to include 38 individuals with biallelic RNU4-2 variants: the 20 individuals presented here with significant function scores for both variants, and 18 extra individuals harbouring variants in the same functional regions with at least one variant that was not significantly depleted or not assayed by SGE. Through comprehensive clinical phenotyping and analysis of RNA sequencing data, we show that the recessive NDD is phenotypically and mechanistically distinct from ReNU syndrome.","For example, MRI findings in individuals with ReNU syndrome most commonly include enlarged ventricles and corpus callosum abnormalities4, whereas individuals with biallelic RNU4-2 variants commonly have progressive white matter changes and cerebellar atrophy. Although we cannot yet determine the prevalence of the recessive NDD, SGE-depleted variants outside the ReNU CR are found in 0.12% and 0.094% of individuals in the UK Biobank and All of Us cohorts, respectively.","Hence, the recessive NDD is rarer than ReNU syndrome, but the prevalence is likely increased in populations with higher rates of consanguinity21.\r\n\r\nDistinct mechanisms underlie dominant and recessive RNU4-2-associated NDDs. 
We previously showed that individuals with ReNU syndrome have an increase in use of alternative non-canonical 5\u2032 splice sites1, consistent with the role of the T-loop and Stem III regions in accurately positioning the U6 ACAGAGA sequence to receive the 5\u2032 splice site. Recessive RNU4-2 variants map to different locations within U4, outside the T-loop and Stem III. They are found in key regions of binding between U4 and other important spliceosome factors.","The same regions have previously been shown to be important in U4 mutational analyses in yeast25 and variants in the 5\u2032 stem loop k-turn that we identify as depleted occur at nucleotides that are essential for SNU13\/15.5k protein binding in vitro23. In our companion paper21, we show through analysis of blood RNA sequencing data that individuals with biallelic RNU4-2 variants do not have the ReNU signature of disrupted 5\u2032 splice site selection. Furthermore, biallelic individuals have notably decreased RNU4-2 expression, which is not observed in individuals with ReNU syndrome, supporting a distinct loss-of-function molecular mechanism.","As variants in the equivalent regions and nucleotides of RNU4ATAC that cause recessive RNU4atac-opathies have been shown to lead to intron retention29,30, a similar mechanism may underlie recessive RNU4-2 NDD.","However, this was not readily evident in RNA sequencing analysis in blood21.\r\n\r\nRNU4-2 is a striking example of genetic pleiotropy, with variants in different regions of the RNA, which is only 145\u2009nt in length, causing both two distinct NDDs and retinitis pigmentosa. 
This adds complexity to variant interpretation and makes it particularly important to calibrate functional evidence with consideration of underlying mechanisms.","Although we showed that function scores for variants within the ReNU CR can provide strong evidence for clinical interpretation, we were unable to calibrate our assay for variants outside the ReNU CR due to a lack of independently defined pathogenic variants in these regions8, as all individuals with recessive NDD were identified on the basis of function score. Whereas we anticipate that our SGE data will prove highly useful for delineating variant pathogenicity for recessive disease, until orthogonal calibration can be performed, we recommend PS3 supporting evidence be assigned to significantly depleted variants outside the CR.","It is important to note that we set a relatively conservative threshold to define significantly depleted variants (q\u2009<\u20090.01) using synthetic controls in the absence of bona fide benign variants. Although all variants associated with ReNU syndrome scored below this threshold, we cannot exclude the possibility that variants with more subtle effects may be clinically relevant, particularly in relation to recessive disease. We cannot fully exclude the possibility that variants that score just below the \u22120.302 function score threshold are benign and represent false positives.","The calibration of function scores to evidence strength for ReNU variant classification reflects this, as variants were not assigned PS3 strong evidence in favour of pathogenicity unless their function scores were below \u22120.45.\r\n\r\nThus far, there are no strong data linking variants outside the CR to dominantly inherited NDD. This is supported by our analysis of heterozygous SGE-depleted variants outside the CR in the UK Biobank, in which we do not find any associations with intellectual disability related phenotypes. 
Accordingly, SGE data should not be used as evidence for the pathogenicity of variants for dominantly inherited ReNU syndrome beyond the CR.","We note that the 5\u2032 stem loop variants n.30A>T (function score \u22120.305) and n.43_44insT have been putatively associated with NDD5, with a link initially proposed with dominant ReNU syndrome.","However, these variants are within the \u2018k-turn\u2019 region linked to recessive disease in this study, and both are inherited from unaffected parents. Furthermore, n.43_44insT is identified in an individual with NDD in our companion paper, as compound heterozygous with a variant in Stem II21. Collectively, these data indicate that 5\u2032 stem loop variants are more likely to lead to recessive NDD than dominant ReNU syndrome.\r\n\r\nOur HAP1-based SGE assay has several limitations. Most notably, the growth-based readout does not inform directly on underlying mechanisms of splice alteration (for example, altered 5\u2032 splice site usage, intron retention).","This means that in the haploid context, both dominant and recessive effects are observed, which cannot be separated by function score alone. We also performed SGE in diploid HAP1 cells. Whereas function scores from these experiments revealed differences between T-loop and Stem III variants, they were once more unable to distinguish dominant and recessive variants in vivo. It is likely that specific changes in splicing underlying certain clinical phenotypes may not occur in HAP1 due to differences between cell types. It is notable, for instance, that a variant recently associated with retinitis pigmentosa (n.56T>C) did not score significantly.","Furthermore, most individuals with ReNU syndrome (70\u201375%) have the same single base insertion, n.64_65insT. Our data indicate that this variant is not unique in its functional severity, with many variants scoring similarly or having even lower function scores. 
This result could argue against high recurrence being the result of a particularly damaging functional effect driving ascertainment, suggesting that positive selection in the female germline or an increased local mutation rate might be more likely explanations.","However, we cannot rule out the possibility that this variant leads to unique changes in splicing not reflected in SGE function scores.\r\n\r\nFuture experiments using more cell types will be valuable for delineating mechanisms of RNU4-2 pleiotropy.","Likewise, testing larger insertions and deletions both inside and outside the ReNU CR will add insights into the degree of tolerated disruption across different regions of RNU4-2. For example, in ref.","4, the authors identified a 2-nt deletion (n.72_73del) in 2 individuals. This variant falls between Stem III and the T-loop but suggests that larger insertions and deletions in this region may also be disruptive to these structures. As we have observed for CR variants associated with ReNU syndrome, the degree of functional impact caused by recessive NDD variants may correlate with disease severity. There may also be phenotypic differences between individuals with variants mapping to the four distinct regions we identified.","Thorough phenotyping of large cohorts of cases will be necessary to establish how the degree of functional effect influences phenotype.\r\n\r\nIn summary, this work illustrates the power of a variant effect map for a locus recently implicated in disease to discover new genotype\u2013phenotype associations and understand mechanisms underlying disease. 
SGE data for RNU4-2 will be critical for accurately diagnosing patients with at present unexplained NDD and provide insights that are valuable for efforts to design effective therapies.","Finally, the SGE strategy we used to overcome the high sequence homology of RNU4-2 can be replicated to dissect other snRNAs recently linked to disease31,32.\r\nMethods\r\nSingle guide RNA design and cloning\r\nThe gRNA used for SGE was designed using Benchling\u2019s CRISPR design tool to search the RNU4-2 locus, including upstream and downstream regions of low sequence homology to RNU4-1 and pseudogenes, identifying a candidate with high on-target and low off-target scores. The selected gRNA was not predicted to target RNU4-1, owing to eight mismatches occurring in the protospacer and PAM. The gRNA spacer sequence was ligated into the pX459 backbone as previously described33.","In brief, complementary primers containing the spacer were ordered from IDT (Supplementary Table 3), phosphorylated, hybridized and ligated into the pX459 linearized backbone followed by PlasmidSafe DNase (Lucigen) digestion.","Next, 2\u2009\u00b5l of the ligation reaction were transformed in NEB Stable Competent Escherichia coli cells using the high-efficiency transformation protocol and 75\u2009\u00b5l of transformant was plated on ampicillin-resistant plates and cultured overnight at 30\u2009\u00b0C. Three colonies were then picked and grown overnight at 37\u2009\u00b0C in 7\u2009ml of Luria\u2013Bertani medium supplemented with carbenicillin (100\u2009\u00b5g\u2009ml\u22121). Plasmid DNA was extracted using the QIAprep Spin Miniprep kit (Qiagen) and verified using Plasmidsaurus whole-plasmid sequencing. 
The selected clone was then grown in 100\u2009ml of Luria\u2013Bertani medium at 37\u2009\u00b0C in a shaking incubator supplemented with carbenicillin.","The cells were then pelleted and the plasmid was extracted using a ZymoPure Maxiprep kit (Zymo Research), endotoxins were removed using EndoZero columns (Zymo Research) and the product was quantified with the Qubit double-stranded DNA (dsDNA) BR assay kit (Invitrogen).\r\nHDR library cloning\r\nAn oligonucleotide library comprising RNU4-2 variants was manufactured by Twist Bioscience and subsequently cloned into a vector containing homology arms for RNU4-2 to make the HDR library for SGE.\r\n\r\nTo generate the vector with homology arms, a nested PCR was performed on genomic DNA (gDNA) extracted from HAP1 cells10 using primers designed to generate homology arms of 700\u2013800\u2009base pairs (bp) flanking RNU4-2 (Supplementary Table 3).","The PCR was performed using the Kapa HiFi HotStart ReadyMix (Roche). The product was purified using AmpureXP (Beckman Coulter) magnetic beads at 1.2\u00d7 volume and eluted in 12\u2009\u00b5l of nuclease-free water. The amplicon containing RNU4-2 homology arms was then inserted in the linearized pUC19 backbone using In-Fusion HD cloning (Takara) and 2\u2009\u00b5l of cloning reaction was transformed into NEB Stable cells following the manufacturer\u2019s 5-min transformation protocol. Cells were plated on agar plates containing ampicillin and incubated at 30\u2009\u00b0C overnight. The pUC19 plasmid containing RNU4-2 homology arms (pUC19-RNU4-2-HA) was purified and sequence-verified from a successfully transformed clone.","pUC19-RNU4-2-HA was then diluted to 8.7\u2009pg in a 50-\u00b5l PCR reaction and amplified with Kapa HiFi to obtain a linearized product with 17\u201318\u2009bp complementarity to the RNU4-2 oligo library. 
A PAM-blocking mutation was introduced 27\u2009nt upstream of the RNU4-2 sequence (chromosome 12:120291930-C-G) by means of primer overhang extension during PCR. The location of the PAM-disrupting edit was selected to minimize recutting by Cas9, converting a 5\u2032-GGG PAM sequence to 5\u2032-GCG. The PAM-disrupting edit had a CADD score of 4.20 (Phred) and a 100 vertebrates PhyloP score of 0.11. The reaction was treated with 1\u2009\u00b5l of DpnI (NEB) for 30\u2009min at 37\u2009\u00b0C, gel extracted and quantified.","Then, the RNU4-2 oligo library was amplified using Kapa HiFi and purified using AmpureXP (1.2\u00d7). The amplified library and linearized pUC19-RNU4-2-HA plasmid were then assembled using the In-Fusion HD cloning kit, and the product was transformed into NEB Stable cells using the high-efficiency transformation protocol. To quantify efficiency, 1% of cells in the transformation reaction were plated and the remainder were cultured in 100\u2009ml of Luria\u2013Bertani medium with carbenicillin overnight at 37\u2009\u00b0C. Cells were then pelleted by centrifugation and the final RNU4-2 HDR library was extracted using the ZymoPure Maxiprep kit (Zymo Research) with endotoxin removal.","The isolated HDR library was quantified with a Qubit dsDNA BR assay kit and sequence-verified by Plasmidsaurus.\r\nHAP1 cell culture\r\nHAP1 cells used for SGE (the\u00a0HAP1-LIG4-KO line;\u00a0herein referred to as \u2018HAP1\u2019)\u00a0show increased rates of editing by HDR due to a frameshifting mutation in LIG4 (ref. 10). Frozen HAP1 cells were thawed at 37\u2009\u00b0C in a water bath, then supplemented with 10\u2009ml of prewarmed Iscove\u2019s Modified Dulbecco\u2019s Medium (IMDM) containing l-glutamine, 25\u2009nM HEPES (Gibco), 10% FBS (Gibco), 1% penicillin\u2013streptomycin (Gibco) and 2.5\u2009\u03bcM 10-deacetyl-baccatin-III (DAB, Stratech), herein referred to as IMDMc. 
Cells were centrifuged at 300g for 3\u2009min.","The supernatant was then aspirated and the cells were resuspended in fresh media, plated on a 10-cm dish and cultured at 37\u2009\u00b0C with 5% CO2. The next day, the IMDMc media was replaced, and cells were cultured routinely from that point forward.\r\n\r\nThe HAP1 subculture routine included a 1:5 split every 48\u2009h or 1:10 split every 72\u2009h to prevent cells from exceeding 80% confluency. To split cells, the media was aspirated and the dish washed with 10\u2009ml of room-temperature Dulbecco\u2019s PBS (Gibco). Following Dulbecco\u2019s PBS aspiration, the cells were treated with 1\u2009ml of 0.25% trypsin\u2013EDTA (Gibco) and incubated for 3\u2009min at 37\u2009\u00b0C.","Next, 14\u2009ml of prewarmed IMDMc was added and cells were collected and centrifuged at 300g for 5\u2009min. Cells were then resuspended in 10\u2009ml of IMDMc, counted and seeded on a 10-cm dish.\r\nGeneration of diploid HAP1 cells\r\nParental HAP1 cells were cultured for 9\u2009days after thawing in IMDMc without DAB supplementation to allow for the spontaneous occurrence of diploid cells.","On day 10, cells were stained with 5\u2009\u00b5g\u2009ml\u22121 Hoechst working solution (Thermo Fisher Scientific) for 1\u2009h at 37\u2009\u00b0C, followed by fluorescence-activated cell sorting to select diploid cells using a BD FACSAria Fusion Flow Cytometer. Diploid cells were sorted on the basis of their G2\/M peak (4n), with gates established using a monoclonal diploid HAP1 control population. 
Sorted diploid HAP1 cells were then expanded for 10\u2009days in IMDMc without DAB supplementation before the subsequent SGE experiment.\r\nTransfection and selection\r\nThe day before transfection, 12\u2009million cells were seeded on a 10-cm dish for each replicate and 2\u2009million cells were seeded on a six-well plate for the negative control sample.","On the day of transfection (day 0), a transfection mix containing 10\u2009\u00b5g of HDR library, 30\u2009\u00b5g of the pX459 gRNA plasmid and 24\u2009\u00b5l of Xfect polymer (Takara) in a final volume of 800\u2009\u00b5l was prepared according to the manufacturer\u2019s instructions for each replicate. For the negative control sample, a pX459 plasmid with a gRNA targeting HPRT1 (ref. 13) instead of RNU4-2 was used to prevent successful editing, and the transfection volume mix was scaled down eightfold. Following transfection, cells were incubated for 24\u2009h at 37\u2009\u00b0C and supplemented with prewarmed IMDMc with 1\u2009\u00b5g\u2009ml\u22121 puromycin (Cayman Chemical).","On day 4, half of the cells for each replicate were collected for gDNA extraction and stored as a pellet at \u221270\u2009\u00b0C; the rest were kept in culture in 15-cm dishes supplemented with 15\u2009ml of IMDMc. The negative control sample was collected when reaching 70% confluency at day 6. A second sample of 10\u2009million cells per replicate was collected at day 14 and stored at \u221270\u2009\u00b0C.\r\nSequencing library preparation\r\ngDNA was extracted from cells using QIAshredder (Qiagen) columns followed by the Allprep DNA\/RNA kit (Qiagen) according to the manufacturer\u2019s instructions. Concentrations were determined using the Qubit dsDNA BR assay kit.","The RNU4-2 locus was subsequently amplified using nested PCR to avoid amplification of plasmid DNA, followed by an indexing PCR, in total using three primer sets (Supplementary Table 3). 
For the first reaction, the total gDNA template from each condition was partitioned into separate reactions, each containing 1.25\u2009\u00b5g of DNA in a 100\u2009\u00b5l reaction volume, using NEBNext Ultra II Q5 master mix (NEB) supplemented with MgCl2 (Ambion) to a final concentration of 4\u2009mM. The amplification reaction was monitored by quantitative PCR (qPCR) using SYBR green (Invitrogen) and stopped before completion.","The reactions for each sample were pooled and mixed before 50\u2009\u00b5l of each product was purified using AmpureXP (1.2\u00d7) and eluted in 15\u2009\u00b5l of nuclease-free water. Then 1\u2009\u00b5l of purified product was loaded into the second qPCR reaction (50\u2009\u00b5l final volume) and amplified using NEBNext Ultra II Q5. The reaction was again monitored using SYBR green and stopped before completion. The AmpureXP purification was then repeated, and a final qPCR (NEBNext Ultra II Q5) to incorporate sample indexes and sequencing adapters was performed using 1\u2009\u00b5l of purified product as template in a 50\u2009\u00b5l reaction for 8 cycles. Final products were purified and quantified with the Qubit dsDNA HS kit.","The samples were then pooled for sequencing, aiming for 5\u2009million reads per experimental replicate timepoint, 2\u2009million reads for the negative control sample and 1\u2009million reads for the HDR library. The pool was purified using AmpureXP (1\u00d7), quantified and loaded on a Novaseq X sequencer (Illumina).\r\nVariant frequency quantification\r\nThe fastq files were de-multiplexed using the bcl2fastq script and the variants were quantified as previously described13.","In brief, paired-end reads were adapter trimmed and merged, and reads containing N bases were discarded. HDR editing rates were computed from fastq files directly as the fraction of reads containing the exact PAM-blocking mutation. 
Fastq files were then aligned to a reference RNU4-2 sequence and the frequency of each variant included in the library was determined.\r\nFunction score calculation\r\nAll variants were observed in the library and day 4 at a frequency higher than 10\u22124, and were therefore included in downstream analyses.","Function scores for library variants were first calculated per replicate, computed as the log2 ratio of day 14 to day 4 variant frequencies, normalized by subtracting the median function score of negative control insertions from all scores. Final function scores were then calculated for each variant by averaging function scores across replicates, again normalizing to the median of negative control insertions such that the median final function score of control insertions equals 0. For each variant, P values were determined using the norm.cdf function in Python, defining a normal distribution from the mean and standard deviation of function scores for negative control insertions.","The P values were corrected for multiple hypothesis testing using the multipletests function in Python (Benjamini\u2013Hochberg procedure) to derive q values. Significantly depleted variants were defined as those with q\u2009<\u20090.01, corresponding to a function score below \u22120.302. We further classified depleted variants into two categories using an arbitrary function score threshold of \u22120.9 to include sufficient variants and individuals per category to assess for phenotypic differences.\r\nVariant scoring with CADD and ViennaRNA\r\nVariants were annotated as ReNU syndrome variants if they were reported in ref. 1 or classified as pathogenic or likely pathogenic in ref. 4.","Variants were annotated with whether or not they were observed in the 490,640 genome sequenced individuals from the UK Biobank18 (DRAGEN pipeline) or in 414,840 individuals from All of Us V8. CADD v.1.7 (ref. 
19) annotations were obtained by uploading a synthetic VCF to the online annotation tool (https:\/\/cadd.gs.washington.edu\/score). As we preselected which insertions and deletions to include in the SGE assay (because of assay size limitations), we restricted analyses involving CADD to SNVs within the RNU4-2 transcript.\r\n\r\nFor variants assayed within the RNU4-2 transcript, predicted changes in U4\/U6 interaction stability (\u0394\u0394Gbind) were computed using the ViennaRNA package34 (v.2.7.0).","Minimum free energies (MFEs) were obtained by use of RNA.fold_compound() at 37\u2009\u00b0C using default Turner RNA thermodynamic parameters. U4\/U6 pairing was modelled with the ViennaRNA cofold grammar by providing sequences in the dimer format (u4(AGCUUUGCGCAGUGGCAGUAUCGUAGCCAAUGAGGUUUAUCCGAGGCGCGAUUAUUGCUAAUUGAAAACUUUUCCCAAUACCCCGCCAUGACGACUUGAAAUAUAGUCGGCAUUGGCAAUUUUUGACAGUCUCUACGGAGACUGA) + \u2018&\u2019 + u6(GUGCUCGCUUCGGCAGCACAUAUACUAAAAUUGGAACGAUACAGAGAAGAUUAGCAUGGCCCCUGCGCAAGGAUGACACGCAAAUUCGUGAAGCGUUCCAUAUUUU)), and the intermolecular MFE was extracted using mfe_dimer().","Single-strand MFEs for U4 and U6 were computed independently using mfe().\r\n\r\nBinding free energy was defined as:\r\n\r\n$$\\Delta {G}_{{\\rm{bind}}}=\\Delta {G}_{{\\rm{complex}}}-(\\Delta {G}_{{\\rm{U}}4}+\\Delta {G}_{{\\rm{U}}6})$$\r\n\r\nThe same procedure was applied to RNU4-2 variant sequences, and differential stability was then calculated as:\r\n\r\n$$\\Delta \\Delta {G}_{{\\rm{bind}}}=\\Delta {G}_{{\\rm{bind.variant}}}-\\Delta {G}_{{\\rm{bind.reference}}}$$\r\n\r\nPositive \u0394\u0394Gbind values indicate predicted destabilization of U4\/U6 pairing.\r\n\r\nVariants were mapped to the following structural regions of RNU4-2: Stem II (n.3 to n.16), k-turn within the 5\u2032 
Stem loop (n.27 to n.35 and n.41 to n.46), Stem I (n.56 to n.62), T-loop (n.63 to n.70), Stem III (n.75 to n.79), 3\u2032 Stem loop (n.85 to n.117), Sm protein (n.118 to n.126) and terminal Stem loop (n.127 to n.144).\r\n\r\nROC area under the curve (AUC) values were calculated by assigning a 1 label to ReNU syndrome SNVs and a 0 label for SNVs observed in UK Biobank or All of Us.","The labels and corresponding function scores were used to compute false positive and true positive rates (using Python\u2019s roc_curve function), as well as ROC-AUC values (using the roc_auc_score function). This analysis was also restricted to SNVs only.\r\nAssigning evidence codes to variants based on function score\r\nWe followed established guidelines8 to calibrate function scores from SGE experiments in haploid cells to evidence strengths for classification of ReNU syndrome variants.","To do so, we defined a gold standard set of pathogenic, dominantly inherited variants as the 17 previously reported4 as \u2018pathogenic\u2019 or \u2018likely pathogenic\u2019 for which we derived function scores. Few RNU4-2 variants have been deemed benign in ClinVar, so we instead used reported allele counts in the UK Biobank and All of Us studies to define a neutral set of variants. This included all 45 assayed variants with a combined allele count of more than 100 between the two studies. A two-component Gaussian mixture model was then fit from the function score distributions of these variant sets, using the \u2018Mclust\u2019 package in R.","This model was then used to determine the probability of pathogenicity for each variant in the CR based on function score. 
The resulting posterior probabilities were then converted to OddsPath values using a uniform prior of 0.5, and evidence codes were assigned according to established OddsPath thresholds8 with the exception that PS3 evidence was capped at strong (+4 points), in line with the limited number of gold standard variants available for calibration.","We did not apply the model to variants outside the CR on account of there being no known pathogenic variants for ReNU syndrome in these regions.\r\nPhenotype severity and clustering\r\nCategorical data for 44 clinical features from 143 patients with pathogenic and likely pathogenic RNU4-2 variants4 were transformed into a 0\u20131 scale, with 0 indicating a more favourable phenotype and 1 a more severe presentation. Principal component analysis was generated after imputing missing data with 0 and performing variable scaling. UMAP representation was created using the umap package in R.","Two-sided Fisher\u2019s tests with Bonferroni adjustment to account for four tests were used to compare clinical features between SGE function score variant categories (strong versus moderate) in Extended Data Table 1.\r\nRNA sequencing cluster analysis\r\nRNA sequencing from cultured lymphocytes was performed following the protocol described in ref. 4 for RNU4-2 and\u00a0rMATS-turbo (v.4.3.0)35 was run on 19 ReNU samples and 20 control participants (excluding one individual previously deemed a control in ref. 4 who was here found to be a recessive RNU4-2 case); 101 significant alternative non-canonical 5\u2032 splice sites (A5SS) events (false discovery rate less than 0.1, \u0394PSI\u2009>\u20090.05) were retained.","Then rMATS-turbo was rerun on the 19 ReNU samples, the 20 control participants, without statistical or \u0394PSI filtering. 
The A5SS output was filtered on the 101 retained events and the PSI values were extracted to perform the principal component analysis.\r\nAssociation testing in UK Biobank\r\nWe extracted phenotypes associated with educational attainment from UK Biobank following an approach published previously36. Fluid intelligence scores (field ID 20016) were retrieved for all participants. Where many scores were recorded, the median value was taken. Age left education was calculated as the maximum value in age completed full time education (field ID 845).","Diagnosis with childhood developmental disorder was defined using the ICD codes for intellectual disability (ICD-10: F70\u2013F73, F78, F79; ICD-9: 317, 318, 319), epilepsy (ICD-10: G40), global developmental disorders (ICD-10: F80\u2013F84, F88\u2013F95, R62, R48, Z55; ICD-9: 299, 312, 313, 314, 315) and congenital malformations (ICD-10: Q0\u2013Q99, ICD-9: 740\u2013759).\r\n\r\nWe identified UK Biobank participants with: (1) depleted variants in the 18-bp RNU4-2 CR (n\u2009=\u20096), (2) depleted variants outside the CR (n\u2009=\u200950) and (3) participants with non-depleted SNVs outside the CR (n\u2009=\u200912,132).","We performed multiple linear regression on fluid intelligence scores and age left education, and multiple logistic regression on childhood developmental disorder for variant groups (2) and (3) defined above, compared with all individuals without any variants in any of the three groups. 
Age at recruitment (field ID 21022), age\u00b2 (age at recruitment\u2009\u00d7\u2009age at recruitment), sex (field ID 31) and first ten genetic principal components (field ID 22009) were included as covariates.","P values were false discovery rate-corrected using the Benjamini\u2013Hochberg method.\r\nInvestigating RNU4ATAC variants in ClinVar\r\nVariants in RNU4ATAC with classifications of pathogenic, likely pathogenic, pathogenic or likely pathogenic, benign, likely benign or benign or likely benign were downloaded from the ClinVar37 website on 4 March 2025. Two regions of RNU4-2 and RNU4ATAC with identical structures were defined, mapping to the k-turn (RNU4-2 nucleotides 26\u201352; RNU4ATAC nucleotides 31\u201357) and the Sm protein binding site (RNU4-2 nucleotides 115\u2013126; RNU4ATAC nucleotides 113\u2013124).","Variants at the same nucleotide in the structure and where the reference bases in RNU4-2 and RNU4ATAC are identical, were marked as \u2018equivalent\u2019.\r\nIdentifying biallelic variants in cohorts\r\nWe searched rare disease cohorts for individuals with biallelic variants in RNU4-2. These cohorts included the Genomics England 100,000 Genomes Project and NHS Genomic Medicine Service datasets accessed through the UK National Genomic Research Library38, the SeqOIA and Auragen clinical cohorts in France (PFMG2025), the Undiagnosed Disease Network, the Broad Institute Center for Mendelian Genomics and GREGoR (Genomics Research to Elucidate the Genetics of Rare Diseases)39 Consortium cohorts.","We only included individuals with homozygous variants with function scores less than \u22120.302, or compound heterozygous variants in which both had function scores less than \u22120.302 (n\u2009=\u200920). All individuals had previous genome analysis including investigation of variants in known NDD genes and large structural variants. 
One individual (individual 17) had a reported likely pathogenic variant in GLI3; however, this variant did not explain all of their reported phenotypes (see ref. 21 for more details).\r\nEthics\r\nInformed consent was obtained for all participants included in this study from their parent(s) or legal guardian, with the study approved by the local regulatory authority.","The 100,000 Genomes Project Protocol has ethical approval from the Health Research Authority Committee East of England Cambridge South (Research Ethics Committee ref. 14\/EE\/1112). This study was approved by Genomics England under Research Registry Projects 354.","Health related research in UK Biobank was approved by the Research Ethics Committee under reference 20\/NW\/0274 with this research conducted under application number 81050.\r\n\r\nWe received an exception to the Data and Statistics Dissemination Policy from the All of Us Resource Access Board to report questionnaire response data for the single individual with a homozygous depleted variant as well as variant counts below 20 for all variants in RNU4-2.\r\nReporting summary\r\nFurther information on research design is available in the\u00a0Nature Portfolio Reporting Summary linked to this article.\r\n\nData availability\r\nSGE data including all RNU4-2 function scores are available in Supplementary Table 1.","Fastq files from SGE experiments are available through the European Nucleotide Archive at accession PRJEB87505. RNA sequencing data (Fig. 3d) were taken from ref. 4 and are available in the European Genome\u2013Phenome Archive at http:\/\/www.ebi.ac.uk\/ega; study accession EGAS50000000889. 
UK Biobank and All of Us V8 data are available to researchers on approval of application (https:\/\/www.ukbiobank.ac.uk\/use-our-data\/apply-for-access\/; https:\/\/www.researchallofus.org\/).\r\n\nCode availability\r\nCustom scripts used to analyse SGE experiments and generate figures are available at GitHub (https:\/\/github.com\/FrancisCrickInstitute\/RNU4-2_Saturation_Genome_Editing).\r\n\nReferences\r\nChen, Y. et al.","De novo variants in the RNU4-2 snRNA cause a frequent neurodevelopmental syndrome. Nature 632, 832\u2013840 (2024).\r\n\r\nGreene, D. et al. Mutations in the U4 snRNA gene RNU4-2 cause one of the most prevalent monogenic neurodevelopmental disorders. Nat. Med.","30, 2165\u20132169 (2024).\r\n\r\nNguyen, T. H. D. et al. The architecture of the spliceosomal U4\/U6.U5 tri-snRNP. Nature 523, 47\u201352 (2015).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nNava, C. et al. Dominant variants in major spliceosome U4 and U5 small nuclear RNA genes cause neurodevelopmental disorders through splicing disruption. Nat. Genet.","57, 1374\u20131388 (2025).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nBruselles, A. et al. Expanding the mutational spectrum of ReNU syndrome: insights into 5\u2032 stem-loop variants. Eur. J. Hum. Genet.","33, 432\u2013440 (2025).\r\n\r\nSeplyarskiy, V. et al. A mutation rate model at the basepair resolution identifies the mutagenic effect of polymerase III transcription. Nat. Genet.","55, 2235\u20132242 (2023).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nRentzsch, P.,","Witten, D.,","Cooper, G.","M.,","Shendure, J.","& Kircher, M. CADD: predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Res.","47, D886\u2013D894 (2019).\r\n\r\nArticle\nCAS\nPubMed\nGoogle Scholar\r\n\r\nBrnich, S. E. et al. 
Recommendations for application of the functional evidence PS3\/BS3 criterion using the ACMG\/AMP sequence variant interpretation framework. Genome Med.","12, 3 (2019).\r\n\r\nArticle\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nFindlay, G.","M.,","Boyle, E.","A.,","Hause, R.","J.,","Klein, J. C. & Shendure, J. Saturation editing of genomic regions by multiplex homology-directed repair. Nature 513, 120\u2013123 (2014).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nFindlay, G. M. et al. Accurate classification of BRCA1 variants with saturation genome editing. Nature 562, 217\u2013222 (2018).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nMeitlis, I. et al. Multiplexed functional assessment of genetic variants in CARD11. Am. J. Hum. Genet.","107, 1029\u20131043 (2020).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nRadford, E. J. et al. Saturation genome editing of DDX3X clarifies pathogenicity of germline and somatic variation. Nat. Commun.","14, 7702 (2023).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nBuckley, M. et al. Saturation genome editing maps the functional spectrum of pathogenic VHL alleles. Nat. Genet.","56, 1446\u20131455 (2024).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nWaters, A. J. et al. Saturation genome editing of BAP1 functionally classifies somatic and germline variants. Nat. Genet.","56, 1434\u20131445 (2024).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nHsu, P. D. et al. DNA targeting specificity of RNA-guided Cas9 nucleases. Nat. Biotechnol.","31, 827\u2013832 (2013).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nOlvera-Le\u00f3n, R. et al. High-resolution functional mapping of RAD51C by saturation genome editing.","Cell 187, 5719\u20135734.e19 (2024).\r\n\r\nArticle\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nHuang, H. et al. 
Functional evaluation and clinical classification of BRCA2 variants. Nature 638, 528\u2013537 (2025).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nSudlow, C. et al. UK Biobank: an open access resource for identifying the causes of a wide range of complex diseases of middle and old age. PLoS Med.","12, e1001779 (2015).\r\n\r\nArticle\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nSchubach, M.,","Maass, T., Nazaretyan, L.,","R\u00f6ner, S.","& Kircher, M. CADD v1.7: using protein language models, regulatory CNNs and other nucleotide-level scores to improve genome-wide variant predictions. Nucleic Acids Res.","52, D1143\u2013D1154 (2024).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nQuinodoz, M. et al. De novo and inherited dominant variants in U4 and U6 snRNA genes cause retinitis pigmentosa. Nat. Genet.","58, 169\u2013179 (2026).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nRius, R. et al. Biallelic variants in the noncoding RNA gene RNU4-2 cause a recessive neurodevelopmental syndrome with distinct white matter changes. Nat. Genet. https:\/\/doi.org\/10.1038\/s41588-026-02554-6 (2026).\r\n\r\nLiu, S. et al. Binding of the human Prp31 Nop domain to a composite RNA-protein platform in U4 snRNP. Science 316, 115\u2013120 (2007).\r\n\r\nArticle\nADS\nCAS\nPubMed\nGoogle Scholar\r\n\r\nNottrott, S. et al. Functional interaction of a novel 15.5kD [U4\/U6\u00b7U5] tri-snRNP protein with the 5\u2032 stem\u2013loop of U4 snRNA. EMBO J. https:\/\/doi.org\/10.1093\/emboj\/18.21.6119 (1999).\r\n\r\nPannone, B. K.","& Wolin, S. L. Sm-like proteins wRING the neck of mRNA. Curr. Biol.","10, R478\u201381 (2000).\r\n\r\nArticle\nCAS\nPubMed\nGoogle Scholar\r\n\r\nHu, J.,","Xu, D.,","Schappert, K.,","Xu, Y.","& Friesen, J. D. Mutational analysis of Saccharomyces cerevisiae U4 small nuclear RNA identifies functionally important domains. Mol. Cell. 
Biol.","15, 1274\u20131285 (1995).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nEdery, P. et al. Association of TALS developmental disorder with defect in minor splicing component U4atac snRNA. Science 332, 240\u2013243 (2011).\r\n\r\nArticle\nADS\nCAS\nPubMed\nGoogle Scholar\r\n\r\nFarach, L. S. et al. The expanding phenotype of RNU4ATAC pathogenic variants to Lowry Wood syndrome. Am. J. Med. Genet.","A 176, 465\u2013469 (2018).\r\n\r\nArticle\nCAS\nPubMed\nGoogle Scholar\r\n\r\nMerico, D. et al. Compound heterozygous mutations in the noncoding RNU4ATAC cause Roifman Syndrome by disrupting minor intron splicing. Nat. Commun.","6, 8718 (2015).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nOlthof, A. M. et al. Disruption of exon-bridging interactions between the minor and major spliceosomes results in alternative splicing around minor introns. Nucleic Acids Res.","49, 3524\u20133545 (2021).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nArriaga, T. M. et al. Transcriptome-wide outlier approach identifies individuals with minor spliceopathies. Am. J. Hum. Genet.","112, 2458\u20132475 (2025).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nJackson, A. et al. Analysis of R-loop forming regions identifies RNU2-2 and RNU5B-1 as neurodevelopmental disorder genes. Nat. Genet.","57, 1362\u20131366 (2025).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nGreene, D. et al. Mutations in the small nuclear RNA gene RNU2-2 cause a severe neurodevelopmental disorder with prominent epilepsy. Nat. Genet.","57, 1367\u20131373 (2025).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nRan, F. A. et al. Genome engineering using the CRISPR\u2013Cas9 system. Nat. Protoc.","8, 2281\u20132308 (2013).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nLorenz, R. et al. ViennaRNA package 2.0. Algorithms Mol. 
Biol.","6, 26 (2011).\r\n\r\nArticle\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nWang, Y. et al. rMATS-turbo: an efficient and flexible computational tool for alternative splicing analysis of large-scale RNA-seq data. Nat. Protoc.","19, 1083\u20131104 (2024).\r\n\r\nArticle\nCAS\nPubMed\nGoogle Scholar\r\n\r\nKingdom, R.,","Beaumont, R.","N.,","Wood, A.","R.,","Weedon, M. N.","& Wright, C. F. Genetic modifiers of rare variants in monogenic developmental disorder loci. Nat. Genet.","56, 861\u2013868 (2024).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nLandrum, M. J. et al. ClinVar: public archive of relationships among sequence variation and human phenotype. Nucleic Acids Res.","42, D980\u2013D985 (2014).\r\n\r\nArticle\nCAS\nPubMed\nGoogle Scholar\r\n\r\nGenomics England. National Genomic Research Library. Dataset. figshare https:\/\/doi.org\/10.6084\/m9.figshare.4530893.v8 (2025).\r\n\r\nDawood, M. et al. GREGoR: accelerating genomics for rare diseases. Nature 647, 331\u2013342 (2025).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nDownload references\r\nAcknowledgements\r\nWe thank the Crick\u2019s Genomics Scientific Technology Platform for performing sequencing and the Flow Cytometry and Cell Sciences Scientific Technology Platforms for assisting in maintaining cell lines. We also thank P. O\u2019Donovan, M. Sato and E. Miller from the Genomics England Airlock team. N.W.","is supported by a Sir Henry Dale Fellowship jointly funded by the Wellcome Trust and the Royal Society (grant 220134\/Z\/20\/Z), a Lister Institute research prize and grant funding from Novo Nordisk. Y.C. is supported by a studentship from Novo Nordisk. The Francis Crick Institute receives its core funding (G.M.F.) from Cancer Research UK (grant CC2190), the UK Medical Research Council (grant CC2190) and the Wellcome Trust (grant CC2190). G.M.F. is supported by a European Research Council Starting grant (Seq2Func-NC). A.J.M.B. 
is supported by a Wellcome PhD Training Fellowship for Clinicians and the 4Ward North PhD Programme for Health Professionals (grant 223521\/Z\/21\/Z). C.D.","is supported by research grants from the Deutsche Forschungsgemeinschaft (DFG) (project grants 455314768, 458099954 and 505514143). C.N. has received support from the Health philanthropic program of Mutuelles AXA dedicated to supporting innovative research projects in France (RNU-SPLICE project). Patients 4, 5, 6, 13, 14, 15 and 16 included in this study were diagnosed through Plan France M\u00e9decine G\u00e9nomique 2025 (PFMG2025). Patients 11 and 12 were sequenced at the Baylor College of Medicine Human Genome Sequencing Center through the GREGoR Consortium with support from US National Human Genome Research Institute grants U01HG011758 and U54HG003273.","Analysis of individuals 9 and 10 was supported by National Human Genome Research Institute grant R01HG009141. D.G.C. was supported by the Child Neurologist Career Development Program CNCDP-K12 (US National Institute of Neurological Disorders and Strokes grant NS098482). C.A.-T. is supported in part by the National Human Genome Research Institute grant U01HG011755 (GREGoR consortium). O.M. is supported by the Hazem Ben-Gacem Tunisia Medical Fellowship Fund. Research reported in this publication was supported by the National Institute Of Neurological Disorders And Stroke of the National Institutes of Health under grant awards U01HG010218 and U01HG010233.","The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This research was made possible through access to data in the National Genomic Research Library, which is managed by Genomics England Limited (a wholly owned company of the Department of Health and Social Care). 
The National Genomic Research Library holds data provided by patients and collected by the NHS as part of their care and data collected as part of their participation in research. The National Genomic Research Library is funded by the National Institute for Health Research and NHS England.","The Wellcome Trust, Cancer Research UK and the Medical Research Council have also funded research infrastructure. This study was registered with Genomics England under Research Registry Projects 354. This research has been conducted using the UK Biobank Resource under application number 81050. We gratefully acknowledge All of Us and UK Biobank participants for their contributions. We also thank the National Institutes of Health\u2019s All of Us Research Program for making available the participant and variant data examined in this study.","For the purpose of Open Access, the authors have applied a CC BY public copyright licence to any Author Accepted Manuscript version arising from this submission.\r\nAuthor information\r\nAuthor notes\r\n\r\nThese authors contributed equally: Nicola Whiffin, Gregory M. Findlay\r\nAuthors and Affiliations\r\nThe Genome Function Laboratory, The Francis Crick Institute, London, UK\r\n\r\nJoachim De Jonghe,\u00a0Ayanfeoluwa Adedeji,\u00a0Christina M. 
Kajba\u00a0&\u00a0Gregory M.","Findlay\r\n\r\nBig Data Institute, University of Oxford, Oxford, UK\r\n\r\nHyung Chul Kim,\u00a0Ruebena Dawes,\u00a0Yuyang Chen\u00a0&\u00a0Nicola Whiffin\r\n\r\nCentre for Human Genetics, University of Oxford, Oxford, UK\r\n\r\nHyung Chul Kim,\u00a0Ruebena Dawes,\u00a0Yuyang Chen\u00a0&\u00a0Nicola Whiffin\r\n\r\nDepartment of Biochemical Engineering, University College London, London, UK\r\n\r\nAyanfeoluwa Adedeji\r\n\r\nInstitute of Human Genetics, University Hospital Essen, University Duisburg-Essen, Essen, Germany\r\n\r\nElsa Leit\u00e3o\u00a0&\u00a0Christel Depienne\r\n\r\nNantes Universit\u00e9, CHU de Nantes, CNRS, INSERM, L\u2019Institut du Thorax, Nantes, France\r\n\r\nBenjamin Cogn\u00e9\r\n\r\nNantes Universit\u00e9, CHU de Nantes, CNRS, INSERM, G\u00e9n\u00e9tique m\u00e9dicale, Nantes, France\r\n\r\nBenjamin Cogn\u00e9\r\n\r\nManchester Centre for Genomic Medicine, Division of Evolution and Genomic Sciences, School of Biological Sciences, Faculty of Biology, Medicine and Health, University of Manchester, Manchester, UK\r\n\r\nAlexander J.","M. Blakes\r\n\r\nCentre for Population Genomics, Garvan Institute of Medical Research, Sydney, New South Wales, Australia\r\n\r\nCas Simons,\u00a0Rocio Rius\u00a0&\u00a0Daniel G. MacArthur\r\n\r\nCentre for Population Genomics, Murdoch Children\u2019s Research Institute, Melbourne, Victoria, Australia\r\n\r\nCas Simons,\u00a0Rocio Rius\u00a0&\u00a0Daniel G. 
MacArthur\r\n\r\nDepartment of Pediatric Neurology, University of Child Health Sciences, The Children\u2019s Hospital, Lahore, Pakistan\r\n\r\nJaveria R.","Alvi\u00a0&\u00a0Tipu Sultan\r\n\r\nService de G\u00e9n\u00e9tique, G\u00e9nomique et Procr\u00e9ation, CHU Grenoble Alpes, Grenoble, France\r\n\r\nFlorence Amblard,\u00a0Charles Coutton,\u00a0Radu Harbuz\u00a0&\u00a0Julien Thevenon\r\n\r\nGCS AURAGEN, Lyon, France\r\n\r\nFlorence Amblard,\u00a0Charles Coutton\u00a0&\u00a0Julien Thevenon\r\n\r\nUniversit\u00e9 Grenoble Alpes, INSERM U 1209, CNRS UMR 5309, Institut for Advanced Biosciences, Grenoble, France\r\n\r\nFlorence Amblard,\u00a0Charles Coutton\u00a0&\u00a0Julien Thevenon\r\n\r\nBroad Center for Mendelian Genomics, Program in Medical and Population Genetics, Broad Institute of MIT and Harvard, Cambridge, MA, USA\r\n\r\nChristina Austin-Tse,\u00a0Olfa Messaoud\u00a0&\u00a0Nicola Whiffin\r\n\r\nService de p\u00e9diatrie, H\u00f4pitaux Universitaires de Strasbourg, Strasbourg, France\r\n\r\nSarah Baer\r\n\r\nDepartment of Medicine, University of Washington School of Medicine, Seattle, WA, USA\r\n\r\nElsa V.","Balton\u00a0&\u00a0Nitsuh Dargie\r\n\r\nLaboratoire SeqOIA, Paris, France\r\n\r\nPierre Blanc,\u00a0Olivier Grunewald,\u00a0Paul Gueguen,\u00a0Pierre Marijon\u00a0&\u00a0Caroline Nava\r\n\r\nSection of Pediatric Neurology, Department of Pediatrics, Baylor College of Medicine, Houston, TX, USA\r\n\r\nDaniel G. Calame\r\n\r\nTexas Children\u2019s Hospital, Houston, TX, USA\r\n\r\nDaniel G. Calame\r\n\r\nVictorian Clinical Genetics Services, Murdoch Children\u2019s Research Institute, Melbourne, Victoria, Australia\r\n\r\nChloe A. Cunningham\u00a0&\u00a0Richard J. Leventer\r\n\r\nDepartment of Paediatrics, University of Melbourne, Melbourne, Victoria, Australia\r\n\r\nChloe A. Cunningham\u00a0&\u00a0Richard J. 
Leventer\r\n\r\nDepartment of Pediatrics, University of Washington, Seattle, WA, USA\r\n\r\nKatrina M.","Dipple\u00a0&\u00a0Ian Glass\r\n\r\nBrotman Baty Institute for Precision Medicine, Seattle, WA, USA\r\n\r\nKatrina M. Dipple\u00a0&\u00a0Ian Glass\r\n\r\nDepartment of Molecular and Human Genetics, Baylor College of Medicine, Houston, TX, USA\r\n\r\nHaowei Du\r\n\r\nService de G\u00e9n\u00e9tique M\u00e9dicale, Institut de G\u00e9n\u00e9tique M\u00e9dicale D\u2019Alsace, H\u00f4pitaux Universitaires de Strasbourg, Strasbourg, France\r\n\r\nSalima El Chehadeh\r\n\r\nLaboratoire de G\u00e9n\u00e9tique M\u00e9dicale, Institut de G\u00e9n\u00e9tique M\u00e9dicale d\u2019Alsace, INSERM UMRS_1112, CRBS, Universit\u00e9 de Strasbourg, Strasbourg, France\r\n\r\nSalima El Chehadeh\r\n\r\nRady Children\u2019s Institute for Genomic Medicine, San Diego, CA, USA\r\n\r\nJoseph G.","Gleeson\r\n\r\nDepartment of Neurosciences and Pediatrics, University of California, San Diego, San Diego, CA, USA\r\n\r\nJoseph G.","Gleeson\r\n\r\nU1172-LilNCog-Lille Neuroscience and Cognition, CHU de Lille, Lille, France\r\n\r\nOlivier Grunewald\r\n\r\nLaboratoire de Genopathies, CHU Lille, Lille, France\r\n\r\nOlivier Grunewald\r\n\r\nService de G\u00e9n\u00e9tique, CHRU de Tours, Tours, France\r\n\r\nPaul Gueguen\u00a0&\u00a0Marie-Line Jacquemont\r\n\r\nUniversit\u00e9 de Tours, Imaging Brain and Neuropsychiatry iBraiN, Tours, France\r\n\r\nPaul Gueguen\u00a0&\u00a0Marie-Line Jacquemont\r\n\r\nCentre de R\u00e9f\u00e9rence Maladies Rares \u2018Anomalies du D\u00e9veloppement et Syndromes Malformatifs\u2019, FHU Genomeds, CHRU de Tours, Tours, France\r\n\r\nMarie-Line Jacquemont\r\n\r\nRoyal Children\u2019s Hospital, Melbourne, Victoria, Australia\r\n\r\nRichard J.","Leventer\r\n\r\nHarvard Medical School, Boston, MA, USA\r\n\r\nOlfa Messaoud\r\n\r\nCentre de r\u00e9f\u00e9rence maladies rares, D\u00e9ficiences Intellectuelles de Causes Rares, Centre de G\u00e9n\u00e9tique, FHU-TRANSLAD, CHU 
Dijon Bourgogne, Dijon, France\r\n\r\nChristel Thauvin\r\n\r\nUnit\u00e9 Fonctionnelle Innovation en Diagnostic G\u00e9nomique des Maladies Rares, F\u00e9d\u00e9ration Hospitalo-Universitaire-TRANSLAD, CHU Dijon Bourgogne, Dijon, France\r\n\r\nChristel Thauvin\r\n\r\nUMR1231 GAD, Inserm, Universit\u00e9 Bourgogne-Franche Comt\u00e9, Dijon, France\r\n\r\nChristel Thauvin\r\n\r\nClinique de G\u00e9n\u00e9tique, H\u00f4pital Jeanne de Flandre, CHU de Lille, Lille, France\r\n\r\nCatherine Vincent-Delorme\r\n\r\nConsultation de g\u00e9n\u00e9tique, CH Arras, Arras, France\r\n\r\nCatherine Vincent-Delorme\r\n\r\nDepartment of Medical Genetics, Istanbul Medeniyet University Medical School, Istanbul, Turkey\r\n\r\nElif Yilmaz Gulec\r\n\r\nMedical Genetics Clinic, Istanbul Goztepe Prof Dr Suleyman Yalcin City Hospital, Istanbul, Turkey\r\n\r\nElif Yilmaz Gulec\r\n\r\nCardiovascular Medicine, Stanford University, Stanford, CA, USA\r\n\r\nRodrigo Mendez\r\n\r\nSorbonne Universit\u00e9, Institut du Cerveau\u2014Paris Brain Institute\u2014ICM, Inserm, CNRS, APHP, D\u00e9partement de G\u00e9n\u00e9tique, H\u00f4pital de la Piti\u00e9 Salp\u00eatri\u00e8re, Paris, France\r\n\r\nCaroline Nava\r\nContributions\r\nJ.D.J.,","A.A. and C.M.K. performed experiments.","J.D.J.,","H.C.K.,","R.D.,","E.L.,","B.C., Y.C. and\u00a0A.J.M.B. analysed data and contributed to the figures and tables in the paper.","C.S.,","R.R.,\u00a0J.T.,","R.M.,","D.G.M.,","C.D., N.W. and G.M.F. collected data, provided funding and supervised the work. All other authors provided clinical and\/or genomic data and are listed alphabetically.","J.D.J., N.W. and G.M.F. wrote the paper with input from all the authors.\r\nCorresponding authors\r\nCorrespondence to\nNicola Whiffin or Gregory M. Findlay.\r\nEthics declarations\nCompeting interests\r\nN.W. receives research funding from Novo Nordisk and Biomarin Pharmaceutical. D.G.M. 
is a paid consultant for GlaxoSmithKline, Insitro and Overtone Therapeutics and receives research support from Microsoft. The other authors declare no competing interests.\r\n\nPeer review\nPeer review information\r\nNature thanks Karine Choquet and the other, anonymous, reviewer(s) for their contribution to the peer review of this work.","Peer reviewer reports are available.\r\n\nAdditional information\r\nPublisher\u2019s note Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.\r\nExtended data figures and tablesExtended Data Fig. 1 Quality control metrics for RNU4-2 SGE experiments.\r\na,\u00a0The distribution of variant read counts in the HDR library is plotted for all n\u2009=\u2009539 variants included in library design. Of reads from the HDR library, 0.0068% and 4.6% matched unedited reference and PAM-edit only, respectively.","b, The distribution of variant read counts in day 4 gDNA is plotted, with counts averaged across biological replicates.","c,\u00a0Inter-replicate function score correlations are plotted, with Pearson\u2019s r shown and variants coloured by mutation type.\r\nExtended Data Fig. 2 ReNU syndrome variants are discriminated with high precision from variants seen frequently in population controls.\r\nROC-AUC measurements for distinguishing 12 ReNU syndrome SNVs from population control SNVs by SGE score are displayed as a heatmap. Each AUC was determined using only variants in UK Biobank and All of Us with allele counts above the thresholds indicated on the axes. 
For select allele count thresholds applied to both cohorts (10, 20, 40, 60, and 80), the number of population variants retained for the ROC-AUC calculation is indicated.\r\nExtended Data Fig.","3 Correlations between function scores and predicted effects on RNA binding stability.\r\nViennaRNA was used to predict the effects of variants (n\u2009=\u2009521) on the minimum free energy of U4\/U6 RNA binding compared to reference (\u0394\u0394G).","a, Predicted \u0394\u0394G values are plotted versus function scores for the whole transcript, as well as for individual regions (Spearman\u2019s \u03c1).","b, ROC curve for classifying ReNU syndrome variants from population controls using ViennaRNA-predicted \u0394\u0394G values (AUC\u2009=\u20090.72).\r\nExtended Data Fig. 4 Function scores for variants within the RNU4-2 critical region.\r\nFunction scores are plotted by position and coloured by their association with ReNU syndrome (red), presence in the UK Biobank or All of Us cohorts (blue), or no observation in either (teal). Variants score lowly in two regions within the CR (shaded), n.62-70 and n.75-78, which correspond to the T-loop and Stem III, respectively.","The black dashed line (function score\u2009=\u2009\u22120.302) indicates significantly depleted variants and the gray dashed line (function score\u2009=\u2009\u22120.90) separates \u201cmoderate\u201d from \u201cstrong\u201d depletion. The vertical red dashed lines represent the boundaries of the 18 nucleotide ReNU CR reported by Chen et al.1 drawn to include insertions at n.61_62 and n.79_80.\r\nExtended Data Fig. 5 Calibration of function scores to evidence for clinical classification of variants in relation to ReNU syndrome.\r\nGaussian mixture modelling was used to estimate odds of pathogenicity (OddsPath). 
Function scores are plotted against OddsPath values for n\u2009=\u2009127 variants within the ReNU syndrome critical region.","Vertical dotted lines mark the median of insertion controls (x\u2009=\u20090), as well as thresholds for \u201cmoderate\u201d (\u22120.302) and \u201cstrong\u201d (\u22120.90) depletion. Horizontal dashed lines indicate OddsPath thresholds for assigning evidence strengths in accordance with ACMG guidelines8. OddsPath values are capped for variants with function scores below \u22121.0 to display all points.\r\nExtended Data Fig. 6 Phenotype clustering of ReNU patients.\r\na, PCA clustering as in Fig. 3a but removing individuals with the recurrent n.64_65insT variant.","b, Phenotype clustering of all individuals represented in Fig. 3a using a UMAP representation.\r\nExtended Data Fig. 7 Correlation of the SGE assay in haploid versus diploid HAP1 cells.\r\na, Function scores (n\u2009=\u2009539) from SGE in diploid HAP1 cells, plotted by transcript position and coloured by variant type.","b, Function scores from SGE in diploid HAP1 cells coloured by the function score from SGE in haploid HAP1 cells.","c, Correlation of function scores in diploid versus haploid HAP1 cells, coloured by the region in which each variant is located (Pearson\u2019s r\u2009=\u20090.75).\r\nExtended Data Table 1 Comparison of clinical features by function score categories\r\nFull size table\r\nExtended Data Table 2 Results from association testing with intelligence-related metrics in the UK Biobank\r\nFull size table\r\nExtended Data Table 3 Homozygous and compound heterozygous variants in individuals with undiagnosed neurodevelopmental disorders\r\nFull size table\r\nSupplementary informationRights and permissions\r\nOpen Access This article is licensed under a Creative Commons Attribution 4.0 International License, which permits use, sharing, adaptation, distribution and reproduction in any medium or format, as long as you give appropriate credit to the original 
author(s) and the source, provide a link to the Creative Commons licence, and indicate if changes were made.","The images or other third party material in this article are included in the article\u2019s Creative Commons licence, unless indicated otherwise in a credit line to the material. If material is not included in the article\u2019s Creative Commons licence and your intended use is not permitted by statutory regulation or exceeds the permitted use, you will need to obtain permission directly from the copyright holder. To view a copy of this licence, visit http:\/\/creativecommons.org\/licenses\/by\/4.0\/.\r\n\r\nReprints and permissions\r\nAbout this article\r\n\r\nCite this article\r\nDe Jonghe, J.,","Kim, H.C.,","Adedeji, A. et al. Saturation editing of RNU4-2 reveals distinct dominant and recessive disorders.\nNature (2026). https:\/\/doi.org\/10.1038\/s41586-026-10334-9\r\n\r\nDownload citation\r\n\r\nReceived: 24 April 2025\r\n\r\nAccepted: 26 February 2026\r\n\r\nPublished: 08 April 2026\r\n\r\nVersion of record: 08 April 2026\r\n\r\nDOI: https:\/\/doi.org\/10.1038\/s41586-026-10334-9"],"images":[{"src":"\/news\/images\/TkbmuTA5An6pq0jkF31YSHaroE6S7TsKDrDN3Z0FIoltBva7I0do6Hv56SGRKYL85pVN6H7y6avsjnd9v9uqf4eFGmhcax0V6l2tuZNaytYfYHkUjKasu7XoZ0L4dajsjqh9NQVtaBnNYLsiwO6HiBpTgkUj9.jpg","width":0,"height":0,"source":"featured","size":0}]}"
      [3]=>
      int(1775726512)
    }
  }
  [2]=>
  array(7) {
    ["file"]=>
    string(28) "/app/sys/data/newsdb.inc.php"
    ["line"]=>
    int(14)
    ["function"]=>
    string(6) "Insert"
    ["class"]=>
    string(3) "SQL"
    ["object"]=>
    object(SQL)#1 (6) {
      ["CONNECTION":"SQL":private]=>
      object(mysqli)#2 (18) {
        ["affected_rows"]=>
        int(-1)
        ["client_info"]=>
        string(14) "mysqlnd 8.2.30"
        ["client_version"]=>
        int(80230)
        ["connect_errno"]=>
        int(0)
        ["connect_error"]=>
        NULL
        ["errno"]=>
        int(1406)
        ["error"]=>
        string(40) "Data too long for column 'data' at row 1"
        ["error_list"]=>
        array(1) {
          [0]=>
          array(3) {
            ["errno"]=>
            int(1406)
            ["sqlstate"]=>
            string(5) "22001"
            ["error"]=>
            string(40) "Data too long for column 'data' at row 1"
          }
        }
        ["field_count"]=>
        int(2)
        ["host_info"]=>
        string(18) "db:3306 via TCP/IP"
        ["info"]=>
        NULL
        ["insert_id"]=>
        int(0)
        ["server_info"]=>
        string(37) "11.2.2-MariaDB-1:11.2.2+maria~ubu2204"
        ["server_version"]=>
        int(110202)
        ["sqlstate"]=>
        string(5) "22001"
        ["protocol_version"]=>
        int(10)
        ["thread_id"]=>
        int(810018)
        ["warning_count"]=>
        int(0)
      }
      ["URL":"SQL":private]=>
      string(7) "db:3306"
      ["USER":"SQL":private]=>
      string(8) "retrofox"
      ["PASS":"SQL":private]=>
      string(16) "TreeakWqQPD9qnWR"
      ["DB":"SQL":private]=>
      string(8) "retrofox"
      ["DROP_VALUES":"SQL":private]=>
      array(1) {
        [0]=>
        string(0) ""
      }
    }
    ["type"]=>
    string(2) "->"
    ["args"]=>
    array(4) {
      [0]=>
      string(69) "REPLACE INTO news_data (id, data, timestamp) VALUES ('%s', '%s', %i);"
      [1]=>
      string(85) "foxapi_article_SNgSFz1UYwUgnAye8Iq6AwimFnBOT9jdyRE0kXeBSMWDBytDmFUTOmTLbAFJl2KLSL4Shs"
      [2]=>
      string(89760) "{"id":"SNgSFz1UYwUgnAye8Iq6AwimFnBOT9jdyRE0kXeBSMWDBytDmFUTOmTLbAFJl2KLSL4Shs","title":"Saturation editing of RNU4-2 reveals distinct dominant and recessive disorders","description":"Recently, de novo variants in an 18-nucleotide region in the centre of RNU4-2 were shown to cause ReNU syndrome, a syndromic neurodevelopmental disorder that is predicted to affect tens of thousands of individuals worldwide1,2. RNU4-2 is a non-protein-coding gene that is transcribed into the U4 small nuclear RNA component of the major spliceosome3. ReNU syndrome variants disrupt spliceosome function and alter 5\u2032 splice site selection1,4. Here we performed saturation genome editing (SGE) of RNU4-2 to identify the functional and clinical impact of variants across the entire gene. The resulting SGE function scores, derived from variants\u2019 effects on cell fitness, discriminate ReNU syndrome variants from those observed in the population and markedly outperform in silico variant effect prediction. Using these data, we redefine the ReNU syndrome critical region at single-nucleotide resolution, resolve variant pathogenicity for variants of uncertain significance and show that SGE function scores delineate variants by phenotypic severity and the extent of observed splicing disruption. Furthermore, we identify variants affecting function in regions of RNU4-2 that are critical for interactions with other spliceosome components. We show that these variants cause a new recessive neurodevelopmental disorder that is distinct from ReNU syndrome. Together, this work defines the landscape of variant function across RNU4-2, providing critical insights for both diagnosis and therapeutic development. 
Saturation genome editing of RNU4-2 identifies the functional and clinical impact of variants across the entire gene and delineates variants that\u00a0cause a new recessive neurodevelopmental disorder distinct from ReNU syndrome.","author":"Findlay, Gregory M.","source":"Nature","publish":"April 8, 2026, 9:45 pm","canonical":"https:\/\/www.nature.com\/articles\/s41586-026-10334-9","text":["Main\r\nThe spliceosome is a large ribonucleoprotein complex that mediates RNA splicing. De novo variants in a gene encoding one of the small nuclear RNA (snRNA) components of the spliceosome, RNU4-2, were recently shown to cause ReNU syndrome, a prevalent neurodevelopmental disorder (NDD)1,2. ReNU syndrome is a complex multi-system disorder characterized by moderate to severe global developmental delay, intellectual disability, hypotonia, acquired microcephaly, speech and motor difficulties, low bone density and often seizures1,4.\r\n\r\nRNU4-2 encodes the U4 snRNA, which is a critical component of the major spliceosome.","In particular, U4 is tightly bound with the U6 snRNA in the U4\/U6.U5 tri-small-nuclear ribonucleoprotein and the U4\/U6 duplex needs to be unwound for activation of splicing3. Variants identified in individuals with ReNU syndrome cluster in an 18-nucleotide (nt) region in the centre of RNU4-2 that is depleted of variants in population datasets (the \u2018critical region\u2019, or CR)1. This region is known to accurately position U6 for recognition of the 5\u2032 splice site. Consistent with this, variants causing ReNU syndrome have been shown to alter 5\u2032 splice site usage1, with this disruption correlating with phenotype severity4.","Similarly, variants in two distinct structures within the 18-nt CR (the T-loop and Stem III) have been proposed to differ in clinical severity4.\r\n\r\nThe precise relationship between genetic variation in RNU4-2 and clinical impact remains incompletely characterized. 
The variants initially characterized in individuals with ReNU syndrome are all within the 18-nt CR; however, more recent work has proposed a role for variants outside this region, in the 5\u2032 stem loop5. It is unclear which, if any, variants outside the CR could also cause NDD. This is particularly important as the increased mutation rate of RNU4-2 and other snRNA genes means that there will be many chance occurrences of variants among sequenced individuals with syndromic NDD6.","Up to 75% of individuals with ReNU syndrome have the same single-nucleotide insertion (n.64_65insT). Whether the high recurrence of this particular variant is due to ascertainment bias, germline selection and\/or an increased mutation rate is at present unknown. Furthermore, it is unclear whether available variant effect predictors (for example, CADD7) can effectively distinguish between pathogenic and benign variants in RNU4-2.\r\n\r\nResolving these questions will be critical to ensure accurate, comprehensive diagnoses of individuals affected by ReNU syndrome.","One approach to clarifying variant impact is through the generation of functional data of variant effect, which can mechanistically inform why specific variants cause disease and improve clinical interpretation of rare variants8.","However, no experimental assay has yet been established to evaluate variants in RNU4-2, owing to its recent association with NDD.\r\n\r\nSaturation genome editing (SGE) is a powerful approach to delineate genotype\u2013phenotype relationships9.","Crucially, it does not rely on variants being observed in an individual with or without disease.","Instead, every possible variant across a gene or region can be engineered and the relative functional effects of each determined through a cellular readout. SGE experiments have been performed across numerous protein-coding genes, including BRCA110, CARD1111, DDX3X12, VHL13 and BAP114. 
In each case, the SGE assay has accurately differentiated between known pathogenic and benign variants.\r\n\r\nHere, we perform SGE of the human RNU4-2 noncoding RNA. We implemented an approach to combat the high sequence homology between RNU4-2 and its many homologues and pseudogenes, obtaining a variant effect map that effectively distinguishes variants known to cause ReNU syndrome from those in population controls.","We redefine the CR at single-nucleotide resolution, resolve pathogenicity assignments for variants of uncertain significance, and show that function scores for variants within the CR correlate closely with phenotypic severity. Furthermore, we identify functionally critical variants in other regions of RNU4-2 that underlie a recessive NDD marked by clinical features that are distinct from those of ReNU syndrome.\r\nSGE maps the effects of RNU4-2 variants\r\nPerforming SGE on regions of high sequence homology poses a challenge in that the protocol requires CRISPR\u2013Cas9 editing of a single locus, specific amplification of the edited locus from millions of cells and accurate variant calling from amplicon sequencing.","Alignment of RNU4-2 (RefSeq NR_003137.3) to RNU4-1 (RefSeq NR_003925.1) reveals mismatches at only 4 of the 145\u2009nt. The sequence upstream of RNU4-2, however, is both unique and poorly conserved across species, such that guide RNAs (gRNAs) predicted to be highly specific15 can be designed in conjunction with protospacer adjacent motif (PAM)-disrupting edits to block Cas9 recutting (Fig. 1a).\r\nFig. 
1: SGE reveals the functional spectrum of RNU4-2 variants.\r\na, Schematic of SGE library design and CRISPR targeting strategy for RNU4-2.","Positions of library variants including all possible SNVs (navy; across the 145-nt transcript and 6-nt 3\u2032), control 1-nt insertions in loop regions (yellow), CR 1-nt insertions (red) and deletions (teal) and multi-nt insertions (light purple) are denoted on a schematic of RNU4-2 and RNU6 in complex (left) and by genomic location (right). A gRNA was designed to cleave upstream of RNU4-2 (scissors), avoiding highly repetitive sequence and allowing for a PAM-blocking variant to be installed in a region of low conservation (PhyloP 100 vertebrates basewise conservation track shown).","b, Schematic of SGE experiments in HAP1. Following editing, cells were collected on days 4 and 14. Sequencing was performed to quantify variant frequencies at each timepoint and function scores were calculated.","c, Function scores for 539 variants were correlated across biological replicates (Pearson\u2019s r\u2009=\u20090.86\u00a0for\u00a0replicates 1 and 2). The function score threshold delineating significantly depleted variants is indicated with the dashed line.","d, Function scores are plotted by genomic position in relation to RNU4-2 (RefSeq NR_003137.3). The line at n.145 marks the end of the transcript, with 18 more distal SNVs also scored. Points in c,d are coloured by variant type with a single legend included for these two panels. 
CRISPR\u2013Cas9 icon in b adapted from Bioicons (https:\/\/bioicons.com\/?query=CRISPR; CRISPR_Cas9 schematic), Marcel Tisch, under a Creative Commons licence CC0 1.0 Universal.\r\n\r\nLacking established models for assaying RNU4-2 variants, we chose to perform SGE in HAP1\u00a0cells, a haploid human line in which growth effects have accurately distinguished pathogenic variants across several protein-coding genes10,12,13,14,16,17.","To HAP1 cells lacking LIG4 (HAP1-LIG4-KO), we codelivered Cas9 with a gRNA directing DNA cleavage 31-nt upstream of RNU4-2 to install a library comprising 539 variants by homology-directed DNA repair (HDR). The library included all possible single base substitutions from the first transcribed nucleotide to 6\u2009nt beyond the most 3\u2032 position of the RNU4-2 transcript (GRCh38, chr12:120291753\u2013120291903), as well as all 1-nt deletions and insertions in the CR, including all but one variant known to cause NDD (omitting n.72_73del, which was reported after assay design; Fig. 1a).","Uncertain whether pathogenic variants would show phenotypes in the HAP1-based assay, we included 8 2-nt to 5-nt insertions at positions in the CR previously associated with disease, reasoning these may have strong effects. As negative controls, we included 12 1-nt insertions in stem loops outside the CR, which were not predicted to be deleterious (Supplementary Table 1).\r\n\r\nAdapting an optimized SGE protocol for HAP1 cells13 (Fig.","1b), we successfully scored all variants included in the library, observing an average of 52% editing by HDR at day 4. Editing was confirmed by sequencing to be specifically targeted to RNU4-2, and not RNU4-1. Function scores, reflecting variants\u2019 effects on growth (Methods), were highly correlated across three biological replicates (Pearson\u2019s r\u2009=\u20090.83\u20130.86; Fig. 1c and Extended Data Fig. 1). 
As expected, given their location in the U4\/U6 secondary structure, all 12 negative control variants scored near 0 (mean, \u22120.009, s.d.\u2009=\u20090.11). We defined a neutral distribution from these negative controls to identify 151 significantly depleted variants (q\u2009<\u20090.01, that is, function score less than \u22120.302).","The 8 multi-nucleotide insertions in the CR included as positive controls all were depleted, with function scores ranging from \u22120.73 to \u22121.82. Mapping variants\u2019 function scores to their linear transcript position reveals that depleted variants are clustered, rather than distributed evenly across the gene (Fig. 1d).\r\nSGE data resolve variant pathogenicity\r\nWe annotated all assayed variants within RNU4-2 with whether or not they had been observed in individuals with ReNU syndrome1, observed in population cohorts (UK Biobank18 or All of Us), or observed in neither (unobserved; Fig. 2a).","All 18 variants observed in ReNU syndrome were depleted in the assay (function score less than \u22120.302), whereas 81.0% (286 out of 353) of population variants scored as normal (function score \u22120.302 or more; Fig. 2b). Accordingly, function scores effectively discriminate between ReNU syndrome variants and those identified in the population (Fig. 2c; area under the receiver operating characteristic (ROC) curve (AUC) of 0.93). Most variants that are unobserved in population cohorts score normally (56.0%; 84 out of 150); however, many are as, or even more, depleted than ReNU syndrome variants. 
Specifically, the four variants with the lowest function scores are all unobserved (Supplementary Table 1).\r\nFig.","2: Function scores accurately discriminate variants underlying ReNU syndrome.\r\na, Function scores for 521 variants within the RNU4-2 transcript are plotted by position and coloured by their association with ReNU syndrome (red), presence in the UK Biobank\u00a0(UKB) or All of Us\u00a0(AoU) cohorts (blue) or absence from both cohorts (teal). Depleted variants within the 18-nt CR (vertical red dashed lines) are confined to two smaller regions (shaded grey) and include all ReNU syndrome variants scored (n\u2009=\u200918). These regions, n.62-70 and n.75-78, correspond closely to the T-loop and Stem III regions, respectively.","The black dashed line (function score \u22120.302) indicates significantly depleted variants and the grey dashed line (function score \u22120.90) separates \u2018moderate\u2019 from \u2018strong\u2019 depletion.","b, Stacked histogram and overlaid density plot of function scores by category comparing 18 ReNU syndrome variants with 353 variants in UK Biobank and\/or All of Us and 150 unobserved variants.","c, ROC curves show the performance of function scores and CADD scores for classifying 12 ReNU syndrome SNVs from 346 SNVs observed at least once in population controls.","d, Function scores for SNVs are plotted by combined UK Biobank and All of Us allele count. Higher allele counts were correlated with higher function scores (Spearman\u2019s \u03c1\u2009=\u20090.29, two-sided P\u2009=\u20092.8\u2009\u00d7\u200910\u221211). Among the 50 most frequently observed SNVs (combined allele count greater than 91; black dashed line), no SNVs were depleted. The grey dashed line separates absent variants (combined allele count of 0) from those observed at least once (combined allele count greater than 0).","e, Function scores for the 435 tested SNVs are plotted by CADD score. 
The dashed line at y\u2009=\u2009\u22120.302 indicates significantly depleted SNVs, whereas the red line at x\u2009=\u200919.25 and the blue line at x\u2009=\u200918.99 indicate median CADD scores for ReNU syndrome SNVs and SNVs present in population cohorts, respectively.\r\n\r\nWe observed a significant correlation between single-nucleotide variant (SNV) allele counts in population cohorts and function scores, with rarer SNVs tending to be more depleted by SGE (Spearman\u2019s \u03c1\u2009=\u20090.29, P\u2009=\u20092.8\u2009\u00d7\u200910\u221211; Fig. 2d). Among the 50 SNVs with the highest combined allele counts in the UK Biobank and All of Us cohorts, none were depleted in the assay.","Indeed, applying more stringent allele count thresholds to define control variants in population cohorts consistently improved the assay\u2019s classification performance (Extended Data Fig. 2). These findings indicate that depleted variants observed in population cohorts are unlikely to be the result of experimental noise and, instead, represent genuine variants affecting RNU4-2 function segregating in the general population.\r\n\r\nThe discriminatory power of our SGE assay was substantially greater than that of the genome-wide in silico tool CADD19 (Fig. 2c; AUC\u2009=\u20090.65). Given the high conservation of the entire RNU4-2 gene, most SNVs have very similar CADD scores (Fig. 2e).","Although CADD scores for ReNU syndrome SNVs are marginally higher on average than those for SNVs in population cohorts (ReNU median 19.2; UK Biobank and All of Us median 19.0; one-sided Wilcoxon P\u2009=\u20090.040), a CADD score threshold that would capture all ReNU syndrome SNVs (18.89 or greater) would also annotate 56.4% (195 out of 346) of SNVs observed in UK Biobank and All of Us, and 55.6% (183 out of 329) of SNVs with normal SGE function scores, as probably deleterious. 
By contrast, our SGE function score threshold of \u22120.302 captures all ReNU syndrome SNVs and only 19.1% (66 out of 346) of SNVs observed in population cohorts.","We also observe only a weak correlation of SGE function scores with changes to U4\/U6 RNA binding stability predicted by ViennaRNA (\u03c1\u2009=\u2009\u22120.27, P\u2009=\u20094.5\u2009\u00d7\u200910\u221210; Extended Data Fig. 3a). The observed effect is limited to specific regions, most notably Stem II (\u03c1\u2009=\u2009\u22120.79, P\u2009=\u20095.0\u2009\u00d7\u200910\u221210). By contrast, no significant correlation is observed in the T-loop or Stem III and, overall, \u0394\u0394G values from ViennaRNA do not classify ReNU syndrome variants as well as SGE (ROC-AUC 0.72 versus 0.93, respectively; Extended Data Fig. 3b).\r\n\r\nThe assay clearly delineates the 18-nt CR of RNU4-2 (Fig. 2a) within which variants cause ReNU syndrome; however, some variants in this region score normally.","Using these data, we redefine the CR to two smaller regions of 9\u2009nt (n.62-70, inclusive of insertions at n.61_62) and 4\u2009nt (n.75-78), corresponding to the T-loop and Stem III, respectively (Extended Data Fig. 4). Although the T-loop region matches that reported by ref.","2, the CR overlapping Stem III is 3-nt smaller than previously suggested. Within these two regions, 85.4% (76 out of 89) of tested variants (79.5% of SNVs), including all ReNU syndrome variants, have significant function scores, compared with 17.4% (75 out of 432) across the remainder of RNU4-2.\r\n\r\nWe next used our function scores to assign evidence strengths for clinical variant classification8. We deemed the 17 pathogenic or likely pathogenic variants reported in ref. 
4 and assayed here to be associated with ReNU syndrome and 45 variants with combined allele counts across the UK Biobank and All of Us above 100 to be neutral.","A Gaussian mixture model was then applied to determine the odds of pathogenicity (OddsPath) for each variant (Methods, Extended Data Fig. 5 and Supplementary Table 1). Within the CR, 69 of 127 (54.3%) variants receive PS3 strong evidence of pathogenicity, including 16 of 18 variants reported to be pathogenic, with the other two variants receiving PS3 moderate or indeterminate evidence. A further 38 (29.9%) variants receive BS3 strong evidence of benignity.","As no variants outside the CR have been associated with ReNU syndrome, we refrain from assigning evidence strengths to variants outside the CR.\r\n\r\nRecent work by one research group4 classified three variants outside the CR and one deletion within the CR as variants of uncertain significance. Three of these variants were included in our assay (n.76del, n.92C>G and n.111C>T) and all three had normal function scores (0.12, 0.04 and 0.05, respectively).","Notably, all three variants are also observed in population controls. Furthermore, a recent paper proposed a link between two 5\u2032 stem loop variants, each identified in a single individual and inherited from an unaffected mother, and ReNU syndrome5. One of these variants is included in our assay (n.30A>T), and its score of \u22120.305 just crosses the threshold to be classified as depleted; however, other depleted variants in the same region are observed in population controls.","Finally, of two variants recently associated with retinitis pigmentosa20, the one that is included in our assay (n.56T>C) has a normal function score (\u22120.23).\r\nSGE depletion predicts disease severity\r\nA previous study proposed a difference in phenotypic severity between ReNU syndrome variants mapping to the T-loop and Stem III structures of the U4\/U6 duplex4. 
This difference is seen in our data, with Stem III variants having on average, higher function scores (T-loop mean \u22121.13; Stem III mean \u22120.75; one-sided Wilcoxon P\u2009=\u20090.012).","However, we also observe considerable variation in function scores for ReNU variants within each of the two regions. For example, two SNVs within the T-loop, n.63T>C and n.65A>G, have function scores above the mean observed for Stem III variants (\u22120.51 and \u22120.32, respectively). To investigate this, we repeated the phenotype clustering analysis of 143 individuals with ReNU syndrome from ref. 4. We classified the variants into two categories corresponding to \u2018moderate\u2019 (\u22120.9\u2009<\u2009function score\u2009<\u2009\u22120.302) and \u2018strong\u2019 (function score less than \u22120.9) levels of depletion in the assay (Fig. 3a and Extended Data Fig. 4).","All of the individuals with moderate category variants cluster together, including the four individuals with the n.63T>C (n\u2009=\u20091) and n.65A>G (n\u2009=\u20093) T-loop variants (Fig. 3b). These results remained consistent when excluding n.64_65insT from the analysis (that is, the result is not driven by the recurrent insertion variant alone) and when using a uniform manifold approximation and projection (UMAP) representation (Extended Data Fig. 6).\r\nFig.","3: Function scores predict ReNU syndrome severity and degree of splicing disruption.\r\na, Schematic showing how ReNU variants are split into two categories based on their SGE function score: strong depletion (function score less than \u22120.9; red) and moderate depletion (\u22120.9\u2009<\u2009function score\u2009<\u2009\u22120.302; yellow).","b, The first two principal components from clustering of 143 ReNU syndrome cases by phenotype using the approach from ref. 4. Individuals are coloured by their variant SGE function score class. 
Unlabelled triangles indicate occurrences of n.64_65insT.","c, The proportion of affected individuals with each phenotype is plotted, with cases grouped by SGE function score class. The number of individuals (n) in each comparison group is shown for each phenotype. Error bars indicate 95% confidence intervals centred on each proportion (capped at 0 and 1.0).","Full data, including statistics for comparisons between groups, are included in Extended Data Table 1.","d, Principal component analysis based on PSI values from significant 5\u2032 splice site events detected from RNA sequencing data using rMATS, comparing 19 patients with ReNU with 20 control participants (purple), as performed in ref. 4. Individuals with ReNU are coloured by their variant SGE function score class.","GDD, global developmental delay; ID, intellectual disability.\r\n\r\nTo further determine whether SGE function scores were able to discriminate between more severe and milder ReNU syndrome variants, we compared four specific phenotypes.","Individuals with variants in the strong depletion group were significantly more likely to have severe developmental delay (73.3% versus 5.9%; odds ratio\u2009=\u200942.7; 95% confidence interval (CI) 6.1\u20131,841.8; two-sided Fisher\u2019s P\u2009=\u20091.1\u2009\u00d7\u200910\u22127), severe intellectual disability (76.6% versus 5.9%; odds ratio\u2009=\u200950.4; 95%CI 7.1\u20132,197.0; two-sided Fisher\u2019s P\u2009=\u20093.6\u2009\u00d7\u200910\u22128) and absent speech or to speak only a few words (92.8% versus 5.6%; odds ratio\u2009=\u2009195.5; 95%CI 24.7\u20138,591.7; two-sided Fisher\u2019s P\u2009=\u20096.6\u2009\u00d7\u200910\u221214) than individuals with moderate depletion variants. 
There was no difference in the occurrence of seizures between variant groups (Fig.","3c and Extended Data Table 1).\r\n\r\nTo test whether the strength of SGE depletion also correlates with the extent of splicing disruption observed in individuals with ReNU syndrome, we repeated a second analysis from ref. 4. We regenerated a principal component analysis of percentage spliced-in (PSI) values for 5\u2032 splice sites that differed significantly in usage between ReNU cases and control participants. Individuals with strong and moderate SGE function scores clustered separately, with the strong variant individuals being more distant from control participants (Fig. 3d).\r\nA recessive NDD linked to RNU4-2 variants\r\nSeventy-five variants outside the ReNU CR are depleted in the SGE assay (Supplementary Table 1).","Unlike the depleted variants in the ReNU CR, most of these other depleted variants (84.0%; 63 out of 75) are observed in population control cohorts, albeit at low frequencies (Fig. 2a). To investigate whether these variants are associated with NDD-related traits, we compared individuals heterozygous for such variants (n\u2009=\u2009592) and individuals with non-depleted SNVs (n\u2009=\u200912,374) in RNU4-2 with individuals without any variants in RNU4-2, using the UK Biobank.","We did not find any significant differences in fluid intelligence scores, childhood developmental disorder diagnoses or age of leaving education (Extended Data Table 2).\r\n\r\nBecause our SGE assay was performed in a haploid cell line, we reasoned that depleted variants outside the CR may instead be associated with recessive phenotypes. We searched global rare disease cohorts and identified 20 individuals, with biallelic depleted variants: 10 (including 3 pairs of siblings) with homozygous variants and 10 (including 4 pairs of siblings) who were each concordant for compound heterozygous depleted variants (Extended Data Table 3). 
None of these variants were located in the ReNU CR, yet all 20 individuals had NDD phenotypes.","None of the individuals had an existing genetic diagnosis that fully explained their observed phenotypes (Methods). Across the rare disease cohorts, no individuals with phenotypes unrelated to NDD had biallelic depleted variants. Only a single individual across the UK Biobank and All of Us cohorts is homozygous for a SGE-depleted variant (n.31T>G, function score \u22120.730).","This individual has only primary level education (highest grade, one to four) and reports difficulties with \u2018dressing or bathing\u2019, \u2018doing errands alone\u2019 and \u2018concentrating, remembering or making decisions\u2019, consistent with a possible intellectual disability.\r\n\r\nThe clinical phenotypes of the 20 identified NDD individuals are characterized as part of a broader cohort (total n\u2009=\u200938) in a companion paper21. The 18 extra individuals reported in this broader cohort all have biallelic RNU4-2 variants, but at least 1 variant had a non-significant function score or was not scored with SGE.","In brief, we define a new NDD characterized by global developmental delay, intellectual disability, delayed or absent speech, hypotonia, spasticity, microcephaly, ophthalmological and visual impairments and seizures, with variable involvement of genitalia, skin, hair and limb anomalies.","On MRI, individuals show distinctive white matter abnormalities and cerebellar atrophy that are not seen in ReNU syndrome21.\r\n\r\nDepleted variants outside the ReNU CR broadly map to four regions of U4\/U6 secondary structure that are known to mediate interactions between U4 and other components of the spliceosome: (1) the central portion of the Stem II interaction with U6 from nucleotides 6 to 11 (ref.","3); (2) a \u2018k-turn\u2019 structure required for protein binding22,23 comprising nucleotides 27 to 33 and nucleotides 42 to 46; (3) a region from nucleotides 118 to 126 that 
interacts with a ring of Sm proteins that are important for U4 biogenesis and stability24,25 and (4) a portion of the terminal stem loop formed by base-pairing of nucleotides 129 to 131 with nucleotides 140 to 142 (Fig. 4). All variants identified in the 20 recessive NDD cases map to these four regions. Variants in structurally equivalent regions of RNU4ATAC, which encodes the minor spliceosome equivalent of U4, U4atac, cause rare recessive RNU4atac-opathies26,27,28.","Of the 13 unique RNU4-2 variants identified in the recessive NDD cases, 5 have exact equivalents in RNU4ATAC that are (likely) pathogenic in ClinVar (n.32G>A, n.45G>C, n.46G>A, n.119A>G and n.122T>G; Supplementary Table 2). They include n.119A>G (function score \u22120.686; RNU4ATAC equivalent n.117A>G; ClinVar variation ID 1525441), which was homozygous in two individuals and compound heterozygous in three individuals, including two brothers.\r\nFig. 4: SGE-depleted variants outside the CR cause a recessive NDD.\r\nThe lowest SGE function score class among SNVs at each position is indicated on the U4\/U6 secondary structure. Outside the CR, low SGE scores occur at positions of spliceosomal protein binding, indicated by teal shaded regions.","Grey triangles correspond to homologous positions of RNU4ATAC at which (likely) pathogenic variants have been linked to recessive disease (from ClinVar; Supplementary Table 2). RNU4-2 variants with low function scores observed in recessive NDD cases are indicated, with filled purple circles indicating variants observed as homozygous and half-filled circles indicating variants observed in the compound heterozygous state. An orange dot in the centre of a circle indicates that the variant is observed in two affected siblings. Six (likely) pathogenic RNU4ATAC variants could not be confidently assigned to an equivalent nucleotide in RNU4-2. 
Three of these (n.8C>A, n.13C>T and n.16G>A) are shown together as mapping to Stem II.","The other three (n.29T>G, n.30G>A and n.111G>A) are not shown.\r\n\r\nIn an attempt to distinguish recessive and dominant variants experimentally, we performed SGE of RNU4-2 once more, this time using a diploid population of HAP1 cells selected through fluorescence-activated cell sorting (Methods). This experiment revealed function scores to be attenuated across the gene due to the presence of the second allele (Extended Data Fig.","7a,b and Supplementary Table 1).","However, all variants assayed in the Stem III region scored neutrally in diploid HAP1, suggesting pathogenic Stem III variants probably affect cell fitness in a manner that is distinct from pathogenic variants elsewhere. For all other regions, function scores between haploid and diploid models were highly correlated (Extended Data Fig.","7c), indicating fitness effects in diploid HAP1 cells do not delineate dominant and recessive variants in vivo.\r\nDiscussion\r\nRNU4-2 was the first noncoding RNA to be identified as having a substantial contribution to the prevalence of NDD, with ReNU syndrome predicted to affect around 100,000 individuals worldwide1,2. Here we developed an SGE assay to systematically assess the function of variants across RNU4-2 and map genotype\u2013phenotype relationships. We show that function scores accurately identify variants underlying ReNU syndrome and can distinguish these variants by disease severity. Furthermore, we define the CR at the centre of RNU4-2 within which variants cause dominant ReNU syndrome, at nucleotide resolution.","In two regions, of 9\u2009nt and 4\u2009nt, 85.4% of all tested variants are depleted.","However, some variants in these regions, particularly in Stem III, have normal function scores and are therefore unlikely to be pathogenic. 
As a consequence, these data have immediate use in clinical interpretation of newly observed variants in individuals with NDD.","Indeed, calibration of the SGE function scores for use within the ACMG\/AMP framework in the context of ReNU syndrome showed that these data can be used to give strong evidence towards either a pathogenic or benign classification.\r\n\r\nWe identified four regions of the U4\/U6 duplex structure, outside the ReNU CR where variants are also depleted. This led us to uncover a new recessive NDD caused by homozygous and compound heterozygous variants in these regions that were depleted in SGE. This NDD is described comprehensively in ref.","21, in which we also expand the cohort to include 38 individuals with biallelic RNU4-2 variants: the 20 individuals presented here with significant function scores for both variants, and 18 extra individuals harbouring variants in the same functional regions with at least one variant that was not significantly depleted or not assayed by SGE. Through comprehensive clinical phenotyping and analysis of RNA sequencing data, we show that the recessive NDD is phenotypically and mechanistically distinct from ReNU syndrome.","For example, MRI findings in individuals with ReNU syndrome most commonly include enlarged ventricles and corpus callosum abnormalities4, whereas individuals with biallelic RNU4-2 variants commonly have progressive white matter changes and cerebellar atrophy. Although we cannot yet determine the prevalence of the recessive NDD, SGE-depleted variants outside the ReNU CR are found in 0.12% and 0.094% of individuals in the UK Biobank and All of Us cohorts, respectively.","Hence, the recessive NDD is rarer than ReNU syndrome, but the prevalence is likely increased in populations with higher rates of consanguinity21.\r\n\r\nDistinct mechanisms underlie dominant and recessive RNU4-2-associated NDDs. 
We previously showed that individuals with ReNU syndrome have an increase in use of alternative non-canonical 5\u2032 splice sites1, consistent with the role of the T-loop and Stem III regions in accurately positioning the U6 ACAGAGA sequence to receive the 5\u2032 splice site. Recessive RNU4-2 variants map to different locations within U4, outside the T-loop and Stem III. They are found in key regions of binding between U4 and other important spliceosome factors.","The same regions have previously been shown to be important in U4 mutational analyses in yeast25 and variants in the 5\u2032 stem loop k-turn that we identify as depleted occur at nucleotides that are essential for SNU13\/15.5k protein binding in vitro23. In our companion paper21, we show through analysis of blood RNA sequencing data that individuals with biallelic RNU4-2 variants do not have the ReNU signature of disrupted 5\u2032 splice site selection. Furthermore, biallelic individuals have notably decreased RNU4-2 expression, which is not observed in individuals with ReNU syndrome, supporting a distinct loss-of-function molecular mechanism.","As variants in the equivalent regions and nucleotides of RNU4ATAC that cause recessive RNU4atac-opathies have been shown to lead to intron retention29,30, a similar mechanism may underlie recessive RNU4-2 NDD.","However, this was not readily evident in RNA sequencing analysis in blood21.\r\n\r\nRNU4-2 is a striking example of genetic pleiotropy, with variants in different regions of the RNA, which is only 145\u2009nt in length, causing both two distinct NDDs and retinitis pigmentosa. 
This adds complexity to variant interpretation and makes it particularly important to calibrate functional evidence with consideration of underlying mechanisms.","Although we showed that function scores for variants within the ReNU CR can provide strong evidence for clinical interpretation, we were unable to calibrate our assay for variants outside the ReNU CR due to a lack of independently defined pathogenic variants in these regions8, as all individuals with recessive NDD were identified on the basis of function score. Whereas we anticipate that our SGE data will prove highly useful for delineating variant pathogenicity for recessive disease, until orthogonal calibration can be performed, we recommend PS3 supporting evidence be assigned to significantly depleted variants outside the CR.","It is important to note that we set a relatively conservative threshold to define significantly depleted variants (q\u2009<\u20090.01) using synthetic controls in the absence of bona fide benign variants. Although all variants associated with ReNU syndrome scored below this threshold, we cannot exclude the possibility that variants with more subtle effects may be clinically relevant, particularly in relation to recessive disease. We cannot fully exclude the possibility that variants that score just below the \u22120.302 function score threshold are benign and represent false positives.","The calibration of function scores to evidence strength for ReNU variant classification reflects this, as variants were not assigned PS3 strong evidence in favour of pathogenicity unless their function scores were below \u22120.45.\r\n\r\nThus far, there are no strong data linking variants outside the CR to dominantly inherited NDD. This is supported by our analysis of heterozygous SGE-depleted variants outside the CR in the UK Biobank, in which we do not find any associations with intellectual disability related phenotypes. 
Accordingly, SGE data should not be used as evidence for the pathogenicity of variants for dominantly inherited ReNU syndrome beyond the CR.","We note that the 5\u2032 stem loop variants n.30A>T (function score \u22120.305) and n.43_44insT have been putatively associated with NDD5, with a link initially proposed with dominant ReNU syndrome.","However, these variants are within the \u2018k-turn\u2019 region linked to recessive disease in this study, and both are inherited from unaffected parents. Furthermore, n.43_44insT is identified in an individual with NDD in our companion paper, as compound heterozygous with a variant in Stem II21. Collectively, these data indicate that 5\u2032 stem loop variants are more likely to lead to recessive NDD than dominant ReNU syndrome.\r\n\r\nOur HAP1-based SGE assay has several limitations. Most notably, the growth-based readout does not inform directly on underlying mechanisms of splice alteration (for example, altered 5\u2032 splice site usage, intron retention).","This means that in the haploid context, both dominant and recessive effects are observed, which cannot be separated by function score alone. We also performed SGE in diploid HAP1 cells. Whereas function scores from these experiments revealed differences between T-loop and Stem III variants, they were once more unable to distinguish dominant and recessive variants in vivo. It is likely that specific changes in splicing underlying certain clinical phenotypes may not occur in HAP1 due to differences between cell types. It is notable, for instance, that a variant recently associated with retinitis pigmentosa (n.56T>C) did not score significantly.","Furthermore, most individuals with ReNU syndrome (70\u201375%) have the same single base insertion, n.64_65insT. Our data indicate that this variant is not unique in its functional severity, with many variants scoring similarly or having even lower function scores. 
This result could argue against high recurrence being the result of a particularly damaging functional effect driving ascertainment, suggesting that positive selection in the female germline or an increased local mutation rate might be more likely explanations.","However, we cannot rule out the possibility that this variant leads to unique changes in splicing not reflected in SGE function scores.\r\n\r\nFuture experiments using more cell types will be valuable for delineating mechanisms of RNU4-2 pleiotropy.","Likewise, testing larger insertions and deletions both inside and outside the ReNU CR will add insights into the degree of tolerated disruption across different regions of RNU4-2. For example, in ref.","4, the authors identified a 2-nt deletion (n.72_73del) in 2 individuals. This variant falls between Stem III and the T-loop but suggests that larger insertions and deletions in this region may also be disruptive to these structures. As we have observed for CR variants associated with ReNU syndrome, the degree of functional impact caused by recessive NDD variants may correlate with disease severity. There may also be phenotypic differences between individuals with variants mapping to the four distinct regions we identified.","Thorough phenotyping of large cohorts of cases will be necessary to establish how the degree of functional effect influences phenotype.\r\n\r\nIn summary, this work illustrates the power of a variant effect map for a locus recently implicated in disease to discover new genotype\u2013phenotype associations and understand mechanisms underlying disease. 
SGE data for RNU4-2 will be critical for accurately diagnosing patients with at present unexplained NDD and provide insights that are valuable for efforts to design effective therapies.","Finally, the SGE strategy we used to overcome the high sequence homology of RNU4-2 can be replicated to dissect other snRNAs recently linked to disease31,32.\r\nMethodsSingle guide RNA design and cloning\r\nThe gRNA used for SGE was designed using Benchling\u2019s CRISPR design tool to search the RNU4-2 locus, including upstream and downstream regions of low sequence homology to RNU4-1 and pseudogenes, identifying a candidate with high on-target and low off-target scores. The selected gRNA was not predicted to target RNU4-1, owing to eight mismatches occurring in the protospacer and PAM. The gRNA spacer sequence was ligated into the pX459 backbone as previously described33.","In brief, complementary primers containing the spacer were ordered from IDT (Supplementary Table 3), phosphorylated, hybridized and ligated into the pX459 linearized backbone followed by PlasmidSafe DNase (Lucigen) digestion.","Next, 2\u2009\u00b5l of the ligation reaction were transformed in NEB Stable Competent Escherichia coli cells using the high-efficiency transformation protocol and 75\u2009\u00b5l of transformant was plated on ampicillin-resistant plates and cultured overnight at 30\u2009\u00b0C. Three colonies were then picked and grown overnight at 37\u2009\u00b0C in 7\u2009ml of Luria\u2013Bertani medium supplemented with carbenicillin (100\u2009\u00b5g\u2009ml\u22121). Plasmid DNA was extracted using the QIAprep Spin Miniprep kit (Qiagen) and verified using Plasmidsaurus whole-plasmid sequencing. 
The selected clone was then grown in 100\u2009ml of Luria\u2013Bertani medium at 37\u2009\u00b0C in a shaking incubator supplemented with carbenicillin.","The cells were then pelleted and the plasmid was extracted using a ZymoPure Maxiprep kit (Zymo Research), endotoxins were removed using EndoZero columns (Zymo Research) and the product was quantified with the Qubit double-stranded DNA (dsDNA) BR assay kit (Invitrogen).\r\nHDR library cloning\r\nAn oligonucleotide library comprising RNU4-2 variants was manufactured by Twist Bioscience and subsequently cloned into a vector containing homology arms for RNU4-2 to make the HDR library for SGE.\r\n\r\nTo generate the vector with homology arms, a nested PCR was performed on genomic DNA (gDNA) extracted from HAP1 cells10 using primers designed to generate homology arms of 700\u2013800\u2009base pairs (bp) flanking RNU4-2 (Supplementary Table 3).","The PCR was performed using the Kapa HiFi HotStart ReadyMix (Roche). The product was purified using AmpureXP (Beckman Coulter) magnetic beads at 1.2\u00d7 volume and eluted in 12\u2009\u00b5l of nuclease-free water. The amplicon containing RNU4-2 homology arms was then inserted in the linearized pUC19 backbone using In-Fusion HD cloning (Takara) and 2\u2009\u00b5l of cloning reaction was transformed into NEB Stable cells following the manufacturer\u2019s 5-min transformation protocol. Cells were plated on agar plates containing ampicillin and incubated at 30\u2009\u00b0C overnight. The pUC19 plasmid containing RNU4-2 homology arms (pUC19-RNU4-2-HA) was purified and sequence-verified from a successfully transformed clone.","pUC19-RNU4-2-HA was then diluted to 8.7\u2009pg in a 50-\u00b5l PCR reaction and amplified with Kapa HiFi to obtain a linearized product with 17\u201318\u2009bp complementarity to the RNU4-2 oligo library. 
A PAM-blocking mutation was introduced 27\u2009nt upstream of the RNU4-2 sequence (chromosome 12:120291930-C-G) by means of primer overhang extension during PCR. The location of the PAM-disrupting edit was selected to minimize recutting by Cas9, converting a 5\u2032-GGG PAM sequence to 5\u2032-GCG. The PAM-disrupting edit had a CADD score of 4.20 (Phred) and a 100 vertebrates PhyloP score of 0.11. The reaction was treated with 1\u2009\u00b5l of DpnI (NEB) for 30\u2009min at 37\u2009\u00b0C, gel extracted and quantified.","Then, the RNU4-2 oligo library was amplified using Kapa HiFi and purified using AmpureXP (1.2\u00d7). The amplified library and linearized pUC19-RNU4-2-HA plasmid were then assembled using the In-Fusion HD cloning kit, and the product was transformed into NEB Stable cells using the high-efficiency transformation protocol. To quantify efficiency, 1% of cells in the transformation reaction were plated and the remainder were cultured in 100\u2009ml of Luria\u2013Bertani medium with carbenicillin overnight at 37\u2009\u00b0C. Cells were then pelleted by centrifugation and the final RNU4-2 HDR library was extracted using the ZymoPure Maxiprep kit (Zymo Research) with endotoxin removal.","The isolated HDR library was quantified with a Qubit dsDNA BR assay kit and sequence-verified by Plasmidsaurus.\r\nHAP1 cell culture\r\nHAP1 cells used for SGE (the\u00a0HAP1-LIG4-KO line;\u00a0herein referred to as \u2018HAP1\u2019)\u00a0show increased rates of editing by HDR due to a frameshifting mutation in LIG4 (ref. 10). Frozen HAP1 cells were thawed at 37\u2009\u00b0C in a water bath, then supplemented with 10\u2009ml of prewarmed Iscove\u2019s Modified Dulbecco\u2019s Medium (IMDM) containing l-glutamine, 25\u2009nM HEPES (Gibco), 10% FBS (Gibco), 1% penicillin\u2013streptomycin (Gibco) and 2.5\u2009\u03bcM 10-deacetyl-baccatin-III (DAB, Stratech), herein referred to as IMDMc. 
Cells were centrifuged at 300g for 3\u2009min.","The supernatant was then aspirated and the cells were resuspended in fresh media, plated on a 10-cm dish and cultured at 37\u2009\u00b0C with 5% CO2. The next day, the IMDMc media was replaced, and cells were cultured routinely from that point forward.\r\n\r\nThe HAP1 subculture routine included a 1:5 split every 48\u2009h or 1:10 split every 72\u2009h to prevent cells from exceeding 80% confluency. To split cells, the media was aspirated and the dish washed with 10\u2009ml of room-temperature Dulbecco\u2019s PBS (Gibco). Following Dulbecco\u2019s PBS aspiration, the cells were treated with 1\u2009ml of 0.25% trypsin\u2013EDTA (Gibco) and incubated for 3\u2009min at 37\u2009\u00b0C.","Next, 14\u2009ml of prewarmed IMDMc was added and cells were collected and centrifuged at 300g for 5\u2009min. Cells were then resuspended in 10\u2009ml of IMDMc, counted and seeded on a 10-cm dish.\r\nGeneration of diploid HAP1 cells\r\nParental HAP1 cells were cultured for 9\u2009days after thawing in IMDMc without DAB supplementation to allow for the spontaneous occurrence of diploid cells.","On day 10, cells were stained with 5\u2009\u00b5g\u2009ml\u22121 Hoechst working solution (Thermo Fisher Scientific) for 1\u2009h at 37\u2009\u00b0C, followed by fluorescence-activated cell sorting to select diploid cells using a BD FACSAria Fusion Flow Cytometer. Diploid cells were sorted on the basis of their G2\/M peak (4n), with gates established using a monoclonal diploid HAP1 control population. 
Sorted diploid HAP1 cells were then expanded for 10\u2009days in IMDMc without DAB supplementation before the subsequent SGE experiment.\r\nTransfection and selection\r\nThe day before transfection, 12\u2009million cells were seeded on a 10-cm dish for each replicate and 2\u2009million cells were seeded on a six-well plate for the negative control sample.","On the day of transfection (day 0), a transfection mix containing 10\u2009\u00b5g of HDR library, 30\u2009\u00b5g of the pX459 gRNA plasmid and 24\u2009\u00b5l of Xfect polymer (Takara) in a final volume of 800\u2009\u00b5l was prepared according to the manufacturer\u2019s instructions for each replicate. For the negative control sample, a pX459 plasmid with a gRNA targeting HPRT1 (ref. 13) instead of RNU4-2 was used to prevent successful editing, and the transfection volume mix was scaled down eightfold. Following transfection, cells were incubated for 24\u2009h at 37\u2009\u00b0C and supplemented with prewarmed IMDMc with 1\u2009\u00b5g\u2009ml\u22121 puromycin (Cayman Chemical).","On day 4, half of the cells for each replicate were collected for gDNA extraction and stored as a pellet at \u221270\u2009\u00b0C; the rest were kept in culture in 15-cm dishes supplemented with 15\u2009ml of IMDMc. The negative control sample was collected when reaching 70% confluency at day 6. A second sample of 10\u2009million cells per replicate was collected at day 14 and stored at \u221270\u2009\u00b0C.\r\nSequencing library preparation\r\ngDNA was extracted from cells using QIAshredder (Qiagen) columns followed by the Allprep DNA\/RNA kit (Qiagen) according to the manufacturer\u2019s instructions. Concentrations were determined using the Qubit dsDNA BR assay kit.","The RNU4-2 locus was subsequently amplified using nested PCR to avoid amplification of plasmid DNA, followed by an indexing PCR, in total using three primer sets (Supplementary Table 3). 
For the first reaction, the total gDNA template from each condition was partitioned into separate reactions, each containing 1.25\u2009\u00b5g of DNA in a 100\u2009\u00b5l reaction volume, using NEBNext Ultra II Q5 master mix (NEB) supplemented with MgCl2 (Ambion) to a final concentration of 4\u2009mM. The amplification reaction was monitored by quantitative PCR (qPCR) using SYBR green (Invitrogen) and stopped before completion.","The reactions for each sample were pooled and mixed before 50\u2009\u00b5l of each product was purified using AmpureXP (1.2\u00d7) and eluted in 15\u2009\u00b5l of nuclease-free water. Then 1\u2009\u00b5l of purified product was loaded into the second qPCR reaction (50\u2009\u00b5l final volume) and amplified using NEBNext Ultra II Q5. The reaction was again monitored using SYBR green and stopped before completion. The AmpureXP purification was then repeated, and a final qPCR (NEBNext Ultra II Q5) to incorporate sample indexes and sequencing adapters was performed using 1\u2009\u00b5l of purified product as template in a 50\u2009\u00b5l reaction for 8 cycles. Final products were purified and quantified with the Qubit dsDNA HS kit.","The samples were then pooled for sequencing, aiming for 5\u2009million reads per experimental replicate timepoint, 2\u2009million reads for the negative control sample and 1\u2009million reads for the HDR library. The pool was purified using AmpureXP (1\u00d7), quantified and loaded on a Novaseq X sequencer (Illumina).\r\nVariant frequency quantification\r\nThe fastq files were de-multiplexed using the bcl2fastq script and the variants were quantified as previously described13.","In brief, paired-end reads were adapter trimmed and merged, and reads containing N bases were discarded. HDR editing rates were computed from fastq files directly as the fraction of reads containing the exact PAM-blocking mutation. 
Fastq files were then aligned to a reference RNU4-2 sequence and the frequency of each variant included in the library was determined.\r\nFunction score calculation\r\nAll variants were observed in the library and day 4 at a frequency higher than 10\u22124, and were therefore included in downstream analyses.","Function scores for library variants were first calculated per replicate, computed as the log2 ratio of day 14 to day 4 variant frequencies, normalized by subtracting the median function score of negative control insertions from all scores. Final function scores were then calculated for each variant by averaging function scores across replicates, again normalizing to the median of negative control insertions such that the median final function score of control insertions equals 0. For each variant, P values were determined using the norm.cdf function in Python, defining a normal distribution from the mean and standard deviation of function scores for negative control insertions.","The P values were corrected for multiple hypothesis testing using the multipletests function in Python (Benjamini\u2013Hochberg procedure) to derive q values. Significantly depleted variants were defined as those with q\u2009<\u20090.01, corresponding to a function score below \u22120.302. We further classified depleted variants into two categories using an arbitrary function score threshold of \u22120.9 to include sufficient variants and individuals per category to assess for phenotypic differences.\r\nVariant scoring with CADD and ViennaRNA\r\nVariants were annotated as ReNU syndrome variants if they were reported in ref. 1 or classified as pathogenic or likely pathogenic in ref. 4.","Variants were annotated with whether or not they were observed in the 490,640 genome sequenced individuals from the UK Biobank18 (DRAGEN pipeline) or in 414,840 individuals from All of Us V8. CADD v.1.7 (ref. 
19) annotations were obtained by uploading a synthetic VCF to the online annotation tool (https:\/\/cadd.gs.washington.edu\/score). As we preselected which insertions and deletions to include in the SGE assay (because of assay size limitations), we restricted analyses involving CADD to SNVs within the RNU4-2 transcript.\r\n\r\nFor variants assayed within the RNU4-2 transcript, predicted changes in U4\/U6 interaction stability (\u0394\u0394Gbind) were computed using the ViennaRNA package34 (v.2.7.0).","Minimum free energies (MFEs) were obtained by use of RNA.fold_compound() at 37\u2009\u00b0C using default Turner RNA thermodynamic parameters. U4\/U6 pairing was modelled with the ViennaRNA cofold grammar by providing sequences in the dimer format (u4(AGCUUUGCGCAGUGGCAGUAUCGUAGCCAAUGAGGUUUAUCCGAGGCGCGAUUAUUGCUAAUUGAAAACUUUUCCCAAUACCCCGCCAUGACGACUUGAAAUAUAGUCGGCAUUGGCAAUUUUUGACAGUCUCUACGGAGACUGA) + \u2018&\u2019 + u6(GUGCUCGCUUCGGCAGCACAUAUACUAAAAUUGGAACGAUACAGAGAAGAUUAGCAUGGCCCCUGCGCAAGGAUGACACGCAAAUUCGUGAAGCGUUCCAUAUUUU)), and the intermolecular MFE was extracted using mfe_dimer().","Single-strand MFEs for U4 and U6 were computed independently using mfe().\r\n\r\nBinding free energy was defined as:\r\n\r\n$$\\Delta {G}_{{\\rm{bind}}}=\\Delta {G}_{{\\rm{complex}}}-(\\Delta {G}_{{\\rm{U}}4}+\\Delta {G}_{{\\rm{U}}6})$$\r\n\r\nThe same procedure was applied to RNU4-2 variant sequences, and differential stability was then calculated as:\r\n\r\n$$\\Delta \\Delta {G}_{{\\rm{bind}}}=\\Delta {G}_{{\\rm{bind.variant}}}-\\Delta {G}_{{\\rm{bind.reference}}}$$\r\n\r\nPositive \u0394\u0394Gbind values indicate predicted destabilization of U4\/U6 pairing.\r\n\r\nVariants were mapped to the following structural regions of RNU4-2: Stem II (n.3 to n.16), k-turn within the 5\u2032 
Stem loop (n.27 to n.35 and n.41 to n.46), Stem I (n.56 to n.62), T-loop (n.63 to n.70), Stem III (n.75 to n.79), 3\u2032 Stem loop (n.85 to n.117), Sm protein (n.118 to n.126) and terminal Stem loop (n.127 to n.144).\r\n\r\nROC area under the curve (AUC) values were calculated by assigning a 1 label to ReNU syndrome SNVs and a 0 label for SNVs observed in UK Biobank or All of Us.","The labels and corresponding function scores were used to compute false positive and true positive rates (using Python\u2019s roc_curve function), as well as ROC-AUC values (using the roc_auc_score function). This analysis was also restricted to SNVs only.\r\nAssigning evidence codes to variants based on function score\r\nWe followed established guidelines8 to calibrate function scores from SGE experiments in haploid cells to evidence strengths for classification of ReNU syndrome variants.","To do so, we defined a gold standard set of pathogenic, dominantly inherited variants as the 17 previously reported4 as \u2018pathogenic\u2019 or \u2018likely pathogenic\u2019 for which we derived function scores. Few RNU4-2 variants have been deemed benign in ClinVar, so we instead used reported allele counts in the UK Biobank and All of Us studies to define a neutral set of variants. This included all 45 assayed variants with a combined allele count of more than 100 between the two studies. A two-component Gaussian mixture model was then fit from the function score distributions of these variant sets, using the \u2018Mclust\u2019 package in R.","This model was then used to determine the probability of pathogenicity for each variant in the CR based on function score. 
The resulting posterior probabilities were then converted to OddsPath values using a uniform prior of 0.5, and evidence codes were assigned according to established OddsPath thresholds8 with the exception that PS3 evidence was capped at strong (+4 points), in line with the limited number of gold standard variants available for calibration.","We did not apply the model to variants outside the CR on account of there being no known pathogenic variants for ReNU syndrome in these regions.\r\nPhenotype severity and clustering\r\nCategorical data for 44 clinical features from 143 patients with pathogenic and likely pathogenic RNU4-2 variants4 were transformed into a 0\u20131 scale, with 0 indicating a more favourable phenotype and 1 a more severe presentation. Principal component analysis was generated after imputing missing data with 0 and performing variable scaling. UMAP representation was created using the umap package in R.","Two-sided Fisher\u2019s tests with Bonferroni adjustment to account for four tests were used to compare clinical features between SGE function score variant categories (strong versus moderate) in Extended Data Table 1.\r\nRNA sequencing cluster analysis\r\nRNA sequencing from cultured lymphocytes was performed following the protocol described in ref. 4 for RNU4-2 and\u00a0rMATS-turbo (v.4.3.0)35 was run on 19 ReNU samples and 20 control participants (excluding one individual previously deemed a control in ref. 4 who was here found to be a recessive RNU4-2 case); 101 significant alternative non-canonical 5\u2032 splice sites (A5SS) events (false discovery rate less than 0.1, \u0394PSI\u2009>\u20090.05) were retained.","Then rMATS-turbo was rerun on the 19 ReNU samples, the 20 control participants, without statistical or \u0394PSI filtering. 
The A5SS output was filtered on the 101 retained events and the PSI values were extracted to perform the principal component analysis.\r\nAssociation testing in UK Biobank\r\nWe extracted phenotypes associated with educational attainment from UK Biobank following an approach published previously36. Fluid intelligence scores (field ID 20016) were retrieved for all participants. Where many scores were recorded, the median value was taken. Age left education was calculated as the maximum value in age completed full time education (field ID 845).","Diagnosis with childhood developmental disorder was defined using the ICD codes for intellectual disability (ICD-10: F70\u2013F73, F78, F79; ICD-9: 317, 318, 319), epilepsy (ICD-10: G40), global developmental disorders (ICD-10: F80\u2013F84, F88\u2013F95, R62, R48, Z55; ICD-9: 299, 312, 313, 314, 315) and congenital malformations (ICD-10: Q0\u2013Q99, ICD-9: 740\u2013759).\r\n\r\nWe identified UK Biobank participants with: (1) depleted variants in the 18-bp RNU4-2 CR (n\u2009=\u20096), (2) depleted variants outside the CR (n\u2009=\u200950) and (3) participants with non-depleted SNVs outside the CR (n\u2009=\u200912,132).","We performed multiple linear regression on fluid intelligence scores and age left education, and multiple logistic regression on childhood developmental disorder for variant groups (2) and (3) defined above, compared with all individuals without any variants in any of the three groups. 
Age at recruitment (field ID 21022), age\u00b2 (age at recruitment\u2009\u00d7\u2009age at recruitment), sex (field ID 31) and the first ten genetic principal components (field ID 22009) were included as covariates.","P values were false discovery rate-corrected using the Benjamini\u2013Hochberg method.\r\nInvestigating RNU4ATAC variants in ClinVar\r\nVariants in RNU4ATAC with classifications of pathogenic, likely pathogenic, pathogenic or likely pathogenic, benign, likely benign or benign or likely benign were downloaded from the ClinVar37 website on 4 March 2025. Two regions of RNU4-2 and RNU4ATAC with identical structures were defined, mapping to the k-turn (RNU4-2 nucleotides 26\u201352; RNU4ATAC nucleotides 31\u201357) and the Sm protein binding site (RNU4-2 nucleotides 115\u2013126; RNU4ATAC nucleotides 113\u2013124).","Variants at the same nucleotide in the structure, and where the reference bases in RNU4-2 and RNU4ATAC are identical, were marked as \u2018equivalent\u2019.\r\nIdentifying biallelic variants in cohorts\r\nWe searched rare disease cohorts for individuals with biallelic variants in RNU4-2. These cohorts included the Genomics England 100,000 Genomes Project and NHS Genomic Medicine Service datasets accessed through the UK National Genomic Research Library38, the SeqOIA and Auragen clinical cohorts in France (PFMG2025), the Undiagnosed Disease Network, the Broad Institute Center for Mendelian Genomics and GREGoR (Genomics Research to Elucidate the Genetics of Rare Diseases)39 Consortium cohorts.","We only included individuals with homozygous variants with function scores less than \u22120.302, or compound heterozygous variants in which both had function scores less than \u22120.302 (n\u2009=\u200920). All individuals had previous genome analysis including investigation of variants in known NDD genes and large structural variants. 
One individual (individual 17) had a reported likely pathogenic variant in GLI3; however, this variant did not explain all of their reported phenotypes (see ref. 21 for more details).\r\nEthics\r\nInformed consent was obtained for all participants included in this study from their parent(s) or legal guardian, with the study approved by the local regulatory authority.","The 100,000 Genomes Project Protocol has ethical approval from the Health Research Authority Committee East of England Cambridge South (Research Ethics Committee ref. 14\/EE\/1112). This study was approved by Genomics England under Research Registry Projects 354.","Health related research in UK Biobank was approved by the Research Ethics Committee under reference 20\/NW\/0274 with this research conducted under application number 81050.\r\n\r\nWe received an exception to the Data and Statistics Dissemination Policy from the All of Us Resource Access Board to report questionnaire response data for the single individual with a homozygous depleted variant as well as variant counts below 20 for all variants in RNU4-2.\r\nReporting summary\r\nFurther information on research design is available in the\u00a0Nature Portfolio Reporting Summary linked to this article.\r\n\nData availability\r\nSGE data including all RNU4-2 function scores are available in Supplementary Table 1.","Fastq files from SGE experiments are available through the European Nucleotide Archive at accession PRJEB87505. RNA sequencing data (Fig. 3d) were taken from ref. 4 and are available in the European Genome\u2013Phenome Archive at http:\/\/www.ebi.ac.uk\/ega; study accession EGAS50000000889. 
UK Biobank and All of Us V8 data are available to researchers on approval of application (https:\/\/www.ukbiobank.ac.uk\/use-our-data\/apply-for-access\/; https:\/\/www.researchallofus.org\/).\r\n\nCode availability\r\nCustom scripts used to analyse SGE experiments and generate figures are available at GitHub (https:\/\/github.com\/FrancisCrickInstitute\/RNU4-2_Saturation_Genome_Editing).\r\n\nReferences\r\nChen, Y. et al.","De novo variants in the RNU4-2 snRNA cause a frequent neurodevelopmental syndrome. Nature 632, 832\u2013840 (2024).\r\n\r\nGreene, D. et al. Mutations in the U4 snRNA gene RNU4-2 cause one of the most prevalent monogenic neurodevelopmental disorders. Nat. Med.","30, 2165\u20132169 (2024).\r\n\r\nNguyen, T. H. D. et al. The architecture of the spliceosomal U4\/U6.U5 tri-snRNP. Nature 523, 47\u201352 (2015).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nNava, C. et al. Dominant variants in major spliceosome U4 and U5 small nuclear RNA genes cause neurodevelopmental disorders through splicing disruption. Nat. Genet.","57, 1374\u20131388 (2025).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nBruselles, A. et al. Expanding the mutational spectrum of ReNU syndrome: insights into 5\u2032 stem-loop variants. Eur. J. Hum. Genet.","33, 432\u2013440 (2025).\r\n\r\nSeplyarskiy, V. et al. A mutation rate model at the basepair resolution identifies the mutagenic effect of polymerase III transcription. Nat. Genet.","55, 2235\u20132242 (2023).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nRentzsch, P.,","Witten, D.,","Cooper, G.","M.,","Shendure, J.","& Kircher, M. CADD: predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Res.","47, D886\u2013D894 (2019).\r\n\r\nArticle\nCAS\nPubMed\nGoogle Scholar\r\n\r\nBrnich, S. E. et al. 
Recommendations for application of the functional evidence PS3\/BS3 criterion using the ACMG\/AMP sequence variant interpretation framework. Genome Med.","12, 3 (2019).\r\n\r\nArticle\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nFindlay, G.","M.,","Boyle, E.","A.,","Hause, R.","J.,","Klein, J. C. & Shendure, J. Saturation editing of genomic regions by multiplex homology-directed repair. Nature 513, 120\u2013123 (2014).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nFindlay, G. M. et al. Accurate classification of BRCA1 variants with saturation genome editing. Nature 562, 217\u2013222 (2018).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nMeitlis, I. et al. Multiplexed functional assessment of genetic variants in CARD11. Am. J. Hum. Genet.","107, 1029\u20131043 (2020).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nRadford, E. J. et al. Saturation genome editing of DDX3X clarifies pathogenicity of germline and somatic variation. Nat. Commun.","14, 7702 (2023).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nBuckley, M. et al. Saturation genome editing maps the functional spectrum of pathogenic VHL alleles. Nat. Genet.","56, 1446\u20131455 (2024).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nWaters, A. J. et al. Saturation genome editing of BAP1 functionally classifies somatic and germline variants. Nat. Genet.","56, 1434\u20131445 (2024).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nHsu, P. D. et al. DNA targeting specificity of RNA-guided Cas9 nucleases. Nat. Biotechnol.","31, 827\u2013832 (2013).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nOlvera-Le\u00f3n, R. et al. High-resolution functional mapping of RAD51C by saturation genome editing.","Cell 187, 5719\u20135734.e19 (2024).\r\n\r\nArticle\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nHuang, H. et al. 
Functional evaluation and clinical classification of BRCA2 variants. Nature 638, 528\u2013537 (2025).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nSudlow, C. et al. UK Biobank: an open access resource for identifying the causes of a wide range of complex diseases of middle and old age. PLoS Med.","12, e1001779 (2015).\r\n\r\nArticle\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nSchubach, M.,","Maass, T., Nazaretyan, L.,","R\u00f6ner, S.","& Kircher, M. CADD v1.7: using protein language models, regulatory CNNs and other nucleotide-level scores to improve genome-wide variant predictions. Nucleic Acids Res.","52, D1143\u2013D1154 (2024).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nQuinodoz, M. et al. De novo and inherited dominant variants in U4 and U6 snRNA genes cause retinitis pigmentosa. Nat. Genet.","58, 169\u2013179 (2026).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nRius, R. et al. Biallelic variants in the noncoding RNA gene RNU4-2 cause a recessive neurodevelopmental syndrome with distinct white matter changes. Nat. Genet. https:\/\/doi.org\/10.1038\/s41588-026-02554-6 (2026).\r\n\r\nLiu, S. et al. Binding of the human Prp31 Nop domain to a composite RNA-protein platform in U4 snRNP. Science 316, 115\u2013120 (2007).\r\n\r\nArticle\nADS\nCAS\nPubMed\nGoogle Scholar\r\n\r\nNottrott, S. et al. Functional interaction of a novel 15.5kD [U4\/U6\u00b7U5] tri-snRNP protein with the 5\u2032 stem\u2013loop of U4 snRNA. EMBO J. https:\/\/doi.org\/10.1093\/emboj\/18.21.6119 (1999).\r\n\r\nPannone, B. K.","& Wolin, S. L. Sm-like proteins wRING the neck of mRNA. Curr. Biol.","10, R478\u201381 (2000).\r\n\r\nArticle\nCAS\nPubMed\nGoogle Scholar\r\n\r\nHu, J.,","Xu, D.,","Schappert, K.,","Xu, Y.","& Friesen, J. D. Mutational analysis of Saccharomyces cerevisiae U4 small nuclear RNA identifies functionally important domains. Mol. Cell. 
Biol.","15, 1274\u20131285 (1995).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nEdery, P. et al. Association of TALS developmental disorder with defect in minor splicing component U4atac snRNA. Science 332, 240\u2013243 (2011).\r\n\r\nArticle\nADS\nCAS\nPubMed\nGoogle Scholar\r\n\r\nFarach, L. S. et al. The expanding phenotype of RNU4ATAC pathogenic variants to Lowry Wood syndrome. Am. J. Med. Genet.","A 176, 465\u2013469 (2018).\r\n\r\nArticle\nCAS\nPubMed\nGoogle Scholar\r\n\r\nMerico, D. et al. Compound heterozygous mutations in the noncoding RNU4ATAC cause Roifman Syndrome by disrupting minor intron splicing. Nat. Commun.","6, 8718 (2015).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nOlthof, A. M. et al. Disruption of exon-bridging interactions between the minor and major spliceosomes results in alternative splicing around minor introns. Nucleic Acids Res.","49, 3524\u20133545 (2021).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nArriaga, T. M. et al. Transcriptome-wide outlier approach identifies individuals with minor spliceopathies. Am. J. Hum. Genet.","112, 2458\u20132475 (2025).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nJackson, A. et al. Analysis of R-loop forming regions identifies RNU2-2 and RNU5B-1 as neurodevelopmental disorder genes. Nat. Genet.","57, 1362\u20131366 (2025).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nGreene, D. et al. Mutations in the small nuclear RNA gene RNU2-2 cause a severe neurodevelopmental disorder with prominent epilepsy. Nat. Genet.","57, 1367\u20131373 (2025).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nRan, F. A. et al. Genome engineering using the CRISPR\u2013Cas9 system. Nat. Protoc.","8, 2281\u20132308 (2013).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nLorenz, R. et al. ViennaRNA package 2.0. Algorithms Mol. 
Biol.","6, 26 (2011).\r\n\r\nArticle\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nWang, Y. et al. rMATS-turbo: an efficient and flexible computational tool for alternative splicing analysis of large-scale RNA-seq data. Nat. Protoc.","19, 1083\u20131104 (2024).\r\n\r\nArticle\nCAS\nPubMed\nGoogle Scholar\r\n\r\nKingdom, R.,","Beaumont, R.","N.,","Wood, A.","R.,","Weedon, M. N.","& Wright, C. F. Genetic modifiers of rare variants in monogenic developmental disorder loci. Nat. Genet.","56, 861\u2013868 (2024).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nLandrum, M. J. et al. ClinVar: public archive of relationships among sequence variation and human phenotype. Nucleic Acids Res.","42, D980\u2013D985 (2014).\r\n\r\nArticle\nCAS\nPubMed\nGoogle Scholar\r\n\r\nGenomics England. National Genomic Research Library. Dataset. figshare https:\/\/doi.org\/10.6084\/m9.figshare.4530893.v8 (2025).\r\n\r\nDawood, M. et al. GREGoR: accelerating genomics for rare diseases. Nature 647, 331\u2013342 (2025).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nDownload references\r\nAcknowledgements\r\nWe thank the Crick\u2019s Genomics Scientific Technology Platform for performing sequencing and the Flow Cytometry and Cell Sciences Scientific Technology Platforms for assisting in maintaining cell lines. We also thank P. O\u2019Donovan, M. Sato and E. Miller from the Genomics England Airlock team. N.W.","is supported by a Sir Henry Dale Fellowship jointly funded by the Wellcome Trust and the Royal Society (grant 220134\/Z\/20\/Z), a Lister Institute research prize and grant funding from Novo Nordisk. Y.C. is supported by a studentship from Novo Nordisk. The Francis Crick Institute receives its core funding (G.M.F.) from Cancer Research UK (grant CC2190), the UK Medical Research Council (grant CC2190) and the Wellcome Trust (grant CC2190). G.M.F. is supported by a European Research Council Starting grant (Seq2Func-NC). A.J.M.B. 
is supported by a Wellcome PhD Training Fellowship for Clinicians and the 4Ward North PhD Programme for Health Professionals (grant 223521\/Z\/21\/Z). C.D.","is supported by research grants from the Deutsche Forschungsgemeinschaft (DFG) (project grants 455314768, 458099954 and 505514143). C.N. has received support from the Health philanthropic program of Mutuelles AXA dedicated to supporting innovative research projects in France (RNU-SPLICE project). Patients 4, 5, 6, 13, 14, 15 and 16 included in this study were diagnosed through Plan France M\u00e9decine G\u00e9nomique 2025 (PFMG2025). Patients 11 and 12 were sequenced at the Baylor College of Medicine Human Genome Sequencing Center through the GREGoR Consortium with support from US National Human Genome Research Institute grants U01HG011758 and U54HG003273.","Analysis of individuals 9 and 10 was supported by National Human Genome Research Institute grant R01HG009141. D.G.C. was supported by the Child Neurologist Career Development Program CNCDP-K12 (US National Institute of Neurological Disorders and Strokes grant NS098482). C.A.-T. is supported in part by the National Human Genome Research Institute grant U01HG011755 (GREGoR consortium). O.M. is supported by the Hazem Ben-Gacem Tunisia Medical Fellowship Fund. Research reported in this publication was supported by the National Institute Of Neurological Disorders And Stroke of the National Institutes of Health under grant awards U01HG010218 and U01HG010233.","The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This research was made possible through access to data in the National Genomic Research Library, which is managed by Genomics England Limited (a wholly owned company of the Department of Health and Social Care). 
The National Genomic Research Library holds data provided by patients and collected by the NHS as part of their care and data collected as part of their participation in research. The National Genomic Research Library is funded by the National Institute for Health Research and NHS England.","The Wellcome Trust, Cancer Research UK and the Medical Research Council have also funded research infrastructure. This study was registered with Genomics England under Research Registry Projects 354. This research has been conducted using the UK Biobank Resource under application number 81050. We gratefully acknowledge All of Us and UK Biobank participants for their contributions. We also thank the National Institutes of Health\u2019s All of Us Research Program for making available the participant and variant data examined in this study.","For the purpose of Open Access, the authors have applied a CC BY public copyright licence to any Author Accepted Manuscript version arising from this submission.\r\nAuthor information\r\nAuthor notes\r\n\r\nThese authors contributed equally: Nicola Whiffin, Gregory M. Findlay\r\nAuthors and Affiliations\r\nThe Genome Function Laboratory, The Francis Crick Institute, London, UK\r\n\r\nJoachim De Jonghe,\u00a0Ayanfeoluwa Adedeji,\u00a0Christina M. 
Kajba\u00a0&\u00a0Gregory M.","Findlay\r\n\r\nBig Data Institute, University of Oxford, Oxford, UK\r\n\r\nHyung Chul Kim,\u00a0Ruebena Dawes,\u00a0Yuyang Chen\u00a0&\u00a0Nicola Whiffin\r\n\r\nCentre for Human Genetics, University of Oxford, Oxford, UK\r\n\r\nHyung Chul Kim,\u00a0Ruebena Dawes,\u00a0Yuyang Chen\u00a0&\u00a0Nicola Whiffin\r\n\r\nDepartment of Biochemical Engineering, University College London, London, UK\r\n\r\nAyanfeoluwa Adedeji\r\n\r\nInstitute of Human Genetics, University Hospital Essen, University Duisburg-Essen, Essen, Germany\r\n\r\nElsa Leit\u00e3o\u00a0&\u00a0Christel Depienne\r\n\r\nNantes Universit\u00e9, CHU de Nantes, CNRS, INSERM, L\u2019Institut du Thorax, Nantes, France\r\n\r\nBenjamin Cogn\u00e9\r\n\r\nNantes Universit\u00e9, CHU de Nantes, CNRS, INSERM, G\u00e9n\u00e9tique m\u00e9dicale, Nantes, France\r\n\r\nBenjamin Cogn\u00e9\r\n\r\nManchester Centre for Genomic Medicine, Division of Evolution and Genomic Sciences, School of Biological Sciences, Faculty of Biology, Medicine and Health, University of Manchester, Manchester, UK\r\n\r\nAlexander J.","M. Blakes\r\n\r\nCentre for Population Genomics, Garvan Institute of Medical Research, Sydney, New South Wales, Australia\r\n\r\nCas Simons,\u00a0Rocio Rius\u00a0&\u00a0Daniel G. MacArthur\r\n\r\nCentre for Population Genomics, Murdoch Children\u2019s Research Institute, Melbourne, Victoria, Australia\r\n\r\nCas Simons,\u00a0Rocio Rius\u00a0&\u00a0Daniel G. 
MacArthur\r\n\r\nDepartment of Pediatric Neurology, University of Child Health Sciences, The Children\u2019s Hospital, Lahore, Pakistan\r\n\r\nJaveria R.","Alvi\u00a0&\u00a0Tipu Sultan\r\n\r\nService de G\u00e9n\u00e9tique, G\u00e9nomique et Procr\u00e9ation, CHU Grenoble Alpes, Grenoble, France\r\n\r\nFlorence Amblard,\u00a0Charles Coutton,\u00a0Radu Harbuz\u00a0&\u00a0Julien Thevenon\r\n\r\nGCS AURAGEN, Lyon, France\r\n\r\nFlorence Amblard,\u00a0Charles Coutton\u00a0&\u00a0Julien Thevenon\r\n\r\nUniversit\u00e9 Grenoble Alpes, INSERM U 1209, CNRS UMR 5309, Institut for Advanced Biosciences, Grenoble, France\r\n\r\nFlorence Amblard,\u00a0Charles Coutton\u00a0&\u00a0Julien Thevenon\r\n\r\nBroad Center for Mendelian Genomics, Program in Medical and Population Genetics, Broad Institute of MIT and Harvard, Cambridge, MA, USA\r\n\r\nChristina Austin-Tse,\u00a0Olfa Messaoud\u00a0&\u00a0Nicola Whiffin\r\n\r\nService de p\u00e9diatrie, H\u00f4pitaux Universitaires de Strasbourg, Strasbourg, France\r\n\r\nSarah Baer\r\n\r\nDepartment of Medicine, University of Washington School of Medicine, Seattle, WA, USA\r\n\r\nElsa V.","Balton\u00a0&\u00a0Nitsuh Dargie\r\n\r\nLaboratoire SeqOIA, Paris, France\r\n\r\nPierre Blanc,\u00a0Olivier Grunewald,\u00a0Paul Gueguen,\u00a0Pierre Marijon\u00a0&\u00a0Caroline Nava\r\n\r\nSection of Pediatric Neurology, Department of Pediatrics, Baylor College of Medicine, Houston, TX, USA\r\n\r\nDaniel G. Calame\r\n\r\nTexas Children\u2019s Hospital, Houston, TX, USA\r\n\r\nDaniel G. Calame\r\n\r\nVictorian Clinical Genetics Services, Murdoch Children\u2019s Research Institute, Melbourne, Victoria, Australia\r\n\r\nChloe A. Cunningham\u00a0&\u00a0Richard J. Leventer\r\n\r\nDepartment of Paediatrics, University of Melbourne, Melbourne, Victoria, Australia\r\n\r\nChloe A. Cunningham\u00a0&\u00a0Richard J. 
Leventer\r\n\r\nDepartment of Pediatrics, University of Washington, Seattle, WA, USA\r\n\r\nKatrina M.","Dipple\u00a0&\u00a0Ian Glass\r\n\r\nBrotman Baty Institute for Precision Medicine, Seattle, WA, USA\r\n\r\nKatrina M. Dipple\u00a0&\u00a0Ian Glass\r\n\r\nDepartment of Molecular and Human Genetics, Baylor College of Medicine, Houston, TX, USA\r\n\r\nHaowei Du\r\n\r\nService de G\u00e9n\u00e9tique M\u00e9dicale, Institut de G\u00e9n\u00e9tique M\u00e9dicale D\u2019Alsace, H\u00f4pitaux Universitaires de Strasbourg, Strasbourg, France\r\n\r\nSalima El Chehadeh\r\n\r\nLaboratoire de G\u00e9n\u00e9tique M\u00e9dicale, Institut de G\u00e9n\u00e9tique M\u00e9dicale d\u2019Alsace, INSERM UMRS_1112, CRBS, Universit\u00e9 de Strasbourg, Strasbourg, France\r\n\r\nSalima El Chehadeh\r\n\r\nRady Children\u2019s Institute for Genomic Medicine, San Diego, CA, USA\r\n\r\nJoseph G.","Gleeson\r\n\r\nDepartment of Neurosciences and Pediatrics, University of California, San Diego, San Diego, CA, USA\r\n\r\nJoseph G.","Gleeson\r\n\r\nU1172-LilNCog-Lille Neuroscience and Cognition, CHU de Lille, Lille, France\r\n\r\nOlivier Grunewald\r\n\r\nLaboratoire de Genopathies, CHU Lille, Lille, France\r\n\r\nOlivier Grunewald\r\n\r\nService de G\u00e9n\u00e9tique, CHRU de Tours, Tours, France\r\n\r\nPaul Gueguen\u00a0&\u00a0Marie-Line Jacquemont\r\n\r\nUniversit\u00e9 de Tours, Imaging Brain and Neuropsychiatry iBraiN, Tours, France\r\n\r\nPaul Gueguen\u00a0&\u00a0Marie-Line Jacquemont\r\n\r\nCentre de R\u00e9f\u00e9rence Maladies Rares \u2018Anomalies du D\u00e9veloppement et Syndromes Malformatifs\u2019, FHU Genomeds, CHRU de Tours, Tours, France\r\n\r\nMarie-Line Jacquemont\r\n\r\nRoyal Children\u2019s Hospital, Melbourne, Victoria, Australia\r\n\r\nRichard J.","Leventer\r\n\r\nHarvard Medical School, Boston, MA, USA\r\n\r\nOlfa Messaoud\r\n\r\nCentre de r\u00e9f\u00e9rence maladies rares, D\u00e9ficiences Intellectuelles de Causes Rares, Centre de G\u00e9n\u00e9tique, FHU-TRANSLAD, CHU 
Dijon Bourgogne, Dijon, France\r\n\r\nChristel Thauvin\r\n\r\nUnit\u00e9 Fonctionnelle Innovation en Diagnostic G\u00e9nomique des Maladies Rares, F\u00e9d\u00e9ration Hospitalo-Universitaire-TRANSLAD, CHU Dijon Bourgogne, Dijon, France\r\n\r\nChristel Thauvin\r\n\r\nUMR1231 GAD, Inserm, Universit\u00e9 Bourgogne-Franche Comt\u00e9, Dijon, France\r\n\r\nChristel Thauvin\r\n\r\nClinique de G\u00e9n\u00e9tique, H\u00f4pital Jeanne de Flandre, CHU de Lille, Lille, France\r\n\r\nCatherine Vincent-Delorme\r\n\r\nConsultation de g\u00e9n\u00e9tique, CH Arras, Arras, France\r\n\r\nCatherine Vincent-Delorme\r\n\r\nDepartment of Medical Genetics, Istanbul Medeniyet University Medical School, Istanbul, Turkey\r\n\r\nElif Yilmaz Gulec\r\n\r\nMedical Genetics Clinic, Istanbul Goztepe Prof Dr Suleyman Yalcin City Hospital, Istanbul, Turkey\r\n\r\nElif Yilmaz Gulec\r\n\r\nCardiovascular Medicine, Stanford University, Stanford, CA, USA\r\n\r\nRodrigo Mendez\r\n\r\nSorbonne Universit\u00e9, Institut du Cerveau\u2014Paris Brain Institute\u2014ICM, Inserm, CNRS, APHP, D\u00e9partement de G\u00e9n\u00e9tique, H\u00f4pital de la Piti\u00e9 Salp\u00eatri\u00e8re, Paris, France\r\n\r\nCaroline Nava\r\nContributions\r\nJ.D.J.,","A.A. and C.M.K. performed experiments.","J.D.J.,","H.C.K.,","R.D.,","E.L.,","B.C., Y.C. and\u00a0A.J.M.B. analysed data and contributed to the figures and tables in the paper.","C.S.,","R.R.,\u00a0J.T.,","R.M.,","D.G.M.,","C.D., N.W. and G.M.F. collected data, provided funding and supervised the work. All other authors provided clinical and\/or genomic data and are listed alphabetically.","J.D.J., N.W. and G.M.F. wrote the paper with input from all the authors.\r\nCorresponding authors\r\nCorrespondence to\nNicola Whiffin or Gregory M. Findlay.\r\nEthics declarations\nCompeting interests\r\nN.W. receives research funding from Novo Nordisk and Biomarin Pharmaceutical. D.G.M. 
is a paid consultant for GlaxoSmithKline, Insitro and Overtone Therapeutics and receives research support from Microsoft. The other authors declare no competing interests.\r\n\nPeer review\nPeer review information\r\nNature thanks Karine Choquet and the other, anonymous, reviewer(s) for their contribution to the peer review of this work.","Peer reviewer reports are available.\r\n\nAdditional information\r\nPublisher\u2019s note Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.\r\nExtended data figures and tablesExtended Data Fig. 1 Quality control metrics for RNU4-2 SGE experiments.\r\na,\u00a0The distribution of variant read counts in the HDR library is plotted for all n\u2009=\u2009539 variants included in library design. Of reads from the HDR library, 0.0068% and 4.6% matched unedited reference and PAM-edit only, respectively.","b, The distribution of variant read counts in day 4 gDNA is plotted, with counts averaged across biological replicates.","c,\u00a0Inter-replicate function score correlations are plotted, with Pearson\u2019s r shown and variants coloured by mutation type.\r\nExtended Data Fig. 2 ReNU syndrome variants are discriminated with high precision from variants seen frequently in population controls.\r\nROC-AUC measurements for distinguishing 12 ReNU syndrome SNVs from population control SNVs by SGE score are displayed as a heatmap. Each AUC was determined using only variants in UK Biobank and All of Us with allele counts above the thresholds indicated on the axes. 
For select allele count thresholds applied to both cohorts (10, 20, 40, 60, and 80), the number of population variants retained for the ROC-AUC calculation is indicated.\r\nExtended Data Fig.","3 Correlations between function scores and predicted effects on RNA binding stability.\r\nViennaRNA was used to predict the effects of variants (n\u2009=\u2009521) on the minimum free energy of U4\/U6 RNA binding compared to reference (\u0394\u0394G).","a, Predicted \u0394\u0394G values are plotted versus function scores for the whole transcript, as well as for individual regions (Spearman\u2019s \u03c1).","b, ROC curve for classifying ReNU syndrome variants from population controls using ViennaRNA-predicted \u0394\u0394G values (AUC\u2009=\u20090.72).\r\nExtended Data Fig. 4 Function scores for variants within the RNU4-2 critical region.\r\nFunction scores are plotted by position and coloured by their association with ReNU syndrome (red), presence in the UK Biobank or All of Us cohorts (blue), or no observation in either (teal). Variants score lowly in two regions within the CR (shaded), n.62-70 and n.75-78, which correspond to the T-loop and Stem III, respectively.","The black dashed line (function score\u2009=\u2009\u22120.302) indicates significantly depleted variants and the gray dashed line (function score\u2009=\u2009\u22120.90) separates \u201cmoderate\u201d from \u201cstrong\u201d depletion. The vertical red dashed lines represent the boundaries of the 18 nucleotide ReNU CR reported by Chen et al.1 drawn to include insertions at n.61_62 and n.79_80.\r\nExtended Data Fig. 5 Calibration of function scores to evidence for clinical classification of variants in relation to ReNU syndrome.\r\nGaussian mixture modelling was used to estimate odds of pathogenicity (OddsPath). 
Function scores are plotted against OddsPath values for n\u2009=\u2009127 variants within the ReNU syndrome critical region.","Vertical dotted lines mark the median of insertion controls (x\u2009=\u20090), as well as thresholds for \u201cmoderate\u201d (\u22120.302) and \u201cstrong\u201d (\u22120.90) depletion. Horizontal dashed lines indicate OddsPath thresholds for assigning evidence strengths in accordance with ACMG guidelines8. OddsPath values are capped for variants with function scores below \u22121.0 to display all points.\r\nExtended Data Fig. 6 Phenotype clustering of ReNU patients.\r\na, PCA clustering as in Fig. 3a but removing individuals with the recurrent n.64_65insT variant.","b, Phenotype clustering of all individuals represented in Fig. 3a using a UMAP representation.\r\nExtended Data Fig. 7 Correlation of the SGE assay in haploid versus diploid HAP1 cells.\r\na, Function scores (n\u2009=\u2009539) from SGE in diploid HAP1 cells, plotted by transcript position and coloured by variant type.","b, Function scores from SGE in diploid HAP1 cells coloured by the function score from SGE in haploid HAP1 cells.","c, Correlation of function scores in diploid versus haploid HAP1 cells, coloured by the region in which each variant is located (Pearson\u2019s r\u2009=\u20090.75).\r\nExtended Data Table 1 Comparison of clinical features by function score categories\r\nFull size table\r\nExtended Data Table 2 Results from association testing with intelligence-related metrics in the UK Biobank\r\nFull size table\r\nExtended Data Table 3 Homozygous and compound heterozygous variants in individuals with undiagnosed neurodevelopmental disorders\r\nFull size table\r\nSupplementary informationRights and permissions\r\nOpen Access This article is licensed under a Creative Commons Attribution 4.0 International License, which permits use, sharing, adaptation, distribution and reproduction in any medium or format, as long as you give appropriate credit to the original 
author(s) and the source, provide a link to the Creative Commons licence, and indicate if changes were made.","The images or other third party material in this article are included in the article\u2019s Creative Commons licence, unless indicated otherwise in a credit line to the material. If material is not included in the article\u2019s Creative Commons licence and your intended use is not permitted by statutory regulation or exceeds the permitted use, you will need to obtain permission directly from the copyright holder. To view a copy of this licence, visit http:\/\/creativecommons.org\/licenses\/by\/4.0\/.\r\n\r\nReprints and permissions\r\nAbout this article\r\n\r\nCite this article\r\nDe Jonghe, J.,","Kim, H.C.,","Adedeji, A. et al. Saturation editing of RNU4-2 reveals distinct dominant and recessive disorders.\nNature (2026). https:\/\/doi.org\/10.1038\/s41586-026-10334-9\r\n\r\nDownload citation\r\n\r\nReceived: 24 April 2025\r\n\r\nAccepted: 26 February 2026\r\n\r\nPublished: 08 April 2026\r\n\r\nVersion of record: 08 April 2026\r\n\r\nDOI: https:\/\/doi.org\/10.1038\/s41586-026-10334-9"],"images":[{"src":"\/news\/images\/TkbmuTA5An6pq0jkF31YSHaroE6S7TsKDrDN3Z0FIoltBva7I0do6Hv56SGRKYL85pVN6H7y6avsjnd9v9uqf4eFGmhcax0V6l2tuZNaytYfYHkUjKasu7XoZ0L4dajsjqh9NQVtaBnNYLsiwO6HiBpTgkUj9.jpg","width":0,"height":0,"source":"featured","size":0}]}"
      [3]=>
      int(1775726512)
    }
  }
  [3]=>
  array(7) {
    ["file"]=>
    string(21) "/app/sys/news.inc.php"
    ["line"]=>
    int(163)
    ["function"]=>
    string(8) "saveData"
    ["class"]=>
    string(6) "NewsDB"
    ["object"]=>
    object(NewsDB)#4 (1) {
      ["db":"NewsDB":private]=>
      object(SQL)#1 (6) {
        ["CONNECTION":"SQL":private]=>
        object(mysqli)#2 (18) {
          ["affected_rows"]=>
          int(-1)
          ["client_info"]=>
          string(14) "mysqlnd 8.2.30"
          ["client_version"]=>
          int(80230)
          ["connect_errno"]=>
          int(0)
          ["connect_error"]=>
          NULL
          ["errno"]=>
          int(1406)
          ["error"]=>
          string(40) "Data too long for column 'data' at row 1"
          ["error_list"]=>
          array(1) {
            [0]=>
            array(3) {
              ["errno"]=>
              int(1406)
              ["sqlstate"]=>
              string(5) "22001"
              ["error"]=>
              string(40) "Data too long for column 'data' at row 1"
            }
          }
          ["field_count"]=>
          int(2)
          ["host_info"]=>
          string(18) "db:3306 via TCP/IP"
          ["info"]=>
          NULL
          ["insert_id"]=>
          int(0)
          ["server_info"]=>
          string(37) "11.2.2-MariaDB-1:11.2.2+maria~ubu2204"
          ["server_version"]=>
          int(110202)
          ["sqlstate"]=>
          string(5) "22001"
          ["protocol_version"]=>
          int(10)
          ["thread_id"]=>
          int(810018)
          ["warning_count"]=>
          int(0)
        }
        ["URL":"SQL":private]=>
        string(7) "db:3306"
        ["USER":"SQL":private]=>
        string(8) "retrofox"
        ["PASS":"SQL":private]=>
        string(16) "TreeakWqQPD9qnWR"
        ["DB":"SQL":private]=>
        string(8) "retrofox"
        ["DROP_VALUES":"SQL":private]=>
        array(1) {
          [0]=>
          string(0) ""
        }
      }
    }
    ["type"]=>
    string(2) "->"
    ["args"]=>
    array(2) {
      [0]=>
      string(85) "foxapi_article_SNgSFz1UYwUgnAye8Iq6AwimFnBOT9jdyRE0kXeBSMWDBytDmFUTOmTLbAFJl2KLSL4Shs"
      [1]=>
      string(89760) "{"id":"SNgSFz1UYwUgnAye8Iq6AwimFnBOT9jdyRE0kXeBSMWDBytDmFUTOmTLbAFJl2KLSL4Shs","title":"Saturation editing of RNU4-2 reveals distinct dominant and recessive disorders","description":"Recently, de novo variants in an 18-nucleotide region in the centre of RNU4-2 were shown to cause ReNU syndrome, a syndromic neurodevelopmental disorder that is predicted to affect tens of thousands of individuals worldwide1,2. RNU4-2 is a non-protein-coding gene that is transcribed into the U4 small nuclear RNA component of the major spliceosome3. ReNU syndrome variants disrupt spliceosome function and alter 5\u2032 splice site selection1,4. Here we performed saturation genome editing (SGE) of RNU4-2 to identify the functional and clinical impact of variants across the entire gene. The resulting SGE function scores, derived from variants\u2019 effects on cell fitness, discriminate ReNU syndrome variants from those observed in the population and markedly outperform in silico variant effect prediction. Using these data, we redefine the ReNU syndrome critical region at single-nucleotide resolution, resolve variant pathogenicity for variants of uncertain significance and show that SGE function scores delineate variants by phenotypic severity and the extent of observed splicing disruption. Furthermore, we identify variants affecting function in regions of RNU4-2 that are critical for interactions with other spliceosome components. We show that these variants cause a new recessive neurodevelopmental disorder that is distinct from ReNU syndrome. Together, this work defines the landscape of variant function across RNU4-2, providing critical insights for both diagnosis and therapeutic development. 
Saturation genome editing of RNU4-2 identifies the functional and clinical impact of variants across the entire gene and delineates variants that\u00a0cause a new recessive neurodevelopmental disorder distinct from ReNU syndrome.","author":"Findlay, Gregory M.","source":"Nature","publish":"April 8, 2026, 9:45 pm","canonical":"https:\/\/www.nature.com\/articles\/s41586-026-10334-9","text":["Main\r\nThe spliceosome is a large ribonucleoprotein complex that mediates RNA splicing. De novo variants in a gene encoding one of the small nuclear RNA (snRNA) components of the spliceosome, RNU4-2, were recently shown to cause ReNU syndrome, a prevalent neurodevelopmental disorder (NDD)1,2. ReNU syndrome is a complex multi-system disorder characterized by moderate to severe global developmental delay, intellectual disability, hypotonia, acquired microcephaly, speech and motor difficulties, low bone density and often seizures1,4.\r\n\r\nRNU4-2 encodes the U4 snRNA, which is a critical component of the major spliceosome.","In particular, U4 is tightly bound with the U6 snRNA in the U4\/U6.U5 tri-small-nuclear ribonucleoprotein and the U4\/U6 duplex needs to be unwound for activation of splicing3. Variants identified in individuals with ReNU syndrome cluster in an 18-nucleotide (nt) region in the centre of RNU4-2 that is depleted of variants in population datasets (the \u2018critical region\u2019, or CR)1. This region is known to accurately position U6 for recognition of the 5\u2032 splice site. Consistent with this, variants causing ReNU syndrome have been shown to alter 5\u2032 splice site usage1, with this disruption correlating with phenotype severity4.","Similarly, variants in two distinct structures within the 18-nt CR (the T-loop and Stem III) have been proposed to differ in clinical severity4.\r\n\r\nThe precise relationship between genetic variation in RNU4-2 and clinical impact remains incompletely characterized. 
The variants initially characterized in individuals with ReNU syndrome are all within the 18-nt CR; however, more recent work has proposed a role for variants outside this region, in the 5\u2032 stem loop5. It is unclear which, if any, variants outside the CR could also cause NDD. This is particularly important as the increased mutation rate of RNU4-2 and other snRNA genes means that there will be many chance occurrences of variants among sequenced individuals with syndromic NDD6.","Up to 75% of individuals with ReNU syndrome have the same single-nucleotide insertion (n.64_65insT). Whether the high recurrence of this particular variant is due to ascertainment bias, germline selection and\/or an increased mutation rate is at present unknown. Furthermore, it is unclear whether available variant effect predictors (for example, CADD7) can effectively distinguish between pathogenic and benign variants in RNU4-2.\r\n\r\nResolving these questions will be critical to ensure accurate, comprehensive diagnoses of individuals affected by ReNU syndrome.","One approach to clarifying variant impact is through the generation of functional data of variant effect, which can mechanistically inform why specific variants cause disease and improve clinical interpretation of rare variants8.","However, no experimental assay has yet been established to evaluate variants in RNU4-2, owing to its recent association with NDD.\r\n\r\nSaturation genome editing (SGE) is a powerful approach to delineate genotype\u2013phenotype relationships9.","Crucially, it does not rely on variants being observed in an individual with or without disease.","Instead, every possible variant across a gene or region can be engineered and the relative functional effects of each determined through a cellular readout. SGE experiments have been performed across numerous protein-coding genes, including BRCA110, CARD1111, DDX3X12, VHL13 and BAP114. 
In each case, the SGE assay has accurately differentiated between known pathogenic and benign variants.\r\n\r\nHere, we perform SGE of the human RNU4-2 noncoding RNA. We implemented an approach to combat the high sequence homology between RNU4-2 and its many homologues and pseudogenes, obtaining a variant effect map that effectively distinguishes variants known to cause ReNU syndrome from those in population controls.","We redefine the CR at single-nucleotide resolution, resolve pathogenicity assignments for variants of uncertain significance, and show that function scores for variants within the CR correlate closely with phenotypic severity. Furthermore, we identify functionally critical variants in other regions of RNU4-2 that underlie a recessive NDD marked by clinical features that are distinct from those of ReNU syndrome.\r\nSGE maps the effects of RNU4-2 variants\r\nPerforming SGE on regions of high sequence homology poses a challenge in that the protocol requires CRISPR\u2013Cas9 editing of a single locus, specific amplification of the edited locus from millions of cells and accurate variant calling from amplicon sequencing.","Alignment of RNU4-2 (RefSeq NR_003137.3) to RNU4-1 (RefSeq NR_003925.1) reveals mismatches at only 4 of the 145\u2009nt. The sequence upstream of RNU4-2, however, is both unique and poorly conserved across species, such that guide RNAs (gRNAs) predicted to be highly specific15 can be designed in conjunction with protospacer adjacent motif (PAM)-disrupting edits to block Cas9 recutting (Fig. 1a).\r\nFig. 
1: SGE reveals the functional spectrum of RNU4-2 variants.\r\na, Schematic of SGE library design and CRISPR targeting strategy for RNU4-2.","Positions of library variants including all possible SNVs (navy; across the 145-nt transcript and 6-nt 3\u2032), control 1-nt insertions in loop regions (yellow), CR 1-nt insertions (red) and deletions (teal) and multi-nt insertions (light purple) are denoted on a schematic of RNU4-2 and RNU6 in complex (left) and by genomic location (right). A gRNA was designed to cleave upstream of RNU4-2 (scissors), avoiding highly repetitive sequence and allowing for a PAM-blocking variant to be installed in a region of low conservation (PhyloP 100 vertebrates basewise conservation track shown).","b, Schematic of SGE experiments in HAP1. Following editing, cells were collected on days 4 and 14. Sequencing was performed to quantify variant frequencies at each timepoint and function scores were calculated.","c, Function scores for 539 variants were correlated across biological replicates (Pearson\u2019s r\u2009=\u20090.86\u00a0for\u00a0replicates 1 and 2). The function score threshold delineating significantly depleted variants is indicated with the dashed line.","d, Function scores are plotted by genomic position in relation to RNU4-2 (RefSeq NR_003137.3). The line at n.145 marks the end of the transcript, with 18 more distal SNVs also scored. Points in c,d are coloured by variant type with a single legend included for these two panels. 
CRISPR\u2013Cas9 icon in b adapted from Bioicons (https:\/\/bioicons.com\/?query=CRISPR; CRISPR_Cas9 schematic), Marcel Tisch, under a Creative Commons licence CC0 1.0 Universal.\r\n\r\nLacking established models for assaying RNU4-2 variants, we chose to perform SGE in HAP1\u00a0cells, a haploid human line in which growth effects have accurately distinguished pathogenic variants across several protein-coding genes10,12,13,14,16,17.","To HAP1 cells lacking LIG4 (HAP1-LIG4-KO), we codelivered Cas9 with a gRNA directing DNA cleavage 31-nt upstream of RNU4-2 to install a library comprising 539 variants by homology-directed DNA repair (HDR). The library included all possible single base substitutions from the first transcribed nucleotide to 6\u2009nt beyond the most 3\u2032 position of the RNU4-2 transcript (GRCh38, chr12:120291753\u2013120291903), as well as all 1-nt deletions and insertions in the CR, including all but one variant known to cause NDD (omitting n.72_73del, which was reported after assay design; Fig. 1a).","Uncertain whether pathogenic variants would show phenotypes in the HAP1-based assay, we included 8 2-nt to 5-nt insertions at positions in the CR previously associated with disease, reasoning these may have strong effects. As negative controls, we included 12 1-nt insertions in stem loops outside the CR, which were not predicted to be deleterious (Supplementary Table 1).\r\n\r\nAdapting an optimized SGE protocol for HAP1 cells13 (Fig.","1b), we successfully scored all variants included in the library, observing an average of 52% editing by HDR at day 4. Editing was confirmed by sequencing to be specifically targeted to RNU4-2, and not RNU4-1. Function scores, reflecting variants\u2019 effects on growth (Methods), were highly correlated across three biological replicates (Pearson\u2019s r\u2009=\u20090.83\u20130.86; Fig. 1c and Extended Data Fig. 1). 
As expected, given their location in the U4\/U6 secondary structure, all 12 negative control variants scored near 0 (mean, \u22120.009, s.d.\u2009=\u20090.11). We defined a neutral distribution from these negative controls to identify 151 significantly depleted variants (q\u2009<\u20090.01, that is, function score less than \u22120.302).","The 8 multi-nucleotide insertions in the CR included as positive controls all were depleted, with function scores ranging from \u22120.73 to \u22121.82. Mapping variants\u2019 function scores to their linear transcript position reveals that depleted variants are clustered, rather than distributed evenly across the gene (Fig. 1d).\r\nSGE data resolve variant pathogenicity\r\nWe annotated all assayed variants within RNU4-2 with whether or not they had been observed in individuals with ReNU syndrome1, observed in population cohorts (UK Biobank18 or All of Us), or observed in neither (unobserved; Fig. 2a).","All 18 variants observed in ReNU syndrome were depleted in the assay (function score less than \u22120.302), whereas 81.0% (286 out of 353) of population variants scored as normal (function score \u22120.302 or more; Fig. 2b). Accordingly, function scores effectively discriminate between ReNU syndrome variants and those identified in the population (Fig. 2c; area under the receiver operating characteristic (ROC) curve (AUC) of 0.93). Most variants that are unobserved in population cohorts score normally (56.0%; 84 out of 150); however, many are as, or even more, depleted than ReNU syndrome variants. 
Specifically, the four variants with the lowest function scores are all unobserved (Supplementary Table 1).\r\nFig.","2: Function scores accurately discriminate variants underlying ReNU syndrome.\r\na, Function scores for 521 variants within the RNU4-2 transcript are plotted by position and coloured by their association with ReNU syndrome (red), presence in the UK Biobank\u00a0(UKB) or All of Us\u00a0(AoU) cohorts (blue) or absence from both cohorts (teal). Depleted variants within the 18-nt CR (vertical red dashed lines) are confined to two smaller regions (shaded grey) and include all ReNU syndrome variants scored (n\u2009=\u200918). These regions, n.62-70 and n.75-78, correspond closely to the T-loop and Stem III regions, respectively.","The black dashed line (function score \u22120.302) indicates significantly depleted variants and the grey dashed line (function score \u22120.90) separates \u2018moderate\u2019 from \u2018strong\u2019 depletion.","b, Stacked histogram and overlaid density plot of function scores by category comparing 18 ReNU syndrome variants with 353 variants in UK Biobank and\/or All of Us and 150 unobserved variants.","c, ROC curves show the performance of function scores and CADD scores for classifying 12 ReNU syndrome SNVs from 346 SNVs observed at least once in population controls.","d, Function scores for SNVs are plotted by combined UK Biobank and All of Us allele count. Higher allele counts were correlated with higher function scores (Spearman\u2019s \u03c1\u2009=\u20090.29, two-sided P\u2009=\u20092.8\u2009\u00d7\u200910^\u221211). Among the 50 most frequently observed SNVs (combined allele count greater than 91; black dashed line), no SNVs were depleted. The grey dashed line separates absent variants (combined allele count of 0) from those observed at least once (combined allele count greater than 0).","e, Function scores for the 435 tested SNVs are plotted by CADD score. 
The dashed line at y\u2009=\u2009\u22120.302 indicates significantly depleted SNVs, whereas the red line at x\u2009=\u200919.25 and the blue line at x\u2009=\u200918.99 indicate median CADD scores for ReNU syndrome SNVs and SNVs present in population cohorts, respectively.\r\n\r\nWe observed a significant correlation between single-nucleotide variant (SNV) allele counts in population cohorts and function scores, with rarer SNVs tending to be more depleted by SGE (Spearman\u2019s \u03c1\u2009=\u20090.29, P\u2009=\u20092.8\u2009\u00d7\u200910^\u221211; Fig. 2d). Among the 50 SNVs with the highest combined allele counts in the UK Biobank and All of Us cohorts, none were depleted in the assay.","Indeed, applying more stringent allele count thresholds to define control variants in population cohorts consistently improved the assay\u2019s classification performance (Extended Data Fig. 2). These findings indicate that depleted variants observed in population cohorts are unlikely to be the result of experimental noise and, instead, represent genuine variants affecting RNU4-2 function segregating in the general population.\r\n\r\nThe discriminatory power of our SGE assay was substantially greater than that of the genome-wide in silico tool CADD19 (Fig. 2c; AUC\u2009=\u20090.65). Given the high conservation of the entire RNU4-2 gene, most SNVs have very similar CADD scores (Fig. 2e).","Although CADD scores for ReNU syndrome SNVs are marginally higher on average than those for SNVs in population cohorts (ReNU median 19.2; UK Biobank and All of Us median 19.0; one-sided Wilcoxon P\u2009=\u20090.040), a CADD score threshold that would capture all ReNU syndrome SNVs (18.89 or greater) would also annotate 56.4% (195 out of 346) of SNVs observed in UK Biobank and All of Us, and 55.6% (183 out of 329) of SNVs with normal SGE function scores, as probably deleterious. 
By contrast, our SGE function score threshold of \u22120.302 captures all ReNU syndrome SNVs and only 19.1% (66 out of 346) of SNVs observed in population cohorts.","We also observe only a weak correlation of SGE function scores with changes to U4\/U6 RNA binding stability predicted by ViennaRNA (\u03c1\u2009=\u2009\u22120.27, P\u2009=\u20094.5\u2009\u00d7\u200910^\u221210; Extended Data Fig. 3a). The observed effect is limited to specific regions, most notably Stem II (\u03c1\u2009=\u2009\u22120.79, P\u2009=\u20095.0\u2009\u00d7\u200910^\u221210). By contrast, no significant correlation is observed in the T-loop or Stem III and, overall, \u0394\u0394G values from ViennaRNA do not classify ReNU syndrome variants as well as SGE (ROC-AUC 0.72 versus 0.93, respectively; Extended Data Fig. 3b).\r\n\r\nThe assay clearly delineates the 18-nt CR of RNU4-2 (Fig. 2a) within which variants cause ReNU syndrome; however, some variants in this region score normally.","Using these data, we redefine the CR to two smaller regions of 9\u2009nt (n.62-70, inclusive of insertions at n.61_62) and 4\u2009nt (n.75-78), corresponding to the T-loop and Stem III, respectively (Extended Data Fig. 4). Although the T-loop region matches that reported by ref.","2, the CR overlapping Stem III is 3-nt smaller than previously suggested. Within these two regions, 85.4% (76 out of 89) of tested variants (79.5% of SNVs), including all ReNU syndrome variants, have significant function scores, compared with 17.4% (75 out of 432) across the remainder of RNU4-2.\r\n\r\nWe next used our function scores to assign evidence strengths for clinical variant classification8. We deemed the 17 pathogenic or likely pathogenic variants reported in ref. 
4 and assayed here to be associated with ReNU syndrome and 45 variants with combined allele counts across the UK Biobank and All of Us above 100 to be neutral.","A Gaussian mixture model was then applied to determine the odds of pathogenicity (OddsPath) for each variant (Methods, Extended Data Fig. 5 and Supplementary Table 1). Within the CR, 69 of 127 (54.3%) variants receive PS3 strong evidence of pathogenicity, including 16 of 18 variants reported to be pathogenic, with the other two variants receiving PS3 moderate or indeterminate evidence. A further 38 (29.9%) variants receive BS3 strong evidence of benignity.","As no variants outside the CR have been associated with ReNU syndrome, we refrain from assigning evidence strengths to variants outside the CR.\r\n\r\nRecent work by one research group4 classified three variants outside the CR and one deletion within the CR as variants of uncertain significance. Three of these variants were included in our assay (n.76del, n.92C>G and n.111C>T) and all three had normal function scores (0.12, 0.04 and 0.05, respectively).","Notably, all three variants are also observed in population controls. Furthermore, a recent paper proposed a link between two 5\u2032 stem loop variants, each identified in a single individual and inherited from an unaffected mother, and ReNU syndrome5. One of these variants is included in our assay (n.30A>T), and its score of \u22120.305 just crosses the threshold to be classified as depleted; however, other depleted variants in the same region are observed in population controls.","Finally, of two variants recently associated with retinitis pigmentosa20, the one that is included in our assay (n.56T>C) has a normal function score (\u22120.23).\r\nSGE depletion predicts disease severity\r\nA previous study proposed a difference in phenotypic severity between ReNU syndrome variants mapping to the T-loop and Stem III structures of the U4\/U6 duplex4. 
This difference is seen in our data, with Stem III variants having, on average, higher function scores (T-loop mean \u22121.13; Stem III mean \u22120.75; one-sided Wilcoxon P\u2009=\u20090.012).","However, we also observe considerable variation in function scores for ReNU variants within each of the two regions. For example, two SNVs within the T-loop, n.63T>C and n.65A>G, have function scores above the mean observed for Stem III variants (\u22120.51 and \u22120.32, respectively). To investigate this, we repeated the phenotype clustering analysis of 143 individuals with ReNU syndrome from ref. 4. We classified the variants into two categories corresponding to \u2018moderate\u2019 (\u22120.9\u2009<\u2009function score\u2009<\u2009\u22120.302) and \u2018strong\u2019 (function score less than \u22120.9) levels of depletion in the assay (Fig. 3a and Extended Data Fig. 4).","All of the individuals with moderate category variants cluster together, including the four individuals with the n.63T>C (n\u2009=\u20091) and n.65A>G (n\u2009=\u20093) T-loop variants (Fig. 3b). These results remained consistent when excluding n.64_65insT from the analysis (that is, the result is not driven by the recurrent insertion variant alone) and when using a uniform manifold approximation and projection (UMAP) representation (Extended Data Fig. 6).\r\nFig.","3: Function scores predict ReNU syndrome severity and degree of splicing disruption.\r\na, Schematic showing how ReNU variants are split into two categories based on their SGE function score: strong depletion (function score less than \u22120.9; red) and moderate depletion (\u22120.9\u2009<\u2009function score\u2009<\u2009\u22120.302; yellow).","b, The first two principal components from clustering of 143 ReNU syndrome cases by phenotype using the approach from ref. 4. Individuals are coloured by their variant SGE function score class. 
Unlabelled triangles indicate occurrences of n.64_65insT.","c, The proportion of affected individuals with each phenotype is plotted, with cases grouped by SGE function score class. The number of individuals (n) in each comparison group is shown for each phenotype. Error bars indicate 95% confidence intervals centred on each proportion (capped at 0 and 1.0).","Full data, including statistics for comparisons between groups, are included in Extended Data Table 1.","d, Principal component analysis based on PSI values from significant 5\u2032 splice site events detected from RNA sequencing data using rMATS, comparing 19 patients with ReNU with 20 control participants (purple), as performed in ref. 4. Individuals with ReNU are coloured by their variant SGE function score class.","GDD, global developmental delay; ID, intellectual disability.\r\n\r\nTo further determine whether SGE function scores were able to discriminate between more severe and milder ReNU syndrome variants, we compared four specific phenotypes.","Individuals with variants in the strong depletion group were significantly more likely to have severe developmental delay (73.3% versus 5.9%; odds ratio\u2009=\u200942.7; 95% confidence interval (CI) 6.1\u20131,841.8; two-sided Fisher\u2019s P\u2009=\u20091.1\u2009\u00d7\u200910^\u22127), severe intellectual disability (76.6% versus 5.9%; odds ratio\u2009=\u200950.4; 95% CI 7.1\u20132,197.0; two-sided Fisher\u2019s P\u2009=\u20093.6\u2009\u00d7\u200910^\u22128) and absent speech or to speak only a few words (92.8% versus 5.6%; odds ratio\u2009=\u2009195.5; 95% CI 24.7\u20138,591.7; two-sided Fisher\u2019s P\u2009=\u20096.6\u2009\u00d7\u200910^\u221214) than individuals with moderate depletion variants. 
There was no difference in the occurrence of seizures between variant groups (Fig.","3c and Extended Data Table 1).\r\n\r\nTo test whether the strength of SGE depletion also correlates with the extent of splicing disruption observed in individuals with ReNU syndrome, we repeated a second analysis from ref. 4. We regenerated a principal component analysis of percentage spliced-in (PSI) values for 5\u2032 splice sites that differed significantly in usage between ReNU cases and control participants. Individuals with strong and moderate SGE function scores clustered separately, with the strong variant individuals being more distant from control participants (Fig. 3d).\r\nA recessive NDD linked to RNU4-2 variants\r\nSeventy-five variants outside the ReNU CR are depleted in the SGE assay (Supplementary Table 1).","Unlike the depleted variants in the ReNU CR, most of these other depleted variants (84.0%; 63 out of 75) are observed in population control cohorts, albeit at low frequencies (Fig. 2a). To investigate whether these variants are associated with NDD-related traits, we compared individuals heterozygous for such variants (n\u2009=\u2009592) and individuals with non-depleted SNVs (n\u2009=\u200912,374) in RNU4-2 with individuals without any variants in RNU4-2, using the UK Biobank.","We did not find any significant differences in fluid intelligence scores, childhood developmental disorder diagnoses or age of leaving education (Extended Data Table 2).\r\n\r\nBecause our SGE assay was performed in a haploid cell line, we reasoned that depleted variants outside the CR may instead be associated with recessive phenotypes. We searched global rare disease cohorts and identified 20 individuals with biallelic depleted variants: 10 (including 3 pairs of siblings) with homozygous variants and 10 (including 4 pairs of siblings) who were each concordant for compound heterozygous depleted variants (Extended Data Table 3). 
None of these variants were located in the ReNU CR, yet all 20 individuals had NDD phenotypes.","None of the individuals had an existing genetic diagnosis that fully explained their observed phenotypes (Methods). Across the rare disease cohorts, no individuals with phenotypes unrelated to NDD had biallelic depleted variants. Only a single individual across the UK Biobank and All of Us cohorts is homozygous for an SGE-depleted variant (n.31T>G, function score \u22120.730).","This individual has only primary level education (highest grade, one to four) and reports difficulties with \u2018dressing or bathing\u2019, \u2018doing errands alone\u2019 and \u2018concentrating, remembering or making decisions\u2019, consistent with a possible intellectual disability.\r\n\r\nThe clinical phenotypes of the 20 identified NDD individuals are characterized as part of a broader cohort (total n\u2009=\u200938) in a companion paper21. The 18 extra individuals reported in this broader cohort all have biallelic RNU4-2 variants, but at least 1 variant had a non-significant function score or was not scored with SGE.","In brief, we define a new NDD characterized by global developmental delay, intellectual disability, delayed or absent speech, hypotonia, spasticity, microcephaly, ophthalmological and visual impairments and seizures, with variable involvement of genitalia, skin, hair and limb anomalies.","On MRI, individuals show distinctive white matter abnormalities and cerebellar atrophy that are not seen in ReNU syndrome21.\r\n\r\nDepleted variants outside the ReNU CR broadly map to four regions of U4\/U6 secondary structure that are known to mediate interactions between U4 and other components of the spliceosome: (1) the central portion of the Stem II interaction with U6 from nucleotides 6 to 11 (ref.","3); (2) a \u2018k-turn\u2019 structure required for protein binding22,23 comprising nucleotides 27 to 33 and nucleotides 42 to 46; (3) a region from nucleotides 118 to 126 that 
interacts with a ring of Sm proteins that are important for U4 biogenesis and stability24,25 and (4) a portion of the terminal stem loop formed by base-pairing of nucleotides 129 to 131 with nucleotides 140 to 142 (Fig. 4). All variants identified in the 20 recessive NDD cases map to these four regions. Variants in structurally equivalent regions of RNU4ATAC, which encodes the minor spliceosome equivalent of U4, U4atac, cause rare recessive RNU4atac-opathies26,27,28.","Of the 13 unique RNU4-2 variants identified in the recessive NDD cases, 5 have exact equivalents in RNU4ATAC that are (likely) pathogenic in ClinVar (n.32G>A, n.45G>C, n.46G>A, n.119A>G and n.122T>G; Supplementary Table 2). They include n.119A>G (function score \u22120.686; RNU4ATAC equivalent n.117A>G; ClinVar variation ID 1525441), which was homozygous in two individuals and compound heterozygous in three individuals, including two brothers.\r\nFig. 4: SGE-depleted variants outside the CR cause a recessive NDD.\r\nThe lowest SGE function score class among SNVs at each position is indicated on the U4\/U6 secondary structure. Outside the CR, low SGE scores occur at positions of spliceosomal protein binding, indicated by teal shaded regions.","Grey triangles correspond to homologous positions of RNU4ATAC at which (likely) pathogenic variants have been linked to recessive disease (from ClinVar; Supplementary Table 2). RNU4-2 variants with low function scores observed in recessive NDD cases are indicated, with filled purple circles indicating variants observed as homozygous and half-filled circles indicating variants observed in the compound heterozygous state. An orange dot in the centre of a circle indicates that the variant is observed in two affected siblings. Six (likely) pathogenic RNU4ATAC variants could not be confidently assigned to an equivalent nucleotide in RNU4-2. 
Three of these (n.8C>A, n.13C>T and n.16G>A) are shown together as mapping to Stem II.","The other three (n.29T>G, n.30G>A and n.111G>A) are not shown.\r\n\r\nIn an attempt to distinguish recessive and dominant variants experimentally, we performed SGE of RNU4-2 once more, this time using a diploid population of HAP1 cells selected through fluorescence-activated cell sorting (Methods). This experiment revealed function scores to be attenuated across the gene due to the presence of the second allele (Extended Data Fig.","7a,b and Supplementary Table 1).","However, all variants assayed in the Stem III region scored neutrally in diploid HAP1, suggesting pathogenic Stem III variants probably affect cell fitness in a manner that is distinct from pathogenic variants elsewhere. For all other regions, function scores between haploid and diploid models were highly correlated (Extended Data Fig.","7c), indicating fitness effects in diploid HAP1 cells do not delineate dominant and recessive variants in vivo.\r\nDiscussion\r\nRNU4-2 was the first noncoding RNA to be identified as having a substantial contribution to the prevalence of NDD, with ReNU syndrome predicted to affect around 100,000 individuals worldwide1,2. Here we developed an SGE assay to systematically assess the function of variants across RNU4-2 and map genotype\u2013phenotype relationships. We show that function scores accurately identify variants underlying ReNU syndrome and can distinguish these variants by disease severity. Furthermore, we define the CR at the centre of RNU4-2 within which variants cause dominant ReNU syndrome, at nucleotide resolution.","In two regions, of 9\u2009nt and 4\u2009nt, 85.4% of all tested variants are depleted.","However, some variants in these regions, particularly in Stem III, have normal function scores and are therefore unlikely to be pathogenic. 
As a consequence, these data have immediate use in clinical interpretation of newly observed variants in individuals with NDD.","Indeed, calibration of the SGE function scores for use within the ACMG\/AMP framework in the context of ReNU syndrome showed that these data can be used to give strong evidence towards either a pathogenic or benign classification.\r\n\r\nWe identified four regions of the U4\/U6 duplex structure, outside the ReNU CR, where variants are also depleted. This led us to uncover a new recessive NDD caused by homozygous and compound heterozygous variants in these regions that were depleted in SGE. This NDD is described comprehensively in ref.","21, in which we also expand the cohort to include 38 individuals with biallelic RNU4-2 variants: the 20 individuals presented here with significant function scores for both variants, and 18 extra individuals harbouring variants in the same functional regions with at least one variant that was not significantly depleted or not assayed by SGE. Through comprehensive clinical phenotyping and analysis of RNA sequencing data, we show that the recessive NDD is phenotypically and mechanistically distinct from ReNU syndrome.","For example, MRI findings in individuals with ReNU syndrome most commonly include enlarged ventricles and corpus callosum abnormalities4, whereas individuals with biallelic RNU4-2 variants commonly have progressive white matter changes and cerebellar atrophy. Although we cannot yet determine the prevalence of the recessive NDD, SGE-depleted variants outside the ReNU CR are found in 0.12% and 0.094% of individuals in the UK Biobank and All of Us cohorts, respectively.","Hence, the recessive NDD is rarer than ReNU syndrome, but the prevalence is likely increased in populations with higher rates of consanguinity21.\r\n\r\nDistinct mechanisms underlie dominant and recessive RNU4-2-associated NDDs. 
We previously showed that individuals with ReNU syndrome have an increase in use of alternative non-canonical 5\u2032 splice sites1, consistent with the role of the T-loop and Stem III regions in accurately positioning the U6 ACAGAGA sequence to receive the 5\u2032 splice site. Recessive RNU4-2 variants map to different locations within U4, outside the T-loop and Stem III. They are found in key regions of binding between U4 and other important spliceosome factors.","The same regions have previously been shown to be important in U4 mutational analyses in yeast25 and variants in the 5\u2032 stem loop k-turn that we identify as depleted occur at nucleotides that are essential for SNU13\/15.5k protein binding in vitro23. In our companion paper21, we show through analysis of blood RNA sequencing data that individuals with biallelic RNU4-2 variants do not have the ReNU signature of disrupted 5\u2032 splice site selection. Furthermore, biallelic individuals have notably decreased RNU4-2 expression, which is not observed in individuals with ReNU syndrome, supporting a distinct loss-of-function molecular mechanism.","As variants in the equivalent regions and nucleotides of RNU4ATAC that cause recessive RNU4atac-opathies have been shown to lead to intron retention29,30, a similar mechanism may underlie recessive RNU4-2 NDD.","However, this was not readily evident in RNA sequencing analysis in blood21.\r\n\r\nRNU4-2 is a striking example of genetic pleiotropy, with variants in different regions of the RNA, which is only 145\u2009nt in length, causing both two distinct NDDs and retinitis pigmentosa. 
This adds complexity to variant interpretation and makes it particularly important to calibrate functional evidence with consideration of underlying mechanisms.","Although we showed that function scores for variants within the ReNU CR can provide strong evidence for clinical interpretation, we were unable to calibrate our assay for variants outside the ReNU CR due to a lack of independently defined pathogenic variants in these regions8, as all individuals with recessive NDD were identified on the basis of function score. Whereas we anticipate that our SGE data will prove highly useful for delineating variant pathogenicity for recessive disease, until orthogonal calibration can be performed, we recommend PS3 supporting evidence be assigned to significantly depleted variants outside the CR.","It is important to note that we set a relatively conservative threshold to define significantly depleted variants (q\u2009<\u20090.01) using synthetic controls in the absence of bona fide benign variants. Although all variants associated with ReNU syndrome scored below this threshold, we cannot exclude the possibility that variants with more subtle effects may be clinically relevant, particularly in relation to recessive disease. We cannot fully exclude the possibility that variants that score just below the \u22120.302 function score threshold are benign and represent false positives.","The calibration of function scores to evidence strength for ReNU variant classification reflects this, as variants were not assigned PS3 strong evidence in favour of pathogenicity unless their function scores were below \u22120.45.\r\n\r\nThus far, there are no strong data linking variants outside the CR to dominantly inherited NDD. This is supported by our analysis of heterozygous SGE-depleted variants outside the CR in the UK Biobank, in which we do not find any associations with intellectual disability related phenotypes. 
Accordingly, SGE data should not be used as evidence for the pathogenicity of variants for dominantly inherited ReNU syndrome beyond the CR.","We note that the 5\u2032 stem loop variants n.30A>T (function score \u22120.305) and n.43_44insT have been putatively associated with NDD5, with a link initially proposed with dominant ReNU syndrome.","However, these variants are within the \u2018k-turn\u2019 region linked to recessive disease in this study, and both are inherited from unaffected parents. Furthermore, n.43_44insT is identified in an individual with NDD in our companion paper, as compound heterozygous with a variant in Stem II21. Collectively, these data indicate that 5\u2032 stem loop variants are more likely to lead to recessive NDD than dominant ReNU syndrome.\r\n\r\nOur HAP1-based SGE assay has several limitations. Most notably, the growth-based readout does not inform directly on underlying mechanisms of splice alteration (for example, altered 5\u2032 splice site usage, intron retention).","This means that in the haploid context, both dominant and recessive effects are observed, which cannot be separated by function score alone. We also performed SGE in diploid HAP1 cells. Whereas function scores from these experiments revealed differences between T-loop and Stem III variants, they were once more unable to distinguish dominant and recessive variants in vivo. It is likely that specific changes in splicing underlying certain clinical phenotypes may not occur in HAP1 due to differences between cell types. It is notable, for instance, that a variant recently associated with retinitis pigmentosa (n.56T>C) did not score significantly.","Furthermore, most individuals with ReNU syndrome (70\u201375%) have the same single base insertion, n.64_65insT. Our data indicate that this variant is not unique in its functional severity, with many variants scoring similarly or having even lower function scores. 
This result could argue against high recurrence being the result of a particularly damaging functional effect driving ascertainment, suggesting that positive selection in the female germline or an increased local mutation rate might be more likely explanations.","However, we cannot rule out the possibility that this variant leads to unique changes in splicing not reflected in SGE function scores.\r\n\r\nFuture experiments using more cell types will be valuable for delineating mechanisms of RNU4-2 pleiotropy.","Likewise, testing larger insertions and deletions both inside and outside the ReNU CR will add insights into the degree of tolerated disruption across different regions of RNU4-2. For example, in ref.","4, the authors identified a 2-nt deletion (n.72_73del) in 2 individuals. This variant falls between Stem III and the T-loop but suggests that larger insertions and deletions in this region may also be disruptive to these structures. As we have observed for CR variants associated with ReNU syndrome, the degree of functional impact caused by recessive NDD variants may correlate with disease severity. There may also be phenotypic differences between individuals with variants mapping to the four distinct regions we identified.","Thorough phenotyping of large cohorts of cases will be necessary to establish how the degree of functional effect influences phenotype.\r\n\r\nIn summary, this work illustrates the power of a variant effect map for a locus recently implicated in disease to discover new genotype\u2013phenotype associations and understand mechanisms underlying disease. 
SGE data for RNU4-2 will be critical for accurately diagnosing patients with at present unexplained NDD and provide insights that are valuable for efforts to design effective therapies.","Finally, the SGE strategy we used to overcome the high sequence homology of RNU4-2 can be replicated to dissect other snRNAs recently linked to disease31,32.\r\nMethods\r\nSingle guide RNA design and cloning\r\nThe gRNA used for SGE was designed using Benchling\u2019s CRISPR design tool to search the RNU4-2 locus, including upstream and downstream regions of low sequence homology to RNU4-1 and pseudogenes, identifying a candidate with high on-target and low off-target scores. The selected gRNA was not predicted to target RNU4-1, owing to eight mismatches occurring in the protospacer and PAM. The gRNA spacer sequence was ligated into the pX459 backbone as previously described33.","In brief, complementary primers containing the spacer were ordered from IDT (Supplementary Table 3), phosphorylated, hybridized and ligated into the pX459 linearized backbone followed by PlasmidSafe DNase (Lucigen) digestion.","Next, 2\u2009\u00b5l of the ligation reaction were transformed in NEB Stable Competent Escherichia coli cells using the high-efficiency transformation protocol and 75\u2009\u00b5l of transformant was plated on ampicillin-resistant plates and cultured overnight at 30\u2009\u00b0C. Three colonies were then picked and grown overnight at 37\u2009\u00b0C in 7\u2009ml of Luria\u2013Bertani medium supplemented with carbenicillin (100\u2009\u00b5g\u2009ml\u22121). Plasmid DNA was extracted using the QIAprep Spin Miniprep kit (Qiagen) and verified using Plasmidsaurus whole-plasmid sequencing. 
The selected clone was then grown in 100\u2009ml of Luria\u2013Bertani medium at 37\u2009\u00b0C in a shaking incubator supplemented with carbenicillin.","The cells were then pelleted and the plasmid was extracted using a ZymoPure Maxiprep kit (Zymo Research), endotoxins were removed using EndoZero columns (Zymo Research) and the product was quantified with the Qubit double-stranded DNA (dsDNA) BR assay kit (Invitrogen).\r\nHDR library cloning\r\nAn oligonucleotide library comprising RNU4-2 variants was manufactured by Twist Bioscience and subsequently cloned into a vector containing homology arms for RNU4-2 to make the HDR library for SGE.\r\n\r\nTo generate the vector with homology arms, a nested PCR was performed on genomic DNA (gDNA) extracted from HAP1 cells10 using primers designed to generate homology arms of 700\u2013800\u2009base pairs (bp) flanking RNU4-2 (Supplementary Table 3).","The PCR was performed using the Kapa HiFi HotStart ReadyMix (Roche). The product was purified using AmpureXP (Beckman Coulter) magnetic beads at 1.2\u00d7 volume and eluted in 12\u2009\u00b5l of nuclease-free water. The amplicon containing RNU4-2 homology arms was then inserted in the linearized pUC19 backbone using In-Fusion HD cloning (Takara) and 2\u2009\u00b5l of cloning reaction was transformed into NEB Stable cells following the manufacturer\u2019s 5-min transformation protocol. Cells were plated on agar plates containing ampicillin and incubated at 30\u2009\u00b0C overnight. The pUC19 plasmid containing RNU4-2 homology arms (pUC19-RNU4-2-HA) was purified and sequence-verified from a successfully transformed clone.","pUC19-RNU4-2-HA was then diluted to 8.7\u2009pg in a 50-\u00b5l PCR reaction and amplified with Kapa HiFi to obtain a linearized product with 17\u201318\u2009bp complementarity to the RNU4-2 oligo library. 
A PAM-blocking mutation was introduced 27\u2009nt upstream of the RNU4-2 sequence (chromosome 12:120291930-C-G) by means of primer overhang extension during PCR. The location of the PAM-disrupting edit was selected to minimize recutting by Cas9, converting a 5\u2032-GGG PAM sequence to 5\u2032-GCG. The PAM-disrupting edit had a CADD score of 4.20 (Phred) and a 100 vertebrates PhyloP score of 0.11. The reaction was treated with 1\u2009\u00b5l of DpnI (NEB) for 30\u2009min at 37\u2009\u00b0C, gel extracted and quantified.","Then, the RNU4-2 oligo library was amplified using Kapa HiFi and purified using AmpureXP (1.2\u00d7). The amplified library and linearized pUC19-RNU4-2-HA plasmid were then assembled using the In-Fusion HD cloning kit, and the product was transformed into NEB Stable cells using the high-efficiency transformation protocol. To quantify efficiency, 1% of cells in the transformation reaction were plated and the remainder were cultured in 100\u2009ml of Luria\u2013Bertani medium with carbenicillin overnight at 37\u2009\u00b0C. Cells were then pelleted by centrifugation and the final RNU4-2 HDR library was extracted using the ZymoPure Maxiprep kit (Zymo Research) with endotoxin removal.","The isolated HDR library was quantified with a Qubit dsDNA BR assay kit and sequence-verified by Plasmidsaurus.\r\nHAP1 cell culture\r\nHAP1 cells used for SGE (the\u00a0HAP1-LIG4-KO line;\u00a0herein referred to as \u2018HAP1\u2019)\u00a0show increased rates of editing by HDR due to a frameshifting mutation in LIG4 (ref. 10). Frozen HAP1 cells were thawed at 37\u2009\u00b0C in a water bath, then supplemented with 10\u2009ml of prewarmed Iscove\u2019s Modified Dulbecco\u2019s Medium (IMDM) containing l-glutamine, 25\u2009nM HEPES (Gibco), 10% FBS (Gibco), 1% penicillin\u2013streptomycin (Gibco) and 2.5\u2009\u03bcM 10-deacetyl-baccatin-III (DAB, Stratech), herein referenced to as IMDMc. 
Cells were centrifuged at 300g for 3\u2009min.","The supernatant was then aspirated and the cells were resuspended in fresh media, plated on a 10-cm dish and cultured at 37\u2009\u00b0C with 5% CO2. The next day, the IMDMc media was replaced, and cells were cultured routinely from that point forward.\r\n\r\nThe HAP1 subculture routine included a 1:5 split every 48\u2009h or 1:10 split every 72\u2009h to prevent cells from exceeding 80% confluency. To split cells, the media was aspirated and the dish washed with 10\u2009ml of room-temperature Dulbecco\u2019s PBS (Gibco). Following Dulbecco\u2019s PBS aspiration, the cells were treated with 1\u2009ml of 0.25% trypsin\u2013EDTA (Gibco) and incubated for 3\u2009min at 37\u2009\u00b0C.","Next 14\u2009ml of prewarmed IMDMc was then added and cells were collected and centrifuged at 300g for 5\u2009min. Cells were then resuspended in 10\u2009ml of IMDMc, counted and seeded on a 10-cm dish.\r\nGeneration of diploid HAP1 cells\r\nParental HAP1 cells were cultured for 9\u2009days after thawing in IMDMc without DAB supplementation to allow for the spontaneous occurrence of diploid cells.","On day 10, cells were stained with 5\u2009\u00b5g\u2009ml\u22121 Hoechst working solution (Thermo Fisher Scientific) for 1\u2009h at 37\u2009\u00b0C, followed by fluorescence-activated cell sorting to select diploid cells using a BD FACSAria Fusion Flow Cytometer. Diploid cells were sorted on the basis of their G2\/M peak (4n), with gates established using a monoclonal diploid HAP1 control population. 
Sorted diploid HAP1 cells were then expanded for 10\u2009days in IMDMc without DAB supplementation before the subsequent SGE experiment.\r\nTransfection and selection\r\nThe day before transfection, 12\u2009million cells were seeded on a 10-cm dish for each replicate and 2\u2009million cells were seeded on a six-well plate for the negative control sample.","On the day of transfection (day 0), a transfection mix containing 10\u2009\u00b5g of HDR library, 30\u2009\u00b5g of the pX459 gRNA plasmid and 24\u2009\u00b5l of Xfect polymer (Takara) in a final volume of 800\u2009\u00b5l was prepared according to the manufacturer\u2019s instructions for each replicate. For the negative control sample, a pX459 plasmid with a gRNA targeting HPRT1 (ref. 13) instead of RNU4-2 was used to prevent successful editing, and the transfection volume mix was scaled down eightfold. Following transfection, cells were incubated for 24\u2009h at 37\u2009\u00b0C and supplemented with prewarmed IMDMc with 1\u2009\u00b5g\u2009ml\u22121 puromycin (Cayman Chemical).","On day 4, half of the cells for each replicate were collected for gDNA extraction and stored as a pellet at \u221270\u2009\u00b0C; the rest were kept in culture in 15-cm dishes supplemented with 15\u2009ml of IMDMc. The negative control sample was collected when reaching 70% confluency at day 6. A second sample of 10\u2009million cells per replicate was collected at day 14 and stored at \u221270\u2009\u00b0C.\r\nSequencing library preparation\r\ngDNA was extracted from cells using QIAshredder (Qiagen) columns followed by the Allprep DNA\/RNA kit (Qiagen) according to the manufacturer\u2019s instructions. Concentrations were determined using the Qubit dsDNA BR assay kit.","The RNU4-2 locus was subsequently amplified using nested PCR to avoid amplification of plasmid DNA, followed by an indexing PCR, in total using three primer sets (Supplementary Table 3). 
For the first reaction, the total gDNA template from each condition was partitioned into separate reactions, each containing 1.25\u2009\u00b5g of DNA in a 100\u2009\u00b5l reaction volume, using NEBNext Ultra II Q5 master mix (NEB) supplemented with MgCl2 (Ambion) to a final concentration 4\u2009mM. The amplification reaction was monitored by quantitative PCR (qPCR) using SYBR green (Invitrogen) and stopped before completion.","The reactions for each sample were pooled and mixed before 50\u2009\u00b5l of each product was purified using AmpureXP (1.2\u00d7) and eluted in 15\u2009\u00b5l of nuclease-free water. Then 1\u2009\u00b5l of purified product was loaded into the second qPCR reaction (50\u2009\u00b5l final volume) and amplified using NEBNext Ultra II Q5. The reaction was again monitored using SYBR green and stopped before completion. The AmpureXP purification was then repeated, and a final qPCR (NEBNext Ultra II Q5) to incorporate sample indexes and sequencing adapters was performed using 1\u2009\u00b5l of purified product as template in a 50\u2009\u00b5l reaction for 8 cycles. Final products were purified and quantified with the Qubit dsDNA HS kit.","The samples were then pooled for sequencing, aiming for 5\u2009million reads per experimental replicate timepoint, 2\u2009million reads for the negative control sample and 1\u2009million reads for the HDR library. The pool was purified using AmpureXP (1\u00d7), quantified and loaded on a Novaseq X sequencer (Illumina).\r\nVariant frequency quantification\r\nThe fastq files were de-multiplexed using the bcl2fastq script and the variants were quantified as previously described13.","In brief, paired-end reads were adapter trimmed and merged, and reads containing N bases were discarded. HDR editing rates were computed from fastq files directly as the fraction of reads containing the exact PAM-blocking mutation. 
Fastq files were then aligned to a reference RNU4-2 sequence and the frequency of each variant included in the library was determined.\r\nFunction score calculation\r\nAll variants were observed in the library and day 4 at a frequency higher than 10\u22124, and were therefore included in downstream analyses.","Function scores for library variants were first calculated per replicate, computed as the log2 ratio of day 14 to day 4 variant frequencies, normalized by subtracting the median function score of negative control insertions from all scores. Final function scores were then calculated for each variant by averaging function scores across replicates, again normalizing to the median of negative control insertions such that the median final function score of control insertions equals 0. For each variant, P values were determined using the norm.cdf function in Python, defining a normal distribution from the mean and standard deviation of function scores for negative control insertions.","The P values were corrected for multiple hypothesis testing using the multipletests function in Python (Benjamini\u2013Hochberg procedure) to derive q values. Significantly depleted variants were defined as those with q\u2009<\u20090.01, corresponding to a function score below \u22120.302. We further classified depleted variants into two categories using an arbitrary function score threshold of \u22120.9 to include sufficient variants and individuals per category to assess for phenotypic differences.\r\nVariant scoring with CADD and ViennaRNA\r\nVariants were annotated as ReNU syndrome variants if they were reported in ref. 1 or classified as pathogenic or likely pathogenic in ref. 4.","Variants were annotated with whether or not they were observed in the 490,640 genome sequenced individuals from the UK Biobank18 (DRAGEN pipeline) or in 414,840 individuals from All of Us V8. CADD v.1.7 (ref. 
19) annotations were obtained by uploading a synthetic VCF to the online annotation tool (https:\/\/cadd.gs.washington.edu\/score). As we preselected which insertions and deletions to include in the SGE assay (because of assay size limitations), we restricted analyses involving CADD to SNVs within the RNU4-2 transcript.\r\n\r\nFor variants assayed within the RNU4-2 transcript, predicted changes in U4\/U6 interaction stability (\u0394\u0394Gbind) were computed using the ViennaRNA package34 (v.2.7.0).","Minimum free energies (MFEs) were obtained by use of RNA.fold_compound() at 37\u2009\u00b0C using default Turner RNA thermodynamic parameters. U4\/U6 pairing was modelled with the ViennaRNA cofold grammar by providing sequences in the dimer format (u4(AGCUUUGCGCAGUGGCAGUAUCGUAGCCAAUGAGGUUUAUCCGAGGCGCGAUUAUUGCUAAUUGAAAACUUUUCCCAAUACCCCGCCAUGACGACUUGAAAUAUAGUCGGCAUUGGCAAUUUUUGACAGUCUCUACGGAGACUGA) + \u2018&\u2019 + u6(GUGCUCGCUUCGGCAGCACAUAUACUAAAAUUGGAACGAUACAGAGAAGAUUAGCAUGGCCCCUGCGCAAGGAUGACACGCAAAUUCGUGAAGCGUUCCAUAUUUU)), and the intermolecular MFE was extracted using mfe_dimer().","Single-strand MFEs for U4 and U6 were computed independently using mfe().\r\n\r\nBinding free energy was defined as:\r\n\r\n$$\\Delta {G}_{{\\rm{bind}}}=\\Delta {G}_{{\\rm{complex}}}-(\\Delta {G}_{{\\rm{U}}4}+\\Delta {G}_{{\\rm{U}}6})$$\r\n\r\nThe same procedure was applied to RNU4-2 variant sequences, and differential stability was then calculated as:\r\n\r\n$$\\Delta \\Delta {G}_{{\\rm{bind}}}=\\Delta {G}_{{\\rm{bind.variant}}}-\\Delta {G}_{{\\rm{bind.reference}}}$$\r\n\r\nPositive \u0394\u0394Gbind values indicate predicted destabilization of U4\/U6 pairing.\r\n\r\nVariants were mapped to the following structural regions of RNU4-2: Stem II (n.3 to n.16), k-turn within the 5\u2032 
Stem loop (n.27 to n.35 and n.41 to n.46), Stem I (n.56 to n.62), T-loop (n.63 to n.70), Stem III (n.75 to n.79), 3\u2032 Stem loop (n.85 to n.117), Sm protein (n.118 to n.126) and terminal Stem loop (n.127 to n.144).\r\n\r\nROC area under the curve (AUC) values were calculated by assigning a 1 label to ReNU syndrome SNVs and a 0 label for SNVs observed in UK Biobank or All of Us.","The labels and corresponding function scores were used to compute false positive and true positive rates (using Python\u2019s roc_curve function), as well as ROC-AUC values (using the roc_auc_score function). This analysis was also restricted to SNVs only.\r\nAssigning evidence codes to variants based on function score\r\nWe followed established guidelines8 to calibrate function scores from SGE experiments in haploid cells to evidence strengths for classification of ReNU syndrome variants.","To do so, we defined a gold standard set of pathogenic, dominantly inherited variants as the 17 previously reported4 as \u2018pathogenic\u2019 or \u2018likely pathogenic\u2019 for which we derived function scores. Few RNU4-2 variants have been deemed benign in ClinVar, so we instead used reported allele counts in the UK Biobank and All of Us studies to define a neutral set of variants. This included all 45 assayed variants with a combined allele count of more than 100 between the two studies. A two-component Gaussian mixture model was then fit from the function score distributions of these variant sets, using the \u2018Mclust\u2019 package in R.","This model was then used to determine the probability of pathogenicity for each variant in the CR based on function score. 
The resulting posterior probabilities were then converted to OddsPath values using a uniform prior of 0.5, and evidence codes were assigned according to established OddsPath thresholds8 with the exception that PS3 evidence was capped at strong (+4 points), in line with the limited number of gold standard variants available for calibration.","We did not apply the model to variants outside the CR on account of there being no known pathogenic variants for ReNU syndrome in these regions.\r\nPhenotype severity and clustering\r\nCategorical data for 44 clinical features from 143 patients with pathogenic and likely pathogenic RNU4-2 variants4 were transformed into a 0\u20131 scale, with 0 indicating a more favourable phenotype and 1 a more severe presentation. Principal component analysis was performed after imputing missing data with 0 and performing variable scaling. UMAP representation was created using the umap package in R.","Two-sided Fisher\u2019s tests with Bonferroni adjustment to account for four tests were used to compare clinical features between SGE function score variant categories (strong versus moderate) in Extended Data Table 1.\r\nRNA sequencing cluster analysis\r\nRNA sequencing from cultured lymphocytes was performed following the protocol described in ref. 4 for RNU4-2 and\u00a0rMATS-turbo (v.4.3.0)35 was run on 19 ReNU samples and 20 control participants (excluding one individual previously deemed a control in ref. 4 who was here found to be a recessive RNU4-2 case); 101 significant alternative non-canonical 5\u2032 splice site (A5SS) events (false discovery rate less than 0.1, \u0394PSI\u2009>\u20090.05) were retained.","Then rMATS-turbo was rerun on the 19 ReNU samples and the 20 control participants, without statistical or \u0394PSI filtering. 
The A5SS output was filtered on the 101 retained events and the PSI values were extracted to perform the principal component analysis.\r\nAssociation testing in UK Biobank\r\nWe extracted phenotypes associated with educational attainment from UK Biobank following an approach published previously36. Fluid intelligence scores (field ID 20016) were retrieved for all participants. Where many scores were recorded, the median value was taken. Age left education was calculated as the maximum value in age completed full time education (field ID 845).","Diagnosis with childhood developmental disorder was defined using the ICD codes for intellectual disability (ICD-10: F70\u2013F73, F78, F79; ICD-9: 317, 318, 319), epilepsy (ICD-10: G40), global developmental disorders (ICD-10: F80\u2013F84, F88\u2013F95, R62, R48, Z55; ICD-9: 299, 312, 313, 314, 315) and congenital malformations (ICD-10: Q0\u2013Q99, ICD-9: 740\u2013759).\r\n\r\nWe identified UK Biobank participants with: (1) depleted variants in the 18-bp RNU4-2 CR (n\u2009=\u20096), (2) depleted variants outside the CR (n\u2009=\u200950) and (3) participants with non-depleted SNVs outside the CR (n\u2009=\u200912,132).","We performed multiple linear regression on fluid intelligence scores and age left education, and multiple logistic regression on childhood developmental disorder for variant groups (2) and (3) defined above, compared with all individuals without any variants in any of the three groups. 
Age at recruitment (field ID 21022), age2 (age at recruitment\u2009\u00d7\u2009age at recruitment), sex (field ID 31) and first ten genetic principal components (field ID 22009) were included as covariates.","P values were false discovery rate-corrected using the Benjamini\u2013Hochberg method.\r\nInvestigating RNU4ATAC variants in ClinVar\r\nVariants in RNU4ATAC with classifications of pathogenic, likely pathogenic, pathogenic or likely pathogenic, benign, likely benign or benign or likely benign were downloaded from the ClinVar37 website on 4 March 2025. Two regions of RNU4-2 and RNU4ATAC with identical structures were defined, mapping to the k-turn (RNU4-2 nucleotides 26\u201352; RNU4ATAC nucleotides 31\u201357) and the Sm protein binding site (RNU4-2 nucleotides 115\u2013126; RNU4ATAC nucleotides 113\u2013124).","Variants at the same nucleotide in the structure and where the reference bases in RNU4-2 and RNU4ATAC are identical, were marked as \u2018equivalent\u2019.\r\nIdentifying biallelic variants in cohorts\r\nWe searched rare disease cohorts for individuals with biallelic variants in RNU4-2. These cohorts included the Genomics England 100,000 Genomes Project and NHS Genomic Medicine Service datasets accessed through the UK National Genomic Research Library38, the SeqOIA and Auragen clinical cohorts in France (PFMG2025), the Undiagnosed Disease Network, the Broad Institute Center for Mendelian Genomics and GREGoR (Genomics Research to Elucidate the Genetics of Rare Diseases)39 Consortium cohorts.","We only included individuals with homozygous variants with function scores less than \u22120.302, or compound heterozygous variants in which both had function scores less than \u22120.302 (n\u2009=\u200920). All individuals had previous genome analysis including investigation of variants in known NDD genes and large structural variants. 
One individual (individual 17) had a reported likely pathogenic variant in GLI3; however, this variant did not explain all of their reported phenotypes (see ref. 21 for more details).\r\nEthics\r\nInformed consent was obtained for all participants included in this study from their parent(s) or legal guardian, with the study approved by the local regulatory authority.","The 100,000 Genomes Project Protocol has ethical approval from the Health Research Authority Committee East of England Cambridge South (Research Ethics Committee ref. 14\/EE\/1112). This study was approved by Genomics England under Research Registry Projects 354.","Health related research in UK Biobank was approved by the Research Ethics Committee under reference 20\/NW\/0274 with this research conducted under application number 81050.\r\n\r\nWe received an exception to the Data and Statistics Dissemination Policy from the All of Us Resource Access Board to report questionnaire response data for the single individual with a homozygous depleted variant as well as variant counts below 20 for all variants in RNU4-2.\r\nReporting summary\r\nFurther information on research design is available in the\u00a0Nature Portfolio Reporting Summary linked to this article.\r\n\nData availability\r\nSGE data including all RNU4-2 function scores are available in Supplementary Table 1.","Fastq files from SGE experiments are available through the European Nucleotide Archive at accession PRJEB87505. RNA sequencing data (Fig. 3d) were taken from ref. 4 and are available in the European Genome\u2013Phenome Archive at http:\/\/www.ebi.ac.uk\/ega; study accession EGAS50000000889. 
UK Biobank and All of Us V8 data are available to researchers on approval of application (https:\/\/www.ukbiobank.ac.uk\/use-our-data\/apply-for-access\/; https:\/\/www.researchallofus.org\/).\r\n\nCode availability\r\nCustom scripts used to analyse SGE experiments and generate figures are available at GitHub (https:\/\/github.com\/FrancisCrickInstitute\/RNU4-2_Saturation_Genome_Editing).\r\n\nReferences\r\nChen, Y. et al.","De novo variants in the RNU4-2 snRNA cause a frequent neurodevelopmental syndrome. Nature 632, 832\u2013840 (2024).\r\n\r\nGreene, D. et al. Mutations in the U4 snRNA gene RNU4-2 cause one of the most prevalent monogenic neurodevelopmental disorders. Nat. Med.","30, 2165\u20132169 (2024).\r\n\r\nNguyen, T. H. D. et al. The architecture of the spliceosomal U4\/U6.U5 tri-snRNP. Nature 523, 47\u201352 (2015).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nNava, C. et al. Dominant variants in major spliceosome U4 and U5 small nuclear RNA genes cause neurodevelopmental disorders through splicing disruption. Nat. Genet.","57, 1374\u20131388 (2025).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nBruselles, A. et al. Expanding the mutational spectrum of ReNU syndrome: insights into 5\u2032 stem-loop variants. Eur. J. Hum. Genet.","33, 432\u2013440 (2025).\r\n\r\nSeplyarskiy, V. et al. A mutation rate model at the basepair resolution identifies the mutagenic effect of polymerase III transcription. Nat. Genet.","55, 2235\u20132242 (2023).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nRentzsch, P.,","Witten, D.,","Cooper, G.","M.,","Shendure, J.","& Kircher, M. CADD: predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Res.","47, D886\u2013D894 (2019).\r\n\r\nArticle\nCAS\nPubMed\nGoogle Scholar\r\n\r\nBrnich, S. E. et al. 
Recommendations for application of the functional evidence PS3\/BS3 criterion using the ACMG\/AMP sequence variant interpretation framework. Genome Med.","12, 3 (2019).\r\n\r\nArticle\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nFindlay, G.","M.,","Boyle, E.","A.,","Hause, R.","J.,","Klein, J. C. & Shendure, J. Saturation editing of genomic regions by multiplex homology-directed repair. Nature 513, 120\u2013123 (2014).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nFindlay, G. M. et al. Accurate classification of BRCA1 variants with saturation genome editing. Nature 562, 217\u2013222 (2018).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nMeitlis, I. et al. Multiplexed functional assessment of genetic variants in CARD11. Am. J. Hum. Genet.","107, 1029\u20131043 (2020).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nRadford, E. J. et al. Saturation genome editing of DDX3X clarifies pathogenicity of germline and somatic variation. Nat. Commun.","14, 7702 (2023).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nBuckley, M. et al. Saturation genome editing maps the functional spectrum of pathogenic VHL alleles. Nat. Genet.","56, 1446\u20131455 (2024).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nWaters, A. J. et al. Saturation genome editing of BAP1 functionally classifies somatic and germline variants. Nat. Genet.","56, 1434\u20131445 (2024).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nHsu, P. D. et al. DNA targeting specificity of RNA-guided Cas9 nucleases. Nat. Biotechnol.","31, 827\u2013832 (2013).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nOlvera-Le\u00f3n, R. et al. High-resolution functional mapping of RAD51C by saturation genome editing.","Cell 187, 5719\u20135734.e19 (2024).\r\n\r\nArticle\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nHuang, H. et al. 
Functional evaluation and clinical classification of BRCA2 variants. Nature 638, 528\u2013537 (2025).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nSudlow, C. et al. UK Biobank: an open access resource for identifying the causes of a wide range of complex diseases of middle and old age. PLoS Med.","12, e1001779 (2015).\r\n\r\nArticle\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nSchubach, M.,","Maass, T., Nazaretyan, L.,","R\u00f6ner, S.","& Kircher, M. CADD v1.7: using protein language models, regulatory CNNs and other nucleotide-level scores to improve genome-wide variant predictions. Nucleic Acids Res.","52, D1143\u2013D1154 (2024).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nQuinodoz, M. et al. De novo and inherited dominant variants in U4 and U6 snRNA genes cause retinitis pigmentosa. Nat. Genet.","58, 169\u2013179 (2026).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nRius, R. et al. Biallelic variants in the noncoding RNA gene RNU4-2 cause a recessive neurodevelopmental syndrome with distinct white matter changes. Nat. Genet. https:\/\/doi.org\/10.1038\/s41588-026-02554-6 (2026).\r\n\r\nLiu, S. et al. Binding of the human Prp31 Nop domain to a composite RNA-protein platform in U4 snRNP. Science 316, 115\u2013120 (2007).\r\n\r\nArticle\nADS\nCAS\nPubMed\nGoogle Scholar\r\n\r\nNottrott, S. et al. Functional interaction of a novel 15.5kD [U4\/U6\u00b7U5] tri-snRNP protein with the 5\u2032 stem\u2013loop of U4 snRNA. EMBO J. https:\/\/doi.org\/10.1093\/emboj\/18.21.6119 (1999).\r\n\r\nPannone, B. K.","& Wolin, S. L. Sm-like proteins wRING the neck of mRNA. Curr. Biol.","10, R478\u201381 (2000).\r\n\r\nArticle\nCAS\nPubMed\nGoogle Scholar\r\n\r\nHu, J.,","Xu, D.,","Schappert, K.,","Xu, Y.","& Friesen, J. D. Mutational analysis of Saccharomyces cerevisiae U4 small nuclear RNA identifies functionally important domains. Mol. Cell. 
Biol.","15, 1274\u20131285 (1995).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nEdery, P. et al. Association of TALS developmental disorder with defect in minor splicing component U4atac snRNA. Science 332, 240\u2013243 (2011).\r\n\r\nArticle\nADS\nCAS\nPubMed\nGoogle Scholar\r\n\r\nFarach, L. S. et al. The expanding phenotype of RNU4ATAC pathogenic variants to Lowry Wood syndrome. Am. J. Med. Genet.","A 176, 465\u2013469 (2018).\r\n\r\nArticle\nCAS\nPubMed\nGoogle Scholar\r\n\r\nMerico, D. et al. Compound heterozygous mutations in the noncoding RNU4ATAC cause Roifman Syndrome by disrupting minor intron splicing. Nat. Commun.","6, 8718 (2015).\r\n\r\nArticle\nADS\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nOlthof, A. M. et al. Disruption of exon-bridging interactions between the minor and major spliceosomes results in alternative splicing around minor introns. Nucleic Acids Res.","49, 3524\u20133545 (2021).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nArriaga, T. M. et al. Transcriptome-wide outlier approach identifies individuals with minor spliceopathies. Am. J. Hum. Genet.","112, 2458\u20132475 (2025).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nJackson, A. et al. Analysis of R-loop forming regions identifies RNU2-2 and RNU5B-1 as neurodevelopmental disorder genes. Nat. Genet.","57, 1362\u20131366 (2025).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nGreene, D. et al. Mutations in the small nuclear RNA gene RNU2-2 cause a severe neurodevelopmental disorder with prominent epilepsy. Nat. Genet.","57, 1367\u20131373 (2025).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nRan, F. A. et al. Genome engineering using the CRISPR\u2013Cas9 system. Nat. Protoc.","8, 2281\u20132308 (2013).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nLorenz, R. et al. ViennaRNA package 2.0. Algorithms Mol. 
Biol.","6, 26 (2011).\r\n\r\nArticle\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nWang, Y. et al. rMATS-turbo: an efficient and flexible computational tool for alternative splicing analysis of large-scale RNA-seq data. Nat. Protoc.","19, 1083\u20131104 (2024).\r\n\r\nArticle\nCAS\nPubMed\nGoogle Scholar\r\n\r\nKingdom, R.,","Beaumont, R.","N.,","Wood, A.","R.,","Weedon, M. N.","& Wright, C. F. Genetic modifiers of rare variants in monogenic developmental disorder loci. Nat. Genet.","56, 861\u2013868 (2024).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nLandrum, M. J. et al. ClinVar: public archive of relationships among sequence variation and human phenotype. Nucleic Acids Res.","42, D980\u2013D985 (2014).\r\n\r\nArticle\nCAS\nPubMed\nGoogle Scholar\r\n\r\nGenomics England. National Genomic Research Library. Dataset. figshare https:\/\/doi.org\/10.6084\/m9.figshare.4530893.v8 (2025).\r\n\r\nDawood, M. et al. GREGoR: accelerating genomics for rare diseases. Nature 647, 331\u2013342 (2025).\r\n\r\nArticle\nCAS\nPubMed\nPubMed Central\nGoogle Scholar\r\n\r\nDownload references\r\nAcknowledgements\r\nWe thank the Crick\u2019s Genomics Scientific Technology Platform for performing sequencing and the Flow Cytometry and Cell Sciences Scientific Technology Platforms for assisting in maintaining cell lines. We also thank P. O\u2019Donovan, M. Sato and E. Miller from the Genomics England Airlock team. N.W.","is supported by a Sir Henry Dale Fellowship jointly funded by the Wellcome Trust and the Royal Society (grant 220134\/Z\/20\/Z), a Lister Institute research prize and grant funding from Novo Nordisk. Y.C. is supported by a studentship from Novo Nordisk. The Francis Crick Institute receives its core funding (G.M.F.) from Cancer Research UK (grant CC2190), the UK Medical Research Council (grant CC2190) and the Wellcome Trust (grant CC2190). G.M.F. is supported by a European Research Council Starting grant (Seq2Func-NC). A.J.M.B. 
is supported by a Wellcome PhD Training Fellowship for Clinicians and the 4Ward North PhD Programme for Health Professionals (grant 223521\/Z\/21\/Z). C.D.","is supported by research grants from the Deutsche Forschungsgemeinschaft (DFG) (project grants 455314768, 458099954 and 505514143). C.N. has received support from the Health philanthropic program of Mutuelles AXA dedicated to supporting innovative research projects in France (RNU-SPLICE project). Patients 4, 5, 6, 13, 14, 15 and 16 included in this study were diagnosed through Plan France M\u00e9decine G\u00e9nomique 2025 (PFMG2025). Patients 11 and 12 were sequenced at the Baylor College of Medicine Human Genome Sequencing Center through the GREGoR Consortium with support from US National Human Genome Research Institute grants U01HG011758 and U54HG003273.","Analysis of individuals 9 and 10 was supported by National Human Genome Research Institute grant R01HG009141. D.G.C. was supported by the Child Neurologist Career Development Program CNCDP-K12 (US National Institute of Neurological Disorders and Strokes grant NS098482). C.A.-T. is supported in part by the National Human Genome Research Institute grant U01HG011755 (GREGoR consortium). O.M. is supported by the Hazem Ben-Gacem Tunisia Medical Fellowship Fund. Research reported in this publication was supported by the National Institute Of Neurological Disorders And Stroke of the National Institutes of Health under grant awards U01HG010218 and U01HG010233.","The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. This research was made possible through access to data in the National Genomic Research Library, which is managed by Genomics England Limited (a wholly owned company of the Department of Health and Social Care). 
The National Genomic Research Library holds data provided by patients and collected by the NHS as part of their care and data collected as part of their participation in research. The National Genomic Research Library is funded by the National Institute for Health Research and NHS England.","The Wellcome Trust, Cancer Research UK and the Medical Research Council have also funded research infrastructure. This study was registered with Genomics England under Research Registry Projects 354. This research has been conducted using the UK Biobank Resource under application number 81050. We gratefully acknowledge All of Us and UK Biobank participants for their contributions. We also thank the National Institutes of Health\u2019s All of Us Research Program for making available the participant and variant data examined in this study.","For the purpose of Open Access, the authors have applied a CC BY public copyright licence to any Author Accepted Manuscript version arising from this submission.\r\nAuthor information\r\nAuthor notes\r\n\r\nThese authors contributed equally: Nicola Whiffin, Gregory M. Findlay\r\nAuthors and Affiliations\r\nThe Genome Function Laboratory, The Francis Crick Institute, London, UK\r\n\r\nJoachim De Jonghe,\u00a0Ayanfeoluwa Adedeji,\u00a0Christina M. 
Kajba\u00a0&\u00a0Gregory M.","Findlay\r\n\r\nBig Data Institute, University of Oxford, Oxford, UK\r\n\r\nHyung Chul Kim,\u00a0Ruebena Dawes,\u00a0Yuyang Chen\u00a0&\u00a0Nicola Whiffin\r\n\r\nCentre for Human Genetics, University of Oxford, Oxford, UK\r\n\r\nHyung Chul Kim,\u00a0Ruebena Dawes,\u00a0Yuyang Chen\u00a0&\u00a0Nicola Whiffin\r\n\r\nDepartment of Biochemical Engineering, University College London, London, UK\r\n\r\nAyanfeoluwa Adedeji\r\n\r\nInstitute of Human Genetics, University Hospital Essen, University Duisburg-Essen, Essen, Germany\r\n\r\nElsa Leit\u00e3o\u00a0&\u00a0Christel Depienne\r\n\r\nNantes Universit\u00e9, CHU de Nantes, CNRS, INSERM, L\u2019Institut du Thorax, Nantes, France\r\n\r\nBenjamin Cogn\u00e9\r\n\r\nNantes Universit\u00e9, CHU de Nantes, CNRS, INSERM, G\u00e9n\u00e9tique m\u00e9dicale, Nantes, France\r\n\r\nBenjamin Cogn\u00e9\r\n\r\nManchester Centre for Genomic Medicine, Division of Evolution and Genomic Sciences, School of Biological Sciences, Faculty of Biology, Medicine and Health, University of Manchester, Manchester, UK\r\n\r\nAlexander J.","M. Blakes\r\n\r\nCentre for Population Genomics, Garvan Institute of Medical Research, Sydney, New South Wales, Australia\r\n\r\nCas Simons,\u00a0Rocio Rius\u00a0&\u00a0Daniel G. MacArthur\r\n\r\nCentre for Population Genomics, Murdoch Children\u2019s Research Institute, Melbourne, Victoria, Australia\r\n\r\nCas Simons,\u00a0Rocio Rius\u00a0&\u00a0Daniel G. 
MacArthur\r\n\r\nDepartment of Pediatric Neurology, University of Child Health Sciences, The Children\u2019s Hospital, Lahore, Pakistan\r\n\r\nJaveria R.","Alvi\u00a0&\u00a0Tipu Sultan\r\n\r\nService de G\u00e9n\u00e9tique, G\u00e9nomique et Procr\u00e9ation, CHU Grenoble Alpes, Grenoble, France\r\n\r\nFlorence Amblard,\u00a0Charles Coutton,\u00a0Radu Harbuz\u00a0&\u00a0Julien Thevenon\r\n\r\nGCS AURAGEN, Lyon, France\r\n\r\nFlorence Amblard,\u00a0Charles Coutton\u00a0&\u00a0Julien Thevenon\r\n\r\nUniversit\u00e9 Grenoble Alpes, INSERM U 1209, CNRS UMR 5309, Institut for Advanced Biosciences, Grenoble, France\r\n\r\nFlorence Amblard,\u00a0Charles Coutton\u00a0&\u00a0Julien Thevenon\r\n\r\nBroad Center for Mendelian Genomics, Program in Medical and Population Genetics, Broad Institute of MIT and Harvard, Cambridge, MA, USA\r\n\r\nChristina Austin-Tse,\u00a0Olfa Messaoud\u00a0&\u00a0Nicola Whiffin\r\n\r\nService de p\u00e9diatrie, H\u00f4pitaux Universitaires de Strasbourg, Strasbourg, France\r\n\r\nSarah Baer\r\n\r\nDepartment of Medicine, University of Washington School of Medicine, Seattle, WA, USA\r\n\r\nElsa V.","Balton\u00a0&\u00a0Nitsuh Dargie\r\n\r\nLaboratoire SeqOIA, Paris, France\r\n\r\nPierre Blanc,\u00a0Olivier Grunewald,\u00a0Paul Gueguen,\u00a0Pierre Marijon\u00a0&\u00a0Caroline Nava\r\n\r\nSection of Pediatric Neurology, Department of Pediatrics, Baylor College of Medicine, Houston, TX, USA\r\n\r\nDaniel G. Calame\r\n\r\nTexas Children\u2019s Hospital, Houston, TX, USA\r\n\r\nDaniel G. Calame\r\n\r\nVictorian Clinical Genetics Services, Murdoch Children\u2019s Research Institute, Melbourne, Victoria, Australia\r\n\r\nChloe A. Cunningham\u00a0&\u00a0Richard J. Leventer\r\n\r\nDepartment of Paediatrics, University of Melbourne, Melbourne, Victoria, Australia\r\n\r\nChloe A. Cunningham\u00a0&\u00a0Richard J. 
Leventer\r\n\r\nDepartment of Pediatrics, University of Washington, Seattle, WA, USA\r\n\r\nKatrina M.","Dipple\u00a0&\u00a0Ian Glass\r\n\r\nBrotman Baty Institute for Precision Medicine, Seattle, WA, USA\r\n\r\nKatrina M. Dipple\u00a0&\u00a0Ian Glass\r\n\r\nDepartment of Molecular and Human Genetics, Baylor College of Medicine, Houston, TX, USA\r\n\r\nHaowei Du\r\n\r\nService de G\u00e9n\u00e9tique M\u00e9dicale, Institut de G\u00e9n\u00e9tique M\u00e9dicale D\u2019Alsace, H\u00f4pitaux Universitaires de Strasbourg, Strasbourg, France\r\n\r\nSalima El Chehadeh\r\n\r\nLaboratoire de G\u00e9n\u00e9tique M\u00e9dicale, Institut de G\u00e9n\u00e9tique M\u00e9dicale d\u2019Alsace, INSERM UMRS_1112, CRBS, Universit\u00e9 de Strasbourg, Strasbourg, France\r\n\r\nSalima El Chehadeh\r\n\r\nRady Children\u2019s Institute for Genomic Medicine, San Diego, CA, USA\r\n\r\nJoseph G.","Gleeson\r\n\r\nDepartment of Neurosciences and Pediatrics, University of California, San Diego, San Diego, CA, USA\r\n\r\nJoseph G.","Gleeson\r\n\r\nU1172-LilNCog-Lille Neuroscience and Cognition, CHU de Lille, Lille, France\r\n\r\nOlivier Grunewald\r\n\r\nLaboratoire de Genopathies, CHU Lille, Lille, France\r\n\r\nOlivier Grunewald\r\n\r\nService de G\u00e9n\u00e9tique, CHRU de Tours, Tours, France\r\n\r\nPaul Gueguen\u00a0&\u00a0Marie-Line Jacquemont\r\n\r\nUniversit\u00e9 de Tours, Imaging Brain and Neuropsychiatry iBraiN, Tours, France\r\n\r\nPaul Gueguen\u00a0&\u00a0Marie-Line Jacquemont\r\n\r\nCentre de R\u00e9f\u00e9rence Maladies Rares \u2018Anomalies du D\u00e9veloppement et Syndromes Malformatifs\u2019, FHU Genomeds, CHRU de Tours, Tours, France\r\n\r\nMarie-Line Jacquemont\r\n\r\nRoyal Children\u2019s Hospital, Melbourne, Victoria, Australia\r\n\r\nRichard J.","Leventer\r\n\r\nHarvard Medical School, Boston, MA, USA\r\n\r\nOlfa Messaoud\r\n\r\nCentre de r\u00e9f\u00e9rence maladies rares, D\u00e9ficiences Intellectuelles de Causes Rares, Centre de G\u00e9n\u00e9tique, FHU-TRANSLAD, CHU 
Dijon Bourgogne, Dijon, France\r\n\r\nChristel Thauvin\r\n\r\nUnit\u00e9 Fonctionnelle Innovation en Diagnostic G\u00e9nomique des Maladies Rares, F\u00e9d\u00e9ration Hospitalo-Universitaire-TRANSLAD, CHU Dijon Bourgogne, Dijon, France\r\n\r\nChristel Thauvin\r\n\r\nUMR1231 GAD, Inserm, Universit\u00e9 Bourgogne-Franche Comt\u00e9, Dijon, France\r\n\r\nChristel Thauvin\r\n\r\nClinique de G\u00e9n\u00e9tique, H\u00f4pital Jeanne de Flandre, CHU de Lille, Lille, France\r\n\r\nCatherine Vincent-Delorme\r\n\r\nConsultation de g\u00e9n\u00e9tique, CH Arras, Arras, France\r\n\r\nCatherine Vincent-Delorme\r\n\r\nDepartment of Medical Genetics, Istanbul Medeniyet University Medical School, Istanbul, Turkey\r\n\r\nElif Yilmaz Gulec\r\n\r\nMedical Genetics Clinic, Istanbul Goztepe Prof Dr Suleyman Yalcin City Hospital, Istanbul, Turkey\r\n\r\nElif Yilmaz Gulec\r\n\r\nCardiovascular Medicine, Stanford University, Stanford, CA, USA\r\n\r\nRodrigo Mendez\r\n\r\nSorbonne Universit\u00e9, Institut du Cerveau\u2014Paris Brain Institute\u2014ICM, Inserm, CNRS, APHP, D\u00e9partement de G\u00e9n\u00e9tique, H\u00f4pital de la Piti\u00e9 Salp\u00eatri\u00e8re, Paris, France\r\n\r\nCaroline Nava\r\nContributions\r\nJ.D.J.,","A.A. and C.M.K. performed experiments.","J.D.J.,","H.C.K.,","R.D.,","E.L.,","B.C., Y.C. and\u00a0A.J.M.B. analysed data and contributed to the figures and tables in the paper.","C.S.,","R.R.,\u00a0J.T.,","R.M.,","D.G.M.,","C.D., N.W. and G.M.F. collected data, provided funding and supervised the work. All other authors provided clinical and\/or genomic data and are listed alphabetically.","J.D.J., N.W. and G.M.F. wrote the paper with input from all the authors.\r\nCorresponding authors\r\nCorrespondence to\nNicola Whiffin or Gregory M. Findlay.\r\nEthics declarations\nCompeting interests\r\nN.W. receives research funding from Novo Nordisk and Biomarin Pharmaceutical. D.G.M. 
is a paid consultant for GlaxoSmithKline, Insitro and Overtone Therapeutics and receives research support from Microsoft. The other authors declare no competing interests.\r\n\nPeer review\nPeer review information\r\nNature thanks Karine Choquet and the other, anonymous, reviewer(s) for their contribution to the peer review of this work.","Peer reviewer reports are available.\r\n\nAdditional information\r\nPublisher\u2019s note Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.\r\nExtended data figures and tablesExtended Data Fig. 1 Quality control metrics for RNU4-2 SGE experiments.\r\na,\u00a0The distribution of variant read counts in the HDR library is plotted for all n\u2009=\u2009539 variants included in library design. Of reads from the HDR library, 0.0068% and 4.6% matched unedited reference and PAM-edit only, respectively.","b, The distribution of variant read counts in day 4 gDNA is plotted, with counts averaged across biological replicates.","c,\u00a0Inter-replicate function score correlations are plotted, with Pearson\u2019s r shown and variants coloured by mutation type.\r\nExtended Data Fig. 2 ReNU syndrome variants are discriminated with high precision from variants seen frequently in population controls.\r\nROC-AUC measurements for distinguishing 12 ReNU syndrome SNVs from population control SNVs by SGE score are displayed as a heatmap. Each AUC was determined using only variants in UK Biobank and All of Us with allele counts above the thresholds indicated on the axes. 
For select allele count thresholds applied to both cohorts (10, 20, 40, 60, and 80), the number of population variants retained for the ROC-AUC calculation is indicated.\r\nExtended Data Fig.","3 Correlations between function scores and predicted effects on RNA binding stability.\r\nViennaRNA was used to predict the effects of variants (n\u2009=\u2009521) on the minimum free energy of U4\/U6 RNA binding compared to reference (\u0394\u0394G).","a, Predicted \u0394\u0394G values are plotted versus function scores for the whole transcript, as well as for individual regions (Spearman\u2019s \u03c1).","b, ROC curve for classifying ReNU syndrome variants from population controls using ViennaRNA-predicted \u0394\u0394G values (AUC\u2009=\u20090.72).\r\nExtended Data Fig. 4 Function scores for variants within the RNU4-2 critical region.\r\nFunction scores are plotted by position and coloured by their association with ReNU syndrome (red), presence in the UK Biobank or All of Us cohorts (blue), or no observation in either (teal). Variants score lowly in two regions within the CR (shaded), n.62-70 and n.75-78, which correspond to the T-loop and Stem III, respectively.","The black dashed line (function score\u2009=\u2009\u22120.302) indicates significantly depleted variants and the gray dashed line (function score\u2009=\u2009\u22120.90) separates \u201cmoderate\u201d from \u201cstrong\u201d depletion. The vertical red dashed lines represent the boundaries of the 18 nucleotide ReNU CR reported by Chen et al.1 drawn to include insertions at n.61_62 and n.79_80.\r\nExtended Data Fig. 5 Calibration of function scores to evidence for clinical classification of variants in relation to ReNU syndrome.\r\nGaussian mixture modelling was used to estimate odds of pathogenicity (OddsPath). 
Function scores are plotted against OddsPath values for n\u2009=\u2009127 variants within the ReNU syndrome critical region.","Vertical dotted lines mark the median of insertion controls (x\u2009=\u20090), as well as thresholds for \u201cmoderate\u201d (\u22120.302) and \u201cstrong\u201d (\u22120.90) depletion. Horizontal dashed lines indicate OddsPath thresholds for assigning evidence strengths in accordance with ACMG guidelines8. OddsPath values are capped for variants with function scores below \u22121.0 to display all points.\r\nExtended Data Fig. 6 Phenotype clustering of ReNU patients.\r\na, PCA clustering as in Fig. 3a but removing individuals with the recurrent n.64_65insT variant.","b, Phenotype clustering of all individuals represented in Fig. 3a using a UMAP representation.\r\nExtended Data Fig. 7 Correlation of the SGE assay in haploid versus diploid HAP1 cells.\r\na, Function scores (n\u2009=\u2009539) from SGE in diploid HAP1 cells, plotted by transcript position and coloured by variant type.","b, Function scores from SGE in diploid HAP1 cells coloured by the function score from SGE in haploid HAP1 cells.","c, Correlation of function scores in diploid versus haploid HAP1 cells, coloured by the region in which each variant is located (Pearson\u2019s r\u2009=\u20090.75).\r\nExtended Data Table 1 Comparison of clinical features by function score categories\r\nFull size table\r\nExtended Data Table 2 Results from association testing with intelligence-related metrics in the UK Biobank\r\nFull size table\r\nExtended Data Table 3 Homozygous and compound heterozygous variants in individuals with undiagnosed neurodevelopmental disorders\r\nFull size table\r\nSupplementary informationRights and permissions\r\nOpen Access This article is licensed under a Creative Commons Attribution 4.0 International License, which permits use, sharing, adaptation, distribution and reproduction in any medium or format, as long as you give appropriate credit to the original 
author(s) and the source, provide a link to the Creative Commons licence, and indicate if changes were made.","The images or other third party material in this article are included in the article\u2019s Creative Commons licence, unless indicated otherwise in a credit line to the material. If material is not included in the article\u2019s Creative Commons licence and your intended use is not permitted by statutory regulation or exceeds the permitted use, you will need to obtain permission directly from the copyright holder. To view a copy of this licence, visit http:\/\/creativecommons.org\/licenses\/by\/4.0\/.\r\n\r\nReprints and permissions\r\nAbout this article\r\n\r\nCite this article\r\nDe Jonghe, J.,","Kim, H.C.,","Adedeji, A. et al. Saturation editing of RNU4-2 reveals distinct dominant and recessive disorders.\nNature (2026). https:\/\/doi.org\/10.1038\/s41586-026-10334-9\r\n\r\nDownload citation\r\n\r\nReceived: 24 April 2025\r\n\r\nAccepted: 26 February 2026\r\n\r\nPublished: 08 April 2026\r\n\r\nVersion of record: 08 April 2026\r\n\r\nDOI: https:\/\/doi.org\/10.1038\/s41586-026-10334-9"],"images":[{"src":"\/news\/images\/TkbmuTA5An6pq0jkF31YSHaroE6S7TsKDrDN3Z0FIoltBva7I0do6Hv56SGRKYL85pVN6H7y6avsjnd9v9uqf4eFGmhcax0V6l2tuZNaytYfYHkUjKasu7XoZ0L4dajsjqh9NQVtaBnNYLsiwO6HiBpTgkUj9.jpg","width":0,"height":0,"source":"featured","size":0}]}"
    }
  }
  [4]=>
  array(4) {
    ["file"]=>
    string(22) "/app/news/article.html"
    ["line"]=>
    int(11)
    ["function"]=>
    string(18) "news_article_fetch"
    ["args"]=>
    array(1) {
      [0]=>
      string(70) "SNgSFz1UYwUgnAye8Iq6AwimFnBOT9jdyRE0kXeBSMWDBytDmFUTOmTLbAFJl2KLSL4Shs"
    }
  }
}