All files / src/sequence-utils sequence-processor.ts

96.29% Statements 26/27
75% Branches 9/12
100% Functions 3/3
96.15% Lines 25/26

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89      8x                       8x             8x         8x         8x     8x         6x 6x 6x     14x     4x     2x     2x     4x         4x 4x 4x     4x     10x       14x 6x   14x     6x 6x     6x        
import { commentLineRE, extractNameFromFASTAHeader } from '.';
import { sequenceValidator } from './sequenceValidator';
 
// Matches runs of whitespace; the global flag lets a single replace() call
// strip every run from a sequence line, not just the first one.
const whitespaceRE = /\s+/g;
 
/**
 * One sequence record produced by the sequence processor.
 *
 * The four required fields start out as empty strings and are filled in
 * line by line while the input text is scanned.
 */
export type SequenceObject = {
  /** Every input line belonging to this record, joined with '\n'. */
  raw: string;
  /** Name extracted from the first header line; stays '' when none is found. */
  name: string;
  /** Header (comment) line(s) of the record, joined with '\n' when there are several. */
  header: string;
  /** Concatenated sequence lines with all whitespace removed. */
  sequence: string;
  // The optional fields below are not set while parsing; presumably they are
  // supplied by the sequenceValidator result that validate() merges in —
  // confirm against sequenceValidator.
  likelyType?: 'aa' | 'na';
  valid?: boolean;
  message?: string;
};
 
/**
 * Builds a blank sequence record.
 *
 * A new object is created on every call so that callers can mutate the
 * result freely without affecting any other record.
 *
 * @returns A record whose raw, name, header and sequence are all ''.
 */
const getNewSequenceObject = (): SequenceObject => {
  const blank: SequenceObject = {
    sequence: '',
    header: '',
    name: '',
    raw: '',
  };
  return blank;
};
 
/**
 * Runs sequenceValidator on a record's sequence and returns a new object
 * combining the validator's result with the record.
 *
 * The input record is not mutated.
 *
 * @param sequenceObject - Record whose `sequence` is validated.
 * @param minimumLength - Forwarded unchanged to sequenceValidator.
 * @param strict - Forwarded unchanged to sequenceValidator.
 * @returns A shallow merge of the validator result and the record.
 */
const validate = (
  sequenceObject: SequenceObject,
  minimumLength?: number,
  strict?: boolean
): SequenceObject => {
  const { sequence } = sequenceObject;
  const verdict = sequenceValidator(sequence, minimumLength, strict);

  // Spread order matters: the record is spread last, so any property it
  // already carries takes precedence over a same-named validator property.
  return { ...verdict, ...sequenceObject };
};
 
/**
 * Splits FASTA-like text into individual sequence records and validates
 * each of them.
 *
 * The text is scanned line by line (split on '\n'):
 * - A line matching commentLineRE is a header line. If the current record
 *   already holds sequence data, that record is validated and stored and a
 *   new one is started. Consecutive header lines are joined with '\n' into
 *   a multiline header; the name is extracted from the first header line only.
 * - Any other line is sequence data; its whitespace is stripped and the rest
 *   is appended to the current record's sequence.
 * - Every line is also appended to the record's `raw` text, joined with '\n'.
 *   Empty lines seen before a record has any raw text leave `raw` empty.
 *
 * @param rawText - Text to parse; may contain several records.
 * @param minimumLength - Forwarded to the validator for every record.
 * @param strict - Forwarded to the validator for every record.
 * @returns The validated records in input order; an empty array when no
 *   record accumulated any raw text (e.g. for an empty string).
 */
const sequenceProcessor = (
  rawText: string,
  minimumLength?: number,
  strict?: boolean // Strict validates only conventional amino acids (no ambiguous amino acids)
): SequenceObject[] => {
  const sequences: SequenceObject[] = [];
  let currentSequence = getNewSequenceObject();

  for (const line of rawText.split('\n')) {
    // NOTE(review): RegExp.prototype.test is stateful when the pattern has
    // the `g` or `y` flag — confirm commentLineRE is declared without them.
    if (commentLineRE.test(line)) {
      // Header line.
      if (currentSequence.sequence) {
        // Sequence data was already collected, so this header opens a new
        // record: store the finished one and start afresh.
        sequences.push(validate(currentSequence, minimumLength, strict));
        currentSequence = getNewSequenceObject();
      }

      if (currentSequence.header) {
        // Continuation of a multiline header.
        currentSequence.header += '\n';
      } else {
        // First header line: the only one the name is extracted from.
        const name = extractNameFromFASTAHeader(line);
        if (name) {
          currentSequence.name = name;
        }
      }
      currentSequence.header += line;
    } else {
      // Sequence line: keep the residues, drop all whitespace.
      currentSequence.sequence += line.replace(whitespaceRE, '');
    }

    // Keep the untouched input line as part of the record's raw text.
    if (currentSequence.raw) {
      currentSequence.raw += '\n';
    }
    currentSequence.raw += line;
  }

  // Flush the last record; skipped when nothing was accumulated at all.
  if (currentSequence.raw) {
    sequences.push(validate(currentSequence, minimumLength, strict));
  }

  return sequences;
};
 
export default sequenceProcessor;