CurryInfo: curry-source-1.2.0 / Language.Curry.SourceCodeClassifier

classes: Info
 
documentation: Info
 
---------------------------------------------------------------------------
This module provides functions to extract span information of entities
defined in Curry source programs. These functions extract the
span information of operations, types, and classes. The span information
consists of the comment span and the source code span of each entity.

@version June 2025
---------------------------------------------------------------------------
name: Info
 Language.Curry.SourceCodeClassifier
operations: Info
 getClassesInModule getDeclarationsInModule getOperationsInModule getTypesInModule
sourcecode: Info
 

module Language.Curry.SourceCodeClassifier
  ( getDeclarationsInModule, getOperationsInModule
  , getTypesInModule, getClassesInModule
  ) where

import Data.Maybe ( catMaybes )
import Data.List  ( sortBy )

import Curry.Comment  ( readComments )
import Curry.Files    ( readFullAST, readShortAST )
import Curry.Types    ( Module(..), Decl(..) )
import Curry.Ident    ( idName )
import Curry.SpanInfo ( SpanInfo(..) )
import Curry.Span     ( Span(..) )
import Curry.Position ( Position (..) )

import Data.Trie as T ( Trie, empty, insert, lookup )

--- A line span is a pair of a start and an end line number.
--- The start line number is inclusive, the end line number is exclusive.
type LineSpan = (Int, Int)

--- Denotes a missing span. This is used for occurrences of entites without
--- a comment, e.g., the comment span is non-existent.
missing :: LineSpan
missing = (0, 0)

--- An occurrence of some entity in the source code consists of
--- the entity's name, the comment span, and the source code span.
type Occurrence = (String, LineSpan, LineSpan)

--- Extracts all operations, type declarations, and class declarations
--- in a module with their associated comment and source code spans.
getDeclarationsInModule :: String -> IO ([Occurrence],[Occurrence],[Occurrence])
getDeclarationsInModule mn = do
  (mdl, comments) <- readModule mn

  let ops     = collectOperationsInModule mdl comments
  let types   = collectTypesInModule      mdl comments
  let classes = collectClassesInModule    mdl comments

  return (ops, types, classes)

--- Extracts all operations in a module with their comment and source code spans.
getOperationsInModule :: String -> IO [Occurrence]
getOperationsInModule mn =
  uncurry collectOperationsInModule <$> readModule mn

--- Extracts all types in a module with their comment and source code spans.
getTypesInModule :: String -> IO [Occurrence]
getTypesInModule mn =
  uncurry collectTypesInModule <$> readModule mn

--- Extracts all classes in a module with their comment and source code spans.
getClassesInModule :: String -> IO [Occurrence]
getClassesInModule mn =
  uncurry collectClassesInModule <$> readModule mn

--------------------------------------------------------------------------------
-- Implementation of the source code classifier.

--- Some entity with a line span.
type Entity a = (a, LineSpan)

--- A signature of some operation or operations in the source code,
--- consisting of the name(s) and the code span.
---
--- Because, e.g.,
---  > x,y,z :: Int -> Int
--- is a valid Curry signature, we need to be able to associate
--- multiple names with a single signature span.
type SignatureE = Entity [String]

--- A type declaration in the source code.
type TypeDeclE = Entity String

--- A class declaration
type ClassDeclE = Entity String

--- Given some module and comment line spans, this function collects all
--- operations in the module with their associated comment and code spans.
collectOperationsInModule :: Module a -> [LineSpan] -> [Occurrence]
collectOperationsInModule mdl comments =
  let sigs = collectSignatures mdl
      ops  = collectOperations mdl
  in extendWithRules ops $ addComments createOccs comments sigs
 where
  -- Creates occurrences from the signature and the comment span of
  -- an operation or multiple operations.
  createOccs :: LineSpan -> SignatureE -> [Occurrence]
  createOccs cls (is, sigSpan) = [(i, cls, sigSpan) | i <- is]

  collectSignatures :: Module a -> [SignatureE]
  collectSignatures (Module _ _ _ _ _ _ ops)
    = sortBy lineNumber $ catMaybes $ map collectSignature ops

  -- Collects the identifier and span of all operation signatures.
  collectSignature :: Decl a -> Maybe SignatureE
  collectSignature decl = case decl of
    (TypeSig si is _) -> do
      sp <- getLineSpan si
      return (map idName is, sp)
    _ -> Nothing

  -- Collects the span lines of all operations (rules) and stores them
  -- in a trie.
  collectOperations :: Module a -> T.Trie LineSpan
  collectOperations (Module _ _ _ _ _ _ ops) = foldr collect T.empty ops
   where
    collect :: Decl a -> Trie LineSpan -> Trie LineSpan
    collect decl trie = case decl of
      (FunctionDecl si _ i _) -> case getLineSpan si of
        Nothing -> trie
        Just ls -> T.insert (idName i) ls trie
      _ -> trie

  -- Extends the occurrences with the code spans of the operations'
  -- rule definitions.
  extendWithRules :: T.Trie LineSpan -> [Occurrence] -> [Occurrence]
  extendWithRules ops = map extend
   where
    extend :: Occurrence -> Occurrence
    extend occ@(i, cSpan, sigSpan) = case T.lookup i ops of
      Nothing -> occ
      Just opSpan -> (i, cSpan, extendSpan sigSpan opSpan)

    extendSpan :: LineSpan -> LineSpan -> LineSpan
    extendSpan (s1, e1) (s2, e2) = (min s1 s2, max e1 e2)

--- Given some module and comment line spans, this function collects all
--- type declarations in the module with their associated comment and
--- code spans.
collectTypesInModule :: Module a -> [LineSpan] -> [Occurrence]
collectTypesInModule mdl comments =
  let types = collectTypes mdl
  in addComments createOcc comments types
 where
  collectTypes :: Module a -> [TypeDeclE]
  collectTypes (Module _ _ _ _ _ _ decls)
    = sortBy lineNumber $ catMaybes $ map collectType decls

  collectType :: Decl a -> Maybe TypeDeclE
  collectType decl = case decl of
    (DataDecl si i _ _ _)    -> typeDeclE si i
    (NewtypeDecl si i _ _ _) -> typeDeclE si i
    (TypeDecl si i _ _)      -> typeDeclE si i
    _ -> Nothing

  -- Collects the identifier and span of some type declaration.
  typeDeclE si i = do
      sp <- getLineSpan si
      return (idName i, sp)

--- Given some module and comment line spans, this function collects all
--- classes in the module with their associated comment and code spans.
collectClassesInModule :: Module a -> [LineSpan] -> [Occurrence]
collectClassesInModule mdl comments =
  addComments createOcc comments $ collectClasses mdl
 where
  collectClasses :: Module a -> [ClassDeclE]
  collectClasses (Module _ _ _ _ _ _ decls)
    = sortBy lineNumber $ catMaybes $ map collectClass decls

  collectClass :: Decl a -> Maybe (ClassDeclE)
  collectClass decl = case decl of
    (ClassDecl si _ _ i _ _ _) -> do
      sp <- getLineSpan si
      return (idName i, sp)
    _ -> Nothing

--------------------------------------------------------------------------------
-- Various helper functions.

--- Reads and returns the short AST and comments of a module.
readModule :: String -> IO (Module (), [LineSpan])
readModule mn = do
  comments <- (mergeCommentSpans . getCommentSpans) <$> readComments mn
  mdl <- readShortAST mn
  return (mdl, comments)

-- Creates a single occurrence from an entity that holds one
-- name and a line span, with some associated comment span.
createOcc :: LineSpan -> Entity String -> [Occurrence]
createOcc cls (i, sigSpan) = [(i, cls, sigSpan)]

-- Assigns the comment spans to some entities.
--
-- Notice that because the comment line positions as well as the entity
-- line positions are steadily increasing, we can merge in linear time in the
-- number of comments and signatures.
--
-- For `addComments f cs ss`,
--  - `f` is a function that creates occurrences from a comment span and
--    some entity that has a line span
--  - `cs` is the list of comment spans
--  - `ss` is the list of signature spans
addComments :: (LineSpan -> (a, LineSpan) -> [Occurrence]) -> [LineSpan]
            -> [(a, LineSpan)] -> [Occurrence]
addComments f cs ss = case (cs, ss) of
  ([], _) -> concatMap (f missing) ss
  (_, []) -> []
  (c:cs', s@(_, sigSpan):ss')
    -- Skip unconnected comments:
    | end c <  start sigSpan -> addComments f cs' (s:ss')
    -- Add with connected comments:
    | end c == start sigSpan -> f c s       ++ addComments f cs' ss'
    -- Add with missing comments:
    | otherwise              -> f missing s ++ addComments f (c:cs') ss'

--- Converts all comments with span information to line spans.
getCommentSpans :: [(Span, _)] -> [LineSpan]
getCommentSpans = sortBy startLine . catMaybes . map (getLineSpan . fst)
 where
  startLine :: LineSpan -> LineSpan -> Bool
  startLine (x, _) (y, _) = x <= y

--- Merges adjacent line spans of comments.
---
--- Consider the spans `[(1, 2), (2, 3), (4, 5)]`. The first two spans are
--- adjacent and should be merged to `(1, 3)`. The last span is not adjacent
--- to any other span and should be left as is.
mergeCommentSpans :: [LineSpan] -> [LineSpan]
mergeCommentSpans = foldr merge []
 where
  merge :: LineSpan -> [LineSpan] -> [LineSpan]
  merge sl [] = [sl]
  merge sl (sr : xs)
    | end sl == start sr = (start sl, end sr) : xs
    | otherwise          = sl : sr : xs

-- Returns the start line number of a line span.
start :: LineSpan -> Int
start = fst

-- Returns the end line number of a line span.
--
-- The end line number is exclusive, i.e., the line span `(s, e)` covers
-- the lines `s, s+1, ..., e-1`.
end :: LineSpan -> Int
end = snd

--- Orders entities by their start line number.
lineNumber :: Entity a -> Entity a -> Bool
lineNumber (_, (s1, _)) (_, (s2, _)) = s1 <= s2

--- Extracts the row number from a position.
row :: Position -> Maybe Int
row (Position r _) = Just r
row NoPos          = Nothing

class HasLineSpan a where
  getLineSpan :: a -> Maybe LineSpan

instance HasLineSpan Span where
  getLineSpan NoSpan     = Nothing
  getLineSpan (Span s e) = do
    sp <- row s
    ep <- row e
    return (sp, ep + 1)

instance HasLineSpan SpanInfo where
  getLineSpan (SpanInfo s _) = getLineSpan s
  getLineSpan NoSpanInfo     = Nothing
types: Info
 
unsafe: Info
 safe