1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
------------------------------------------------------------------------------
--- This module provides functions to extract span information of entities
--- in curry source programs. Namely, it provides functions to extract the
--- span information of operations, types, and classes. The span information 
--- consists of the comment span and the code span of the entity.
---
--- @version January 2025
------------------------------------------------------------------------------

module Language.Curry.SourceCodeClassifier
  ( getDeclarationsInModule, getOperationsInModule
  , getTypesInModule, getClassesInModule
  ) where

import Curry.Comment  ( readComments )
import Curry.Files    ( readFullAST, readShortAST )
import Curry.Types    ( Module(..), Decl(..) )
import Curry.Ident    ( idName )
import Curry.SpanInfo ( SpanInfo(..) )
import Curry.Span     ( Span(..) )
import Curry.Position ( Position (..) )

import Data.Maybe ( catMaybes )
import Data.List  ( sortBy )

import Data.Trie as T ( Trie, empty, insert, lookup )

--- A pair of start and end line numbers. The start line number is inclusive,
--- the end line number is exclusive.
type LineSpan = (Int, Int)

--- Denotes a missing span. This is used for occurrences of entites without
--- a comment, e.g., the comment span is non-existent.
missing :: LineSpan
missing = (0, 0)

--- An occurrence of some entity in the source code, consisting of
--- the entity's name, the comment span, and the code span.
type Occurrence = (String, LineSpan, LineSpan)

--- Extracts all operations, type declarations and class declarations
--- in a module with their associated comment and code spans.
getDeclarationsInModule :: String -> IO ([Occurrence], [Occurrence], [Occurrence])
getDeclarationsInModule mn = do
  (mdl, comments) <- readModule mn

  let ops     = collectOperationsInModule mdl comments
  let types   = collectTypesInModule      mdl comments
  let classes = collectClassesInModule    mdl comments

  return (ops, types, classes)

--- Extracts all operations in a module with their comment and code spans.
getOperationsInModule :: String -> IO [Occurrence]
getOperationsInModule mn =
  uncurry collectOperationsInModule <$> readModule mn

--- Extracts all types in a module with their comment and code spans.
getTypesInModule :: String -> IO [Occurrence]
getTypesInModule mn =
  uncurry collectTypesInModule <$> readModule mn

--- Extracts all classes in a module with their comment and code spans.
getClassesInModule :: String -> IO [Occurrence]
getClassesInModule mn =
  uncurry collectClassesInModule <$> readModule mn

--------------------------------------------------------------------------------
-- Implementation of the source code classifier.

--- Some entity with a line span.
type Entity a = (a, LineSpan)

--- A signature of some operation or operations in the source code, 
--- consisting of the name(s) and the code span.
---
--- Because, e.g.,
---  > x,y,z :: Int -> Int
--- is a valid Curry signature, we need to be able to associate
--- multiple names with a single signature span.
type SignatureE = Entity [String]

--- A type declaration in the source code.
type TypeDeclE = Entity String

--- A class declaration
type ClassDeclE = Entity String

--- Given some module and comment line spans, this function collects all
--- operations in the module with their associated comment and code spans.
collectOperationsInModule :: Module a -> [LineSpan] -> [Occurrence]
collectOperationsInModule mdl comments =
  let sigs = collectSignatures mdl
      ops  = collectOperations mdl
  in extendWithRules ops $ addComments createOccs comments sigs
 where
  -- Creates occurrences from the signature and the comment span of
  -- an operation or multiple operations.
  createOccs :: LineSpan -> SignatureE -> [Occurrence]
  createOccs cls (is, sigSpan) = [(i, cls, sigSpan) | i <- is]

  collectSignatures :: Module a -> [SignatureE]
  collectSignatures (Module _ _ _ _ _ _ ops)
    = sortBy lineNumber $ catMaybes $ map collectSignature ops

  -- Collects the identifier and span of all operation signatures.
  collectSignature :: Decl a -> Maybe SignatureE
  collectSignature decl = case decl of
    (TypeSig si is _) -> do
      sp <- getLineSpan si
      return (map idName is, sp)
    _ -> Nothing

  -- Collects the span lines of all operations (rules) and stores them 
  -- in a trie.
  collectOperations :: Module a -> T.Trie LineSpan
  collectOperations (Module _ _ _ _ _ _ ops) = foldr collect T.empty ops
   where
    collect :: Decl a -> Trie LineSpan -> Trie LineSpan
    collect decl trie = case decl of
      (FunctionDecl si _ i _) -> case getLineSpan si of
        Nothing -> trie
        Just ls -> T.insert (idName i) ls trie
      _ -> trie

  -- Extends the occurrences with the code spans of the operations' 
  -- rule definitions.
  extendWithRules :: T.Trie LineSpan -> [Occurrence] -> [Occurrence]
  extendWithRules ops = map extend
   where
    extend :: Occurrence -> Occurrence
    extend occ@(i, cSpan, sigSpan) = case T.lookup i ops of
      Nothing -> occ
      Just opSpan -> (i, cSpan, extendSpan sigSpan opSpan)

    extendSpan :: LineSpan -> LineSpan -> LineSpan
    extendSpan (s1, e1) (s2, e2) = (min s1 s2, max e1 e2)

--- Given some module and comment line spans, this function collects all
--- type declarations in the module with their associated comment and 
--- code spans.
collectTypesInModule :: Module a -> [LineSpan] -> [Occurrence]
collectTypesInModule mdl comments =
  let types = collectTypes mdl
  in addComments createOcc comments types
 where
  collectTypes :: Module a -> [TypeDeclE]
  collectTypes (Module _ _ _ _ _ _ decls)
    = sortBy lineNumber $ catMaybes $ map collectType decls

  collectType :: Decl a -> Maybe (TypeDeclE)
  collectType decl = case decl of
    (DataDecl si i _ _ _) -> do
      sp <- getLineSpan si
      return (idName i, sp)
    (NewtypeDecl si i _ _ _) -> do
      sp <- getLineSpan si
      return (idName i, sp)
    (TypeDecl si i _ _) -> do
      sp <- getLineSpan si
      return (idName i, sp)
    _ -> Nothing

--- Given some module and comment line spans, this function collects all
--- classes in the module with their associated comment and code spans.
collectClassesInModule :: Module a -> [LineSpan] -> [Occurrence]
collectClassesInModule mdl comments =
  let classes = collectClasses mdl
  in addComments createOcc comments classes
 where
  collectClasses :: Module a -> [ClassDeclE]
  collectClasses (Module _ _ _ _ _ _ decls)
    = sortBy lineNumber $ catMaybes $ map collectClass decls

  collectClass :: Decl a -> Maybe (ClassDeclE)
  collectClass decl = case decl of
    (ClassDecl si _ _ i _ _ _) -> do
      sp <- getLineSpan si
      return (idName i, sp)
    _ -> Nothing

--------------------------------------------------------------------------------
-- Various helper functions.

--- Reads and returns the short AST and comments of a module.
readModule :: String -> IO (Module (), [LineSpan])
readModule mn = do
  comments <- (mergeCommentSpans . getCommentSpans) <$> readComments mn
  mdl <- readShortAST mn
  return (mdl, comments)

-- Creates a single occurrence from an entity that holds one 
-- name and a line span, with some associated comment span.
createOcc :: LineSpan -> Entity String -> [Occurrence]
createOcc cls (i, sigSpan) = [(i, cls, sigSpan)]

-- Assigns the comment spans to some entities.
--
-- Notice that because the comment line positions as well as the entity
-- line positions are steadily increasing, we can merge in linear time in the
-- number of comments and signatures.
--
-- For `addComments f cs ss`, 
--  - `f` is a function that creates occurrences from a comment span and some entity
--    that has a line span.
--  - `cs` is the list of comment spans.
--  - `ss` is the list of signature spans.
addComments :: (LineSpan -> (a, LineSpan) -> [Occurrence]) -> [LineSpan] -> [(a, LineSpan)] -> [Occurrence]
addComments f cs ss = case (cs, ss) of
  ([], _) -> concatMap (f missing) ss
  (_, []) -> []
  (c:cs', s@(_, sigSpan):ss')
    -- Skip unconnected comments:
    | end c <  start sigSpan -> addComments f cs' (s:ss')
    -- Add with connected comments:
    | end c == start sigSpan -> f c s       ++ addComments f cs' ss'
    -- Add with missing comments:
    | otherwise              -> f missing s ++ addComments f (c:cs') ss'
 where
  start = fst
  end = snd

--- Converts all comments with span information to line spans.
getCommentSpans :: [(Span, _)] -> [LineSpan]
getCommentSpans = sortBy startLine . catMaybes . map (getLineSpan . fst)
 where
  startLine :: LineSpan -> LineSpan -> Bool
  startLine (x, _) (y, _) = x <= y

--- Merges adjacent line spans of comments.
--- 
--- Consider the spans `[(1, 2), (2, 3), (4, 5)]`. The first two spans are
--- adjacent and should be merged to `(1, 3)`. The last span is not adjacent
--- to any other span and should be left as is.
mergeCommentSpans :: [LineSpan] -> [LineSpan]
mergeCommentSpans = foldr merge []
 where
  merge :: LineSpan -> [LineSpan] -> [LineSpan]
  merge (s, e) [] = [(s, e)]
  merge (s, e) ((s', e') : xs)
    | e == s'   = (s, e') : xs
    | otherwise = (s, e) : (s', e') : xs

--- Orders entities by their start line number.
lineNumber :: Entity a -> Entity a -> Bool
lineNumber (_, (s1, _)) (_, (s2, _)) = s1 <= s2

--- Extracts the row number from a position.
row :: Position -> Maybe Int
row (Position r _) = Just r
row NoPos          = Nothing

class HasLineSpan a where
  getLineSpan :: a -> Maybe LineSpan

instance HasLineSpan Span where
  getLineSpan NoSpan     = Nothing
  getLineSpan (Span s e) = do
    sp <- row s
    ep <- row e
    return (sp, ep + 1)

instance HasLineSpan SpanInfo where
  getLineSpan (SpanInfo s _) = getLineSpan s
  getLineSpan NoSpanInfo     = Nothing