// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package pipeline provides tools for creating translation pipelines.
//
// NOTE: UNDER DEVELOPMENT. API MAY CHANGE.
package pipeline

import (
	"bytes"
	"encoding/json"
	"fmt"
	"go/build"
	"go/parser"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"text/template"
	"unicode"

	"golang.org/x/text/internal"
	"golang.org/x/text/language"
	"golang.org/x/text/runes"
	"golang.org/x/tools/go/loader"
)

// File names and suffix used for gotext JSON files. gotextSuffix doubles as
// the default out-file extension and as the suffix that identifies incoming
// translation files when no TranslationsPattern is configured.
const (
	extractFile  = "extracted.gotext.json"
	outFile      = "out.gotext.json"
	gotextSuffix = "gotext.json"
)

// Config contains configuration for the translation pipeline.
type Config struct {
	// Supported indicates the languages for which data should be generated.
	// The default is to support all locales for which there are matching
	// translation files.
	Supported []language.Tag

	// --- Extraction

	// SourceLanguage is the language the source messages are written in. For
	// this language Merge falls back to the message text itself when no
	// translation is available.
	SourceLanguage language.Tag

	// Packages lists the packages to process.
	// NOTE(review): presumably the arguments handed to the package loader;
	// confirm against the extraction code.
	Packages []string

	// --- File structure

	// Dir is the root dir for all operations. The default is "./locales".
	Dir string

	// TranslationsPattern is a regular expression to match incoming translation
	// files. These files may appear in any directory rooted at Dir.
	// The language for the translation files is determined as follows:
	//  1. From the Language field in the file.
	//  2. If not present, from a valid language tag in the filename, separated
	//     by dots (e.g. "en-US.json" or "incoming.pt_PT.xmb").
	//  3. If not present, from the closest subdirectory in which the file
	//     is contained that parses as a valid language tag.
	TranslationsPattern string

	// OutPattern defines the location for translation files for a certain
	// language. It is a text/template that may refer to .Dir, .Language and
	// .Ext. The default is "{{.Dir}}/{{.Language}}/out.{{.Ext}}"
	OutPattern string

	// Format defines the file format for generated translation files.
	// The default is XMB. Alternatives are GetText, XLIFF, L20n, GoText.
	Format string

	// Ext is the file extension substituted for {{.Ext}} in OutPattern. If it
	// is empty, Format is used instead; if that is empty as well, the
	// extension is "gotext.json".
	Ext string

	// TODO:
	// Actions are additional actions to be performed after the initial extract
	// and merge.
	// Actions []struct {
	// 	Name    string
	// 	Options map[string]string
	// }

	// --- Generation

	// GenFile may be in a different package. If it is not defined, it will
	// be written to stdout.
	GenFile string

	// GenPackage is the package or relative path into which to generate the
	// file. If not specified it is relative to the current directory.
	GenPackage string

	// DeclareVar defines a variable to which to assign the generated Catalog.
	DeclareVar string

	// SetDefault determines whether to assign the generated Catalog to
	// message.DefaultCatalog. The default for this is true if DeclareVar is
	// not defined, false otherwise.
	SetDefault bool

	// TODO:
	// - Printf-style configuration
	// - Template-style configuration
	// - Extraction options
	// - Rewrite options
	// - Generation options
}

// Operations:
// - extract: get the strings
// - disambiguate: find messages with the same key, but possibly different meanings.
// - create out: create a list of messages that need translations
// - load trans: load the list of current translations
// - merge: assign list of translations as done
// - (action)expand: analyze features and create example sentences for each version.
// - (action)googletrans: pre-populate messages with automatic translations.
// - (action)export: send out messages somewhere non-standard
// - (action)import: load messages from somewhere non-standard
// - vet program: don't pass "foo" + var + "bar" strings. Not using funcs for translated strings.
120 // - vet trans: coverage: all translations/ all features. 121 // - generate: generate Go code 122 123 // State holds all accumulated information on translations during processing. 124 type State struct { 125 Config Config 126 127 Package string 128 program *loader.Program 129 130 Extracted Messages `json:"messages"` 131 132 // Messages includes all messages for which there need to be translations. 133 // Duplicates may be eliminated. Generation will be done from these messages 134 // (usually after merging). 135 Messages []Messages 136 137 // Translations are incoming translations for the application messages. 138 Translations []Messages 139 } 140 141 func (s *State) dir() string { 142 if d := s.Config.Dir; d != "" { 143 return d 144 } 145 return "./locales" 146 } 147 148 func outPattern(s *State) (string, error) { 149 c := s.Config 150 pat := c.OutPattern 151 if pat == "" { 152 pat = "{{.Dir}}/{{.Language}}/out.{{.Ext}}" 153 } 154 155 ext := c.Ext 156 if ext == "" { 157 ext = c.Format 158 } 159 if ext == "" { 160 ext = gotextSuffix 161 } 162 t, err := template.New("").Parse(pat) 163 if err != nil { 164 return "", wrap(err, "error parsing template") 165 } 166 buf := bytes.Buffer{} 167 err = t.Execute(&buf, map[string]string{ 168 "Dir": s.dir(), 169 "Language": "%s", 170 "Ext": ext, 171 }) 172 return filepath.FromSlash(buf.String()), wrap(err, "incorrect OutPattern") 173 } 174 175 var transRE = regexp.MustCompile(`.*\.` + gotextSuffix) 176 177 // Import loads existing translation files. 
178 func (s *State) Import() error { 179 outPattern, err := outPattern(s) 180 if err != nil { 181 return err 182 } 183 re := transRE 184 if pat := s.Config.TranslationsPattern; pat != "" { 185 if re, err = regexp.Compile(pat); err != nil { 186 return wrapf(err, "error parsing regexp %q", s.Config.TranslationsPattern) 187 } 188 } 189 x := importer{s, outPattern, re} 190 return x.walkImport(s.dir(), s.Config.SourceLanguage) 191 } 192 193 type importer struct { 194 state *State 195 outPattern string 196 transFile *regexp.Regexp 197 } 198 199 func (i *importer) walkImport(path string, tag language.Tag) error { 200 files, err := ioutil.ReadDir(path) 201 if err != nil { 202 return nil 203 } 204 for _, f := range files { 205 name := f.Name() 206 tag := tag 207 if f.IsDir() { 208 if t, err := language.Parse(name); err == nil { 209 tag = t 210 } 211 // We ignore errors 212 if err := i.walkImport(filepath.Join(path, name), tag); err != nil { 213 return err 214 } 215 continue 216 } 217 for _, l := range strings.Split(name, ".") { 218 if t, err := language.Parse(l); err == nil { 219 tag = t 220 } 221 } 222 file := filepath.Join(path, name) 223 // TODO: Should we skip files that match output files? 224 if fmt.Sprintf(i.outPattern, tag) == file { 225 continue 226 } 227 // TODO: handle different file formats. 228 if !i.transFile.MatchString(name) { 229 continue 230 } 231 b, err := ioutil.ReadFile(file) 232 if err != nil { 233 return wrap(err, "read file failed") 234 } 235 var translations Messages 236 if err := json.Unmarshal(b, &translations); err != nil { 237 return wrap(err, "parsing translation file failed") 238 } 239 i.state.Translations = append(i.state.Translations, translations) 240 } 241 return nil 242 } 243 244 // Merge merges the extracted messages with the existing translations. 245 func (s *State) Merge() error { 246 if s.Messages != nil { 247 panic("already merged") 248 } 249 // Create an index for each unique message. 
250 // Duplicates are okay as long as the substitution arguments are okay as 251 // well. 252 // Top-level messages are okay to appear in multiple substitution points. 253 254 // Collect key equivalence. 255 msgs := []*Message{} 256 keyToIDs := map[string]*Message{} 257 for _, m := range s.Extracted.Messages { 258 m := m 259 if prev, ok := keyToIDs[m.Key]; ok { 260 if err := checkEquivalence(&m, prev); err != nil { 261 warnf("Key %q matches conflicting messages: %v and %v", m.Key, prev.ID, m.ID) 262 // TODO: track enough information so that the rewriter can 263 // suggest/disambiguate messages. 264 } 265 // TODO: add position to message. 266 continue 267 } 268 i := len(msgs) 269 msgs = append(msgs, &m) 270 keyToIDs[m.Key] = msgs[i] 271 } 272 273 // Messages with different keys may still refer to the same translated 274 // message (e.g. different whitespace). Filter these. 275 idMap := map[string]bool{} 276 filtered := []*Message{} 277 for _, m := range msgs { 278 found := false 279 for _, id := range m.ID { 280 found = found || idMap[id] 281 } 282 if !found { 283 filtered = append(filtered, m) 284 } 285 for _, id := range m.ID { 286 idMap[id] = true 287 } 288 } 289 290 // Build index of translations. 291 translations := map[language.Tag]map[string]Message{} 292 languages := append([]language.Tag{}, s.Config.Supported...) 
293 294 for _, t := range s.Translations { 295 tag := t.Language 296 if _, ok := translations[tag]; !ok { 297 translations[tag] = map[string]Message{} 298 languages = append(languages, tag) 299 } 300 for _, m := range t.Messages { 301 if !m.Translation.IsEmpty() { 302 for _, id := range m.ID { 303 if _, ok := translations[tag][id]; ok { 304 warnf("Duplicate translation in locale %q for message %q", tag, id) 305 } 306 translations[tag][id] = m 307 } 308 } 309 } 310 } 311 languages = internal.UniqueTags(languages) 312 313 for _, tag := range languages { 314 ms := Messages{Language: tag} 315 for _, orig := range filtered { 316 m := *orig 317 m.Key = "" 318 m.Position = "" 319 320 for _, id := range m.ID { 321 if t, ok := translations[tag][id]; ok { 322 m.Translation = t.Translation 323 if t.TranslatorComment != "" { 324 m.TranslatorComment = t.TranslatorComment 325 m.Fuzzy = t.Fuzzy 326 } 327 break 328 } 329 } 330 if tag == s.Config.SourceLanguage && m.Translation.IsEmpty() { 331 m.Translation = m.Message 332 if m.TranslatorComment == "" { 333 m.TranslatorComment = "Copied from source." 334 m.Fuzzy = true 335 } 336 } 337 // TODO: if translation is empty: pre-expand based on available 338 // linguistic features. This may also be done as a plugin. 339 ms.Messages = append(ms.Messages, m) 340 } 341 s.Messages = append(s.Messages, ms) 342 } 343 return nil 344 } 345 346 // Export writes out the messages to translation out files. 347 func (s *State) Export() error { 348 path, err := outPattern(s) 349 if err != nil { 350 return wrap(err, "export failed") 351 } 352 for _, out := range s.Messages { 353 // TODO: inject translations from existing files to avoid retranslation. 
354 data, err := json.MarshalIndent(out, "", " ") 355 if err != nil { 356 return wrap(err, "JSON marshal failed") 357 } 358 file := fmt.Sprintf(path, out.Language) 359 if err := os.MkdirAll(filepath.Dir(file), 0755); err != nil { 360 return wrap(err, "dir create failed") 361 } 362 if err := ioutil.WriteFile(file, data, 0644); err != nil { 363 return wrap(err, "write failed") 364 } 365 } 366 return nil 367 } 368 369 var ( 370 ws = runes.In(unicode.White_Space).Contains 371 notWS = runes.NotIn(unicode.White_Space).Contains 372 ) 373 374 func trimWS(s string) (trimmed, leadWS, trailWS string) { 375 trimmed = strings.TrimRightFunc(s, ws) 376 trailWS = s[len(trimmed):] 377 if i := strings.IndexFunc(trimmed, notWS); i > 0 { 378 leadWS = trimmed[:i] 379 trimmed = trimmed[i:] 380 } 381 return trimmed, leadWS, trailWS 382 } 383 384 // NOTE: The command line tool already prefixes with "gotext:". 385 var ( 386 wrap = func(err error, msg string) error { 387 if err == nil { 388 return nil 389 } 390 return fmt.Errorf("%s: %v", msg, err) 391 } 392 wrapf = func(err error, msg string, args ...interface{}) error { 393 if err == nil { 394 return nil 395 } 396 return wrap(err, fmt.Sprintf(msg, args...)) 397 } 398 errorf = fmt.Errorf 399 ) 400 401 func warnf(format string, args ...interface{}) { 402 // TODO: don't log. 403 log.Printf(format, args...) 404 } 405 406 func loadPackages(conf *loader.Config, args []string) (*loader.Program, error) { 407 if len(args) == 0 { 408 args = []string{"."} 409 } 410 411 conf.Build = &build.Default 412 conf.ParserMode = parser.ParseComments 413 414 // Use the initial packages from the command line. 415 args, err := conf.FromArgs(args, false) 416 if err != nil { 417 return nil, wrap(err, "loading packages failed") 418 } 419 420 // Load, parse and type-check the whole program. 421 return conf.Load() 422 }