forgejo/tests/integration/repo_lang_stats_test.go
Gergely Nagy 2dbec730e8
[GITEA] Improved Linguist compatibility
Recognise the `linguist-documentation` and `linguist-detectable`
attributes in `.gitattributes` files, and use them in
`GetLanguageStats()` to make a decision whether to include a particular
file in the stats or not.

This allows one more control over which files in their repositories
contribute toward the language statistics, so that for a project that is
mostly documentation, the language stats can reflect that.

Fixes #1672.

Signed-off-by: Gergely Nagy <forgejo@gergo.csillger.hu>
(cherry picked from commit 6d4e02fe5f)
(cherry picked from commit ee1ead8189)
2024-01-28 11:01:16 +01:00

276 lines
8.1 KiB
Go

// Copyright 2024 The Forgejo Authors c/o Codeberg e.V.. All rights reserved.
// SPDX-License-Identifier: MIT
package integration
import (
"context"
"net/url"
"strings"
"testing"
"time"
"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/models/unittest"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/indexer/stats"
"code.gitea.io/gitea/modules/queue"
repo_service "code.gitea.io/gitea/services/repository"
files_service "code.gitea.io/gitea/services/repository/files"
"code.gitea.io/gitea/tests"
"github.com/stretchr/testify/assert"
)
// createLangStatTestRepo creates the repository used by the language
// statistics tests: a public, auto-initialised repository called
// "lang-stat-test" with "main" as its default branch, created for the fixture
// user with ID 2 (which is passed as both user arguments of CreateRepository).
//
// It returns the repository and a cleanup function that deletes it again; the
// caller is expected to run the cleanup (typically via defer) once it is done
// with the repository. If the repository cannot be created, the test is
// stopped right away, so a caller never receives a nil repository.
func createLangStatTestRepo(t *testing.T) (*repo_model.Repository, func()) {
	t.Helper()

	user2 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})

	// Create a new repository
	repo, err := repo_service.CreateRepository(db.DefaultContext, user2, user2, repo_service.CreateRepoOptions{
		Name:          "lang-stat-test",
		Description:   "minimal repo for language stats testing",
		AutoInit:      true,
		Gitignores:    "Go",
		License:       "MIT",
		Readme:        "Default",
		DefaultBranch: "main",
		IsPrivate:     false,
	})
	// The assert helpers only record a failure and carry on. Stop here
	// instead, because everything that follows dereferences the repository.
	if !assert.NoError(t, err) || !assert.NotEmpty(t, repo) {
		t.FailNow()
	}

	return repo, func() {
		// Every caller creates a repository with the same name, so a deletion
		// that fails must be reported rather than silently ignored.
		assert.NoError(t, repo_service.DeleteRepository(db.DefaultContext, user2, repo, false))
	}
}
// addLangStatTestFiles adds the sample files used by the language statistics
// tests to the "main" branch of repo, all in one commit authored and committed
// by the repository owner, and returns the SHA of that commit.
//
// contents becomes the body of `.gitattributes`, which is how each test
// selects the linguist attributes it wants to exercise. The remaining files
// are fixed: a Markdown document, a C source file, a `.nib` file, a Pascal
// dotfile, a Python script named `cpplint.py`, and an XML file.
//
// The test is stopped immediately if the commit cannot be created.
func addLangStatTestFiles(t *testing.T, repo *repo_model.Repository, contents string) string {
	t.Helper()

	owner := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: repo.OwnerID})

	// The file bodies below are interpreted string literals on purpose: inside
	// a raw (back-quoted) literal `\n` is a backslash followed by an "n", not
	// a line break, and these files are meant to span several lines.
	//
	// NOTE(review): TestRepoLangStats expects C to be listed before any other
	// language, presumably because the list is ordered by size. Keep foo.c the
	// largest of these files.
	addFilesResp, err := files_service.ChangeRepoFiles(git.DefaultContext, repo, owner, &files_service.ChangeRepoFilesOptions{
		Files: []*files_service.ChangeRepoFile{
			{
				Operation:     "create",
				TreePath:      ".gitattributes",
				ContentReader: strings.NewReader(contents),
			},
			{
				Operation:     "create",
				TreePath:      "docs.md",
				ContentReader: strings.NewReader("This **is** a `markdown` file.\n"),
			},
			{
				Operation: "create",
				TreePath:  "foo.c",
				// `\\n` keeps the escape sequence inside the C string literal.
				ContentReader: strings.NewReader("#include <stdio.h>\nint main() {\n printf(\"Hello world!\\n\");\n return 0;\n}\n"),
			},
			{
				Operation:     "create",
				TreePath:      "foo.nib",
				ContentReader: strings.NewReader("Pinky promise, this is not a generated file!\n"),
			},
			{
				Operation:     "create",
				TreePath:      ".dot.pas",
				ContentReader: strings.NewReader("program Hello;\nbegin\n writeln('Hello, world.');\nend.\n"),
			},
			{
				Operation:     "create",
				TreePath:      "cpplint.py",
				ContentReader: strings.NewReader("#! /usr/bin/env python\n\nprint(\"Hello world!\")\n"),
			},
			{
				Operation:     "create",
				TreePath:      "some-file.xml",
				ContentReader: strings.NewReader("<?xml version=\"1.0\"?>\n<foo>\n <bar>Hello</bar>\n</foo>\n"),
			},
		},
		Message:   "add files",
		OldBranch: "main",
		NewBranch: "main",
		Author: &files_service.IdentityOptions{
			Name:  owner.Name,
			Email: owner.Email,
		},
		Committer: &files_service.IdentityOptions{
			Name:  owner.Name,
			Email: owner.Email,
		},
		Dates: &files_service.CommitDateOptions{
			Author:    time.Now(),
			Committer: time.Now(),
		},
	})
	// The assert helpers do not stop the test; bail out explicitly so that the
	// response is never dereferenced after a failed commit.
	if !assert.NoError(t, err) || !assert.NotEmpty(t, addFilesResp) {
		t.FailNow()
	}

	return addFilesResp.Commit.SHA
}
// TestRepoLangStats checks how the `linguist-*` attributes in
// `.gitattributes` influence the language statistics of a repository.
//
// Every sub-test creates a fresh repository containing the same sample files
// and a different `.gitattributes`, refreshes the statistics, and compares the
// reported languages, in order, against the expected list.
func TestRepoLangStats(t *testing.T) {
	onGiteaRun(t, func(t *testing.T, u *url.URL) {
		/******************
		 ** Preparations **
		 ******************/

		// prep creates the test repository, commits the sample files with
		// attribs as the `.gitattributes` body, and returns the repository,
		// the SHA of that commit, and the cleanup function.
		prep := func(t *testing.T, attribs string) (*repo_model.Repository, string, func()) {
			t.Helper()

			repo, f := createLangStatTestRepo(t)
			sha := addLangStatTestFiles(t, repo, attribs)

			return repo, sha, f
		}

		// getFreshLanguageStats queues a statistics update for repo, flushes
		// the queues, verifies that the recorded indexer status refers to sha,
		// and returns the top five languages.
		getFreshLanguageStats := func(t *testing.T, repo *repo_model.Repository, sha string) repo_model.LanguageStatList {
			t.Helper()

			err := stats.UpdateRepoIndexer(repo)
			assert.NoError(t, err)

			assert.NoError(t, queue.GetManager().FlushAll(context.Background(), 10*time.Second))

			status, err := repo_model.GetIndexerStatus(db.DefaultContext, repo, repo_model.RepoIndexerTypeStats)
			// Only look at the status if it was actually retrieved; a failed
			// lookup should be reported as a failure, not as a panic.
			if assert.NoError(t, err) {
				assert.Equal(t, sha, status.CommitSha)
			}

			langs, err := repo_model.GetTopLanguageStats(db.DefaultContext, repo, 5)
			assert.NoError(t, err)

			return langs
		}

		/***********
		 ** Tests **
		 ***********/
		testCases := []struct {
			name    string   // sub-test name
			attribs string   // contents of `.gitattributes`
			want    []string // expected languages, in order; empty means none
		}{
			// 1. By default, documentation is not indexed
			//
			// While this is a fairly short test, this exercises a number of
			// things:
			//
			// - `.gitattributes` is empty, so `isDetectable.IsFalse()`,
			//   `isVendored.IsTrue()`, and `isDocumentation.IsTrue()` will be
			//   false for every file, because these are only true if an
			//   attribute is explicitly set.
			//
			// - There is `.dot.pas`, which would be considered Pascal source,
			//   but it is a dotfile (thus, `enry.IsDotFile()` applies), and as
			//   such, is not considered.
			//
			// - `some-file.xml` will be skipped because Enry considers XML
			//   configuration, and `enry.IsConfiguration()` will catch it.
			//
			// - `!isVendored.IsFalse()` evaluates to true, so
			//   `analyze.isVendor()` will be called on `cpplint.py`, which will
			//   be considered vendored, even though both the filename and
			//   contents would otherwise make it Python.
			//
			// - `!isDocumentation.IsFalse()` evaluates to true, so
			//   `enry.IsDocumentation()` will be called for `docs.md`, and will
			//   be considered documentation, thus, skipped.
			//
			// Thus, this exercises all of the conditions in the first big if
			// that is supposed to filter out files early. With one short
			// expectation!
			{
				name:    "default",
				attribs: "",
				want:    []string{"C"},
			},
			// 2. Marking foo.c as non-detectable
			{
				name:    "foo.c non-detectable",
				attribs: "foo.c linguist-detectable=false\n",
				want:    nil,
			},
			// 3. Marking Markdown detectable
			{
				name:    "detectable markdown",
				attribs: "*.md linguist-detectable\n",
				want:    []string{"C", "Markdown"},
			},
			// 4. Marking foo.c as documentation
			{
				name:    "foo.c as documentation",
				attribs: "foo.c linguist-documentation\n",
				want:    nil,
			},
			// 5. Overriding a generated file
			{
				name:    "linguist-generated=false",
				attribs: "foo.nib linguist-generated=false\nfoo.nib linguist-language=Perl\n",
				want:    []string{"C", "Perl"},
			},
			// 6. Disabling vendoring for a file
			{
				name:    "linguist-vendored=false",
				attribs: "cpplint.py linguist-vendored=false\n",
				want:    []string{"C", "Python"},
			},
			// 7. Disabling vendoring for a file, with -linguist-vendored
			{
				name:    "-linguist-vendored",
				attribs: "cpplint.py -linguist-vendored\n",
				want:    []string{"C", "Python"},
			},
			// 8. Marking foo.c as vendored
			{
				name:    "foo.c as vendored",
				attribs: "foo.c linguist-vendored\n",
				want:    nil,
			},
		}

		// The sub-tests run one after the other (none of them is parallel),
		// which matters because they all create a repository of the same name.
		for _, tc := range testCases {
			t.Run(tc.name, func(t *testing.T) {
				defer tests.PrintCurrentTest(t)()

				repo, sha, f := prep(t, tc.attribs)
				defer f()

				langs := getFreshLanguageStats(t, repo, sha)

				if len(tc.want) == 0 {
					assert.Empty(t, langs)
					return
				}

				// Compare the whole list of names at once. Indexing into langs
				// after a failed length assertion would panic and abort the
				// run instead of reporting an ordinary test failure.
				got := make([]string, 0, len(langs))
				for _, lang := range langs {
					got = append(got, lang.Language)
				}
				assert.Equal(t, tc.want, got)
			})
		}
	})
}